blob: 20298fc7f9b8a2b02f847a0fc9cabe376d0ea847 [file] [log] [blame]
sewardjd20c8852005-01-20 20:04:07 +00001
2/*--------------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin guest_amd64_toIR.c ---*/
sewardjd20c8852005-01-20 20:04:07 +00004/*--------------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardjd20c8852005-01-20 20:04:07 +00009
sewardj89ae8472013-10-18 14:12:58 +000010 Copyright (C) 2004-2013 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardjd20c8852005-01-20 20:04:07 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardjd20c8852005-01-20 20:04:07 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardjd20c8852005-01-20 20:04:07 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardjd20c8852005-01-20 20:04:07 +000034*/
35
sewardje9d8a262009-07-01 08:06:34 +000036/* Translates AMD64 code to IR. */
sewardj9ff93bc2005-03-23 11:25:12 +000037
sewardj820611e2005-08-24 10:56:01 +000038/* TODO:
39
40 All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked
41 to ensure a 64-bit value is being written.
42
sewardje9d8a262009-07-01 08:06:34 +000043 x87 FP Limitations:
44
45 * all arithmetic done at 64 bits
46
47 * no FP exceptions, except for handling stack over/underflow
48
49 * FP rounding mode observed only for float->int conversions and
50 int->float conversions which could lose accuracy, and for
51 float-to-float rounding. For all other operations,
52 round-to-nearest is used, regardless.
53
sewardje9d8a262009-07-01 08:06:34 +000054 * some of the FCOM cases could do with testing -- not convinced
55 that the args are the right way round.
56
57 * FSAVE does not re-initialise the FPU; it should do
58
59 * FINIT not only initialises the FPU environment, it also zeroes
60 all the FP registers. It should leave the registers unchanged.
61
sewardje9d8a262009-07-01 08:06:34 +000062 SAHF should cause eflags[1] == 1, and in fact it produces 0. As
63 per Intel docs this bit has no meaning anyway. Since PUSHF is the
64 only way to observe eflags[1], a proper fix would be to make that
65 bit be set by PUSHF.
66
67 This module uses global variables and so is not MT-safe (if that
68 should ever become relevant).
sewardj820611e2005-08-24 10:56:01 +000069*/
sewardj44d494d2005-01-20 20:26:33 +000070
sewardj42561ef2005-11-04 14:18:31 +000071/* Notes re address size overrides (0x67).
72
73 According to the AMD documentation (24594 Rev 3.09, Sept 2003,
74 "AMD64 Architecture Programmer's Manual Volume 3: General-Purpose
75 and System Instructions"), Section 1.2.3 ("Address-Size Override
76 Prefix"):
77
78 0x67 applies to all explicit memory references, causing the top
79 32 bits of the effective address to become zero.
80
81 0x67 has no effect on stack references (push/pop); these always
82 use a 64-bit address.
83
84 0x67 changes the interpretation of instructions which implicitly
85 reference RCX/RSI/RDI, so that in fact ECX/ESI/EDI are used
86 instead. These are:
87
88 cmp{s,sb,sw,sd,sq}
89 in{s,sb,sw,sd}
90 jcxz, jecxz, jrcxz
91 lod{s,sb,sw,sd,sq}
92 loop{,e,bz,be,z}
93 mov{s,sb,sw,sd,sq}
94 out{s,sb,sw,sd}
95 rep{,e,ne,nz}
96 sca{s,sb,sw,sd,sq}
97 sto{s,sb,sw,sd,sq}
98 xlat{,b} */
99
/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by the 16-byte preamble 48C1C703 48C1C70D
   48C1C73D 48C1C733 (in the standard interpretation, that means: rolq
   $3, %rdi; rolq $13, %rdi; rolq $61, %rdi; rolq $51, %rdi).
   Following that, exactly one of the following 4 is allowed
   (standard interpretation in parentheses):

      4887DB (xchgq %rbx,%rbx)   %RDX = client_request ( %RAX )
      4887C9 (xchgq %rcx,%rcx)   %RAX = guest_NRADDR
      4887D2 (xchgq %rdx,%rdx)   call-noredir *%RAX
      4887F6 (xchgq %rdi,%rdi)   IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.

   No prefixes may precede a "Special" instruction.
*/
sewardjce02aa72006-01-12 12:27:58 +0000123
sewardje9d8a262009-07-01 08:06:34 +0000124/* casLE (implementation of lock-prefixed insns) and rep-prefixed
125 insns: the side-exit back to the start of the insn is done with
126 Ijk_Boring. This is quite wrong, it should be done with
127 Ijk_NoRedir, since otherwise the side exit, which is intended to
128 restart the instruction for whatever reason, could go somewhere
129 entirely else. Doing it right (with Ijk_NoRedir jumps) would make
130 no-redir jumps performance critical, at least for rep-prefixed
131 instructions, since all iterations thereof would involve such a
132 jump. It's not such a big deal with casLE since the side exit is
133 only taken if the CAS fails, that is, the location is contended,
134 which is relatively unlikely.
sewardj1fb8c922009-07-12 12:56:53 +0000135
136 Note also, the test for CAS success vs failure is done using
137 Iop_CasCmp{EQ,NE}{8,16,32,64} rather than the ordinary
138 Iop_Cmp{EQ,NE} equivalents. This is so as to tell Memcheck that it
139 shouldn't definedness-check these comparisons. See
140 COMMENT_ON_CasCmpEQ in memcheck/mc_translate.c for
141 background/rationale.
sewardje9d8a262009-07-01 08:06:34 +0000142*/
143
144/* LOCK prefixed instructions. These are translated using IR-level
145 CAS statements (IRCAS) and are believed to preserve atomicity, even
146 from the point of view of some other process racing against a
147 simulated one (presumably they communicate via a shared memory
148 segment).
149
150 Handlers which are aware of LOCK prefixes are:
151 dis_op2_G_E (add, or, adc, sbb, and, sub, xor)
152 dis_cmpxchg_G_E (cmpxchg)
153 dis_Grp1 (add, or, adc, sbb, and, sub, xor)
154 dis_Grp3 (not, neg)
155 dis_Grp4 (inc, dec)
156 dis_Grp5 (inc, dec)
157 dis_Grp8_Imm (bts, btc, btr)
158 dis_bt_G_E (bts, btc, btr)
159 dis_xadd_G_E (xadd)
160*/
161
sewardj44d494d2005-01-20 20:26:33 +0000162
163#include "libvex_basictypes.h"
164#include "libvex_ir.h"
165#include "libvex.h"
166#include "libvex_guest_amd64.h"
167
sewardjcef7d3e2009-07-02 12:21:59 +0000168#include "main_util.h"
169#include "main_globals.h"
170#include "guest_generic_bb_to_IR.h"
171#include "guest_generic_x87.h"
172#include "guest_amd64_defs.h"
sewardj44d494d2005-01-20 20:26:33 +0000173
174
sewardjecb94892005-01-21 14:26:37 +0000175/*------------------------------------------------------------*/
176/*--- Globals ---*/
177/*------------------------------------------------------------*/
178
sewardj9e6491a2005-07-02 19:24:10 +0000179/* These are set at the start of the translation of an insn, right
180 down in disInstr_AMD64, so that we don't have to pass them around
181 endlessly. They are all constant during the translation of any
182 given insn. */
sewardj4b744762005-02-07 15:02:25 +0000183
sewardjecb94892005-01-21 14:26:37 +0000184/* These are set at the start of the translation of a BB, so
185 that we don't have to pass them around endlessly. */
186
187/* We need to know this to do sub-register accesses correctly. */
sewardj9b769162014-07-24 12:42:03 +0000188static VexEndness host_endness;
sewardjecb94892005-01-21 14:26:37 +0000189
sewardj9e6491a2005-07-02 19:24:10 +0000190/* Pointer to the guest code area (points to start of BB, not to the
191 insn being processed). */
florian8462d112014-09-24 15:18:09 +0000192static const UChar* guest_code;
sewardjb3a04292005-01-21 20:33:44 +0000193
sewardjdf0e0022005-01-25 15:48:43 +0000194/* The guest address corresponding to guest_code[0]. */
sewardj9e6491a2005-07-02 19:24:10 +0000195static Addr64 guest_RIP_bbstart;
sewardj4b744762005-02-07 15:02:25 +0000196
sewardjb3a04292005-01-21 20:33:44 +0000197/* The guest address for the instruction currently being
198 translated. */
sewardj9e6491a2005-07-02 19:24:10 +0000199static Addr64 guest_RIP_curr_instr;
sewardjecb94892005-01-21 14:26:37 +0000200
sewardjdd40fdf2006-12-24 02:20:24 +0000201/* The IRSB* into which we're generating code. */
202static IRSB* irsb;
sewardjecb94892005-01-21 14:26:37 +0000203
sewardj4b744762005-02-07 15:02:25 +0000204/* For ensuring that %rip-relative addressing is done right. A read
205 of %rip generates the address of the next instruction. It may be
206 that we don't conveniently know that inside disAMode(). For sanity
207 checking, if the next insn %rip is needed, we make a guess at what
208 it is, record that guess here, and set the accompanying Bool to
209 indicate that -- after this insn's decode is finished -- that guess
210 needs to be checked. */
211
212/* At the start of each insn decode, is set to (0, False).
213 After the decode, if _mustcheck is now True, _assumed is
214 checked. */
215
sewardj9e6491a2005-07-02 19:24:10 +0000216static Addr64 guest_RIP_next_assumed;
217static Bool guest_RIP_next_mustcheck;
sewardj4b744762005-02-07 15:02:25 +0000218
219
sewardjecb94892005-01-21 14:26:37 +0000220/*------------------------------------------------------------*/
221/*--- Helpers for constructing IR. ---*/
222/*------------------------------------------------------------*/
223
sewardjb3a04292005-01-21 20:33:44 +0000224/* Generate a new temporary of the given type. */
225static IRTemp newTemp ( IRType ty )
226{
sewardj496a58d2005-03-20 18:44:44 +0000227 vassert(isPlausibleIRType(ty));
sewardjdd40fdf2006-12-24 02:20:24 +0000228 return newIRTemp( irsb->tyenv, ty );
sewardjb3a04292005-01-21 20:33:44 +0000229}
230
sewardjdd40fdf2006-12-24 02:20:24 +0000231/* Add a statement to the list held by "irsb". */
sewardjecb94892005-01-21 14:26:37 +0000232static void stmt ( IRStmt* st )
233{
sewardjdd40fdf2006-12-24 02:20:24 +0000234 addStmtToIRSB( irsb, st );
sewardjecb94892005-01-21 14:26:37 +0000235}
sewardjb3a04292005-01-21 20:33:44 +0000236
237/* Generate a statement "dst := e". */
238static void assign ( IRTemp dst, IRExpr* e )
239{
sewardjdd40fdf2006-12-24 02:20:24 +0000240 stmt( IRStmt_WrTmp(dst, e) );
sewardjb3a04292005-01-21 20:33:44 +0000241}
242
sewardjecb94892005-01-21 14:26:37 +0000243static IRExpr* unop ( IROp op, IRExpr* a )
244{
245 return IRExpr_Unop(op, a);
246}
247
sewardjb3a04292005-01-21 20:33:44 +0000248static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
249{
250 return IRExpr_Binop(op, a1, a2);
251}
252
sewardj4796d662006-02-05 16:06:26 +0000253static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
254{
255 return IRExpr_Triop(op, a1, a2, a3);
256}
257
sewardjdf0e0022005-01-25 15:48:43 +0000258static IRExpr* mkexpr ( IRTemp tmp )
259{
sewardjdd40fdf2006-12-24 02:20:24 +0000260 return IRExpr_RdTmp(tmp);
sewardjdf0e0022005-01-25 15:48:43 +0000261}
sewardjb3a04292005-01-21 20:33:44 +0000262
sewardj3ca55a12005-01-27 16:06:23 +0000263static IRExpr* mkU8 ( ULong i )
sewardjb3a04292005-01-21 20:33:44 +0000264{
265 vassert(i < 256);
sewardj3ca55a12005-01-27 16:06:23 +0000266 return IRExpr_Const(IRConst_U8( (UChar)i ));
sewardjb3a04292005-01-21 20:33:44 +0000267}
268
sewardj5e525292005-01-28 15:13:10 +0000269static IRExpr* mkU16 ( ULong i )
270{
271 vassert(i < 0x10000ULL);
272 return IRExpr_Const(IRConst_U16( (UShort)i ));
273}
sewardj3ca55a12005-01-27 16:06:23 +0000274
275static IRExpr* mkU32 ( ULong i )
276{
277 vassert(i < 0x100000000ULL);
278 return IRExpr_Const(IRConst_U32( (UInt)i ));
279}
sewardjb3a04292005-01-21 20:33:44 +0000280
281static IRExpr* mkU64 ( ULong i )
282{
283 return IRExpr_Const(IRConst_U64(i));
284}
sewardjecb94892005-01-21 14:26:37 +0000285
sewardj3ca55a12005-01-27 16:06:23 +0000286static IRExpr* mkU ( IRType ty, ULong i )
287{
288 switch (ty) {
289 case Ity_I8: return mkU8(i);
sewardj5e525292005-01-28 15:13:10 +0000290 case Ity_I16: return mkU16(i);
sewardj3ca55a12005-01-27 16:06:23 +0000291 case Ity_I32: return mkU32(i);
292 case Ity_I64: return mkU64(i);
293 default: vpanic("mkU(amd64)");
294 }
295}
296
sewardj5e525292005-01-28 15:13:10 +0000297static void storeLE ( IRExpr* addr, IRExpr* data )
298{
sewardje768e922009-11-26 17:17:37 +0000299 stmt( IRStmt_Store(Iend_LE, addr, data) );
sewardj5e525292005-01-28 15:13:10 +0000300}
301
sewardje768e922009-11-26 17:17:37 +0000302static IRExpr* loadLE ( IRType ty, IRExpr* addr )
sewardj5e525292005-01-28 15:13:10 +0000303{
sewardje768e922009-11-26 17:17:37 +0000304 return IRExpr_Load(Iend_LE, ty, addr);
sewardj5e525292005-01-28 15:13:10 +0000305}
306
307static IROp mkSizedOp ( IRType ty, IROp op8 )
308{
309 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8
310 || op8 == Iop_Mul8
311 || op8 == Iop_Or8 || op8 == Iop_And8 || op8 == Iop_Xor8
312 || op8 == Iop_Shl8 || op8 == Iop_Shr8 || op8 == Iop_Sar8
313 || op8 == Iop_CmpEQ8 || op8 == Iop_CmpNE8
sewardj1fb8c922009-07-12 12:56:53 +0000314 || op8 == Iop_CasCmpNE8
sewardj5e525292005-01-28 15:13:10 +0000315 || op8 == Iop_Not8 );
316 switch (ty) {
317 case Ity_I8: return 0 +op8;
318 case Ity_I16: return 1 +op8;
319 case Ity_I32: return 2 +op8;
320 case Ity_I64: return 3 +op8;
321 default: vpanic("mkSizedOp(amd64)");
322 }
323}
324
325static
326IRExpr* doScalarWidening ( Int szSmall, Int szBig, Bool signd, IRExpr* src )
327{
328 if (szSmall == 1 && szBig == 4) {
329 return unop(signd ? Iop_8Sto32 : Iop_8Uto32, src);
330 }
331 if (szSmall == 1 && szBig == 2) {
332 return unop(signd ? Iop_8Sto16 : Iop_8Uto16, src);
333 }
334 if (szSmall == 2 && szBig == 4) {
335 return unop(signd ? Iop_16Sto32 : Iop_16Uto32, src);
336 }
337 if (szSmall == 1 && szBig == 8 && !signd) {
sewardje58967e2005-04-27 11:50:56 +0000338 return unop(Iop_8Uto64, src);
sewardj5e525292005-01-28 15:13:10 +0000339 }
sewardj03b07cc2005-01-31 18:09:43 +0000340 if (szSmall == 1 && szBig == 8 && signd) {
sewardje58967e2005-04-27 11:50:56 +0000341 return unop(Iop_8Sto64, src);
sewardj03b07cc2005-01-31 18:09:43 +0000342 }
sewardj5e525292005-01-28 15:13:10 +0000343 if (szSmall == 2 && szBig == 8 && !signd) {
sewardje58967e2005-04-27 11:50:56 +0000344 return unop(Iop_16Uto64, src);
sewardj5e525292005-01-28 15:13:10 +0000345 }
sewardj03b07cc2005-01-31 18:09:43 +0000346 if (szSmall == 2 && szBig == 8 && signd) {
sewardje58967e2005-04-27 11:50:56 +0000347 return unop(Iop_16Sto64, src);
sewardj03b07cc2005-01-31 18:09:43 +0000348 }
sewardj5e525292005-01-28 15:13:10 +0000349 vpanic("doScalarWidening(amd64)");
350}
351
352
sewardjecb94892005-01-21 14:26:37 +0000353
354/*------------------------------------------------------------*/
355/*--- Debugging output ---*/
356/*------------------------------------------------------------*/
357
sewardjb3a04292005-01-21 20:33:44 +0000358/* Bomb out if we can't handle something. */
359__attribute__ ((noreturn))
florian55085f82012-11-21 00:36:55 +0000360static void unimplemented ( const HChar* str )
sewardjb3a04292005-01-21 20:33:44 +0000361{
362 vex_printf("amd64toIR: unimplemented feature\n");
363 vpanic(str);
364}
365
/* Front-end tracing helpers.  Both do their work only when the
   VEX_TRACE_FE bit is set in vex_traceflags.

   DIP prints with vex_printf; DIS formats into 'buf' with
   vex_sprintf.

   The bodies are wrapped in do { ... } while (0) so that each macro
   expands to exactly one statement.  A bare "if (...) call" body
   would capture a following "else" when the macro is used as the
   unbraced body of an enclosing if/else. */
#define DIP(format, args...)              \
   do {                                   \
      if (vex_traceflags & VEX_TRACE_FE)  \
         vex_printf(format, ## args);     \
   } while (0)

#define DIS(buf, format, args...)           \
   do {                                     \
      if (vex_traceflags & VEX_TRACE_FE)    \
         vex_sprintf(buf, format, ## args); \
   } while (0)
373
374
375/*------------------------------------------------------------*/
376/*--- Offsets of various parts of the amd64 guest state. ---*/
377/*------------------------------------------------------------*/
378
/* Byte offsets of fields within VexGuestAMD64State. */

/* Integer registers. */
#define OFFB_RAX       offsetof(VexGuestAMD64State,guest_RAX)
#define OFFB_RBX       offsetof(VexGuestAMD64State,guest_RBX)
#define OFFB_RCX       offsetof(VexGuestAMD64State,guest_RCX)
#define OFFB_RDX       offsetof(VexGuestAMD64State,guest_RDX)
#define OFFB_RSP       offsetof(VexGuestAMD64State,guest_RSP)
#define OFFB_RBP       offsetof(VexGuestAMD64State,guest_RBP)
#define OFFB_RSI       offsetof(VexGuestAMD64State,guest_RSI)
#define OFFB_RDI       offsetof(VexGuestAMD64State,guest_RDI)
#define OFFB_R8        offsetof(VexGuestAMD64State,guest_R8)
#define OFFB_R9        offsetof(VexGuestAMD64State,guest_R9)
#define OFFB_R10       offsetof(VexGuestAMD64State,guest_R10)
#define OFFB_R11       offsetof(VexGuestAMD64State,guest_R11)
#define OFFB_R12       offsetof(VexGuestAMD64State,guest_R12)
#define OFFB_R13       offsetof(VexGuestAMD64State,guest_R13)
#define OFFB_R14       offsetof(VexGuestAMD64State,guest_R14)
#define OFFB_R15       offsetof(VexGuestAMD64State,guest_R15)

#define OFFB_RIP       offsetof(VexGuestAMD64State,guest_RIP)

#define OFFB_FS_ZERO   offsetof(VexGuestAMD64State,guest_FS_ZERO)
#define OFFB_GS_0x60   offsetof(VexGuestAMD64State,guest_GS_0x60)

/* Condition-code thunk fields. */
#define OFFB_CC_OP     offsetof(VexGuestAMD64State,guest_CC_OP)
#define OFFB_CC_DEP1   offsetof(VexGuestAMD64State,guest_CC_DEP1)
#define OFFB_CC_DEP2   offsetof(VexGuestAMD64State,guest_CC_DEP2)
#define OFFB_CC_NDEP   offsetof(VexGuestAMD64State,guest_CC_NDEP)

/* FP registers/tags (offset of element 0 of each array) and
   assorted flag fields. */
#define OFFB_FPREGS    offsetof(VexGuestAMD64State,guest_FPREG[0])
#define OFFB_FPTAGS    offsetof(VexGuestAMD64State,guest_FPTAG[0])
#define OFFB_DFLAG     offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_ACFLAG    offsetof(VexGuestAMD64State,guest_ACFLAG)
#define OFFB_IDFLAG    offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP      offsetof(VexGuestAMD64State,guest_FTOP)
#define OFFB_FC3210    offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND   offsetof(VexGuestAMD64State,guest_FPROUND)

/* SSE rounding mode and the YMM fields. */
#define OFFB_SSEROUND  offsetof(VexGuestAMD64State,guest_SSEROUND)
#define OFFB_YMM0      offsetof(VexGuestAMD64State,guest_YMM0)
#define OFFB_YMM1      offsetof(VexGuestAMD64State,guest_YMM1)
#define OFFB_YMM2      offsetof(VexGuestAMD64State,guest_YMM2)
#define OFFB_YMM3      offsetof(VexGuestAMD64State,guest_YMM3)
#define OFFB_YMM4      offsetof(VexGuestAMD64State,guest_YMM4)
#define OFFB_YMM5      offsetof(VexGuestAMD64State,guest_YMM5)
#define OFFB_YMM6      offsetof(VexGuestAMD64State,guest_YMM6)
#define OFFB_YMM7      offsetof(VexGuestAMD64State,guest_YMM7)
#define OFFB_YMM8      offsetof(VexGuestAMD64State,guest_YMM8)
#define OFFB_YMM9      offsetof(VexGuestAMD64State,guest_YMM9)
#define OFFB_YMM10     offsetof(VexGuestAMD64State,guest_YMM10)
#define OFFB_YMM11     offsetof(VexGuestAMD64State,guest_YMM11)
#define OFFB_YMM12     offsetof(VexGuestAMD64State,guest_YMM12)
#define OFFB_YMM13     offsetof(VexGuestAMD64State,guest_YMM13)
#define OFFB_YMM14     offsetof(VexGuestAMD64State,guest_YMM14)
#define OFFB_YMM15     offsetof(VexGuestAMD64State,guest_YMM15)
#define OFFB_YMM16     offsetof(VexGuestAMD64State,guest_YMM16)

#define OFFB_EMNOTE    offsetof(VexGuestAMD64State,guest_EMNOTE)
#define OFFB_CMSTART   offsetof(VexGuestAMD64State,guest_CMSTART)
#define OFFB_CMLEN     offsetof(VexGuestAMD64State,guest_CMLEN)

#define OFFB_NRADDR    offsetof(VexGuestAMD64State,guest_NRADDR)
439
sewardjdf0e0022005-01-25 15:48:43 +0000440
441/*------------------------------------------------------------*/
sewardjecb94892005-01-21 14:26:37 +0000442/*--- Helper bits and pieces for deconstructing the ---*/
443/*--- amd64 insn stream. ---*/
444/*------------------------------------------------------------*/
445
/* AMD64 encoding numbers for the sixteen integer registers. */
#define R_RAX   0
#define R_RCX   1
#define R_RDX   2
#define R_RBX   3
#define R_RSP   4
#define R_RBP   5
#define R_RSI   6
#define R_RDI   7
#define R_R8    8
#define R_R9    9
#define R_R10  10
#define R_R11  11
#define R_R12  12
#define R_R13  13
#define R_R14  14
#define R_R15  15

/* Intel encoding numbers for the six segment registers. */
#define R_ES    0
#define R_CS    1
#define R_SS    2
#define R_DS    3
#define R_FS    4
#define R_GS    5
471
472
sewardjb3a04292005-01-21 20:33:44 +0000473/* Various simple conversions */
474
475static ULong extend_s_8to64 ( UChar x )
476{
477 return (ULong)((((Long)x) << 56) >> 56);
478}
479
480static ULong extend_s_16to64 ( UShort x )
481{
482 return (ULong)((((Long)x) << 48) >> 48);
483}
484
485static ULong extend_s_32to64 ( UInt x )
486{
487 return (ULong)((((Long)x) << 32) >> 32);
488}
489
sewardjdf0e0022005-01-25 15:48:43 +0000490/* Figure out whether the mod and rm parts of a modRM byte refer to a
491 register or memory. If so, the byte will have the form 11XXXYYY,
492 where YYY is the register number. */
sewardj5b470602005-02-27 13:10:48 +0000493inline
sewardjdf0e0022005-01-25 15:48:43 +0000494static Bool epartIsReg ( UChar mod_reg_rm )
495{
sewardj7a240552005-01-28 21:37:12 +0000496 return toBool(0xC0 == (mod_reg_rm & 0xC0));
sewardjdf0e0022005-01-25 15:48:43 +0000497}
498
sewardj901ed122005-02-27 13:25:31 +0000499/* Extract the 'g' field from a modRM byte. This only produces 3
500 bits, which is not a complete register number. You should avoid
501 this function if at all possible. */
502inline
503static Int gregLO3ofRM ( UChar mod_reg_rm )
sewardjdf0e0022005-01-25 15:48:43 +0000504{
505 return (Int)( (mod_reg_rm >> 3) & 7 );
506}
507
sewardj8711f662005-05-09 17:52:56 +0000508/* Ditto the 'e' field of a modRM byte. */
509inline
510static Int eregLO3ofRM ( UChar mod_reg_rm )
511{
512 return (Int)(mod_reg_rm & 0x7);
513}
514
/* Get an 8/16/32-bit unsigned value out of the insn stream. */
516
sewardj80611e32012-01-20 13:07:24 +0000517static inline UChar getUChar ( Long delta )
sewardjdf0e0022005-01-25 15:48:43 +0000518{
sewardj8c332e22005-01-28 01:36:56 +0000519 UChar v = guest_code[delta+0];
520 return v;
sewardjdf0e0022005-01-25 15:48:43 +0000521}
522
sewardj47c2d4d2006-11-14 17:50:16 +0000523static UInt getUDisp16 ( Long delta )
524{
525 UInt v = guest_code[delta+1]; v <<= 8;
526 v |= guest_code[delta+0];
527 return v & 0xFFFF;
528}
529
sewardj270def42005-07-03 01:03:01 +0000530//.. static UInt getUDisp ( Int size, Long delta )
sewardjd20c8852005-01-20 20:04:07 +0000531//.. {
532//.. switch (size) {
533//.. case 4: return getUDisp32(delta);
534//.. case 2: return getUDisp16(delta);
535//.. case 1: return getUChar(delta);
536//.. default: vpanic("getUDisp(x86)");
537//.. }
538//.. return 0; /*notreached*/
539//.. }
sewardjb3a04292005-01-21 20:33:44 +0000540
541
542/* Get a byte value out of the insn stream and sign-extend to 64
543 bits. */
sewardj270def42005-07-03 01:03:01 +0000544static Long getSDisp8 ( Long delta )
sewardjb3a04292005-01-21 20:33:44 +0000545{
546 return extend_s_8to64( guest_code[delta] );
547}
548
sewardj5e525292005-01-28 15:13:10 +0000549/* Get a 16-bit value out of the insn stream and sign-extend to 64
550 bits. */
sewardj270def42005-07-03 01:03:01 +0000551static Long getSDisp16 ( Long delta )
sewardj5e525292005-01-28 15:13:10 +0000552{
sewardj118b23e2005-01-29 02:14:44 +0000553 UInt v = guest_code[delta+1]; v <<= 8;
sewardj5e525292005-01-28 15:13:10 +0000554 v |= guest_code[delta+0];
sewardj118b23e2005-01-29 02:14:44 +0000555 return extend_s_16to64( (UShort)v );
sewardj5e525292005-01-28 15:13:10 +0000556}
557
sewardjb3a04292005-01-21 20:33:44 +0000558/* Get a 32-bit value out of the insn stream and sign-extend to 64
559 bits. */
sewardj270def42005-07-03 01:03:01 +0000560static Long getSDisp32 ( Long delta )
sewardjb3a04292005-01-21 20:33:44 +0000561{
562 UInt v = guest_code[delta+3]; v <<= 8;
563 v |= guest_code[delta+2]; v <<= 8;
564 v |= guest_code[delta+1]; v <<= 8;
565 v |= guest_code[delta+0];
566 return extend_s_32to64( v );
567}
568
sewardj03b07cc2005-01-31 18:09:43 +0000569/* Get a 64-bit value out of the insn stream. */
sewardj270def42005-07-03 01:03:01 +0000570static Long getDisp64 ( Long delta )
sewardj03b07cc2005-01-31 18:09:43 +0000571{
sewardj7eaa7cf2005-01-31 18:55:22 +0000572 ULong v = 0;
sewardj03b07cc2005-01-31 18:09:43 +0000573 v |= guest_code[delta+7]; v <<= 8;
574 v |= guest_code[delta+6]; v <<= 8;
575 v |= guest_code[delta+5]; v <<= 8;
576 v |= guest_code[delta+4]; v <<= 8;
577 v |= guest_code[delta+3]; v <<= 8;
578 v |= guest_code[delta+2]; v <<= 8;
579 v |= guest_code[delta+1]; v <<= 8;
580 v |= guest_code[delta+0];
581 return v;
582}
583
sewardj3ca55a12005-01-27 16:06:23 +0000584/* Note: because AMD64 doesn't allow 64-bit literals, it is an error
585 if this is called with size==8. Should not happen. */
sewardj270def42005-07-03 01:03:01 +0000586static Long getSDisp ( Int size, Long delta )
sewardj3ca55a12005-01-27 16:06:23 +0000587{
588 switch (size) {
589 case 4: return getSDisp32(delta);
sewardj5e525292005-01-28 15:13:10 +0000590 case 2: return getSDisp16(delta);
sewardj3ca55a12005-01-27 16:06:23 +0000591 case 1: return getSDisp8(delta);
592 default: vpanic("getSDisp(amd64)");
593 }
594}
595
sewardj1389d4d2005-01-28 13:46:29 +0000596static ULong mkSizeMask ( Int sz )
sewardj3ca55a12005-01-27 16:06:23 +0000597{
598 switch (sz) {
sewardj1389d4d2005-01-28 13:46:29 +0000599 case 1: return 0x00000000000000FFULL;
600 case 2: return 0x000000000000FFFFULL;
601 case 4: return 0x00000000FFFFFFFFULL;
sewardj3ca55a12005-01-27 16:06:23 +0000602 case 8: return 0xFFFFFFFFFFFFFFFFULL;
603 default: vpanic("mkSzMask(amd64)");
604 }
605}
606
607static Int imin ( Int a, Int b )
608{
609 return (a < b) ? a : b;
610}
sewardjecb94892005-01-21 14:26:37 +0000611
sewardj5b470602005-02-27 13:10:48 +0000612static IRType szToITy ( Int n )
613{
614 switch (n) {
615 case 1: return Ity_I8;
616 case 2: return Ity_I16;
617 case 4: return Ity_I32;
618 case 8: return Ity_I64;
sewardjf53b7352005-04-06 20:01:56 +0000619 default: vex_printf("\nszToITy(%d)\n", n);
620 vpanic("szToITy(amd64)");
sewardj5b470602005-02-27 13:10:48 +0000621 }
622}
623
sewardjecb94892005-01-21 14:26:37 +0000624
625/*------------------------------------------------------------*/
626/*--- For dealing with prefixes. ---*/
627/*------------------------------------------------------------*/
628
629/* The idea is to pass around an int holding a bitmask summarising
630 info from the prefixes seen on the current instruction, including
631 info from the REX byte. This info is used in various places, but
632 most especially when making sense of register fields in
633 instructions.
634
sewardjc4530ae2012-05-21 10:18:49 +0000635 The top 8 bits of the prefix are 0x55, just as a hacky way to
636 ensure it really is a valid prefix.
sewardjdf0e0022005-01-25 15:48:43 +0000637
638 Things you can safely assume about a well-formed prefix:
639 * at most one segment-override bit (CS,DS,ES,FS,GS,SS) is set.
sewardj5b470602005-02-27 13:10:48 +0000640 * if REX is not present then REXW,REXR,REXX,REXB will read
641 as zero.
sewardjdf0e0022005-01-25 15:48:43 +0000642 * F2 and F3 will not both be 1.
sewardjecb94892005-01-21 14:26:37 +0000643*/
644
645typedef UInt Prefix;
646
/* Bit assignments within a Prefix value.  Note that 0xF2 is the
   REPNE/REPNZ prefix and 0xF3 is the REP/REPE/REPZ prefix. */
#define PFX_ASO    (1<<0)    /* address-size override present (0x67) */
#define PFX_66     (1<<1)    /* operand-size override-to-16 present (0x66) */
#define PFX_REX    (1<<2)    /* REX byte present (0x40 to 0x4F) */
#define PFX_REXW   (1<<3)    /* REX W bit, if REX present, else 0 */
#define PFX_REXR   (1<<4)    /* REX R bit, if REX present, else 0 */
#define PFX_REXX   (1<<5)    /* REX X bit, if REX present, else 0 */
#define PFX_REXB   (1<<6)    /* REX B bit, if REX present, else 0 */
#define PFX_LOCK   (1<<7)    /* bus LOCK prefix present (0xF0) */
#define PFX_F2     (1<<8)    /* REPNE/REPNZ prefix present (0xF2) */
#define PFX_F3     (1<<9)    /* REP/REPE/REPZ prefix present (0xF3) */
#define PFX_CS     (1<<10)   /* CS segment prefix present (0x2E) */
#define PFX_DS     (1<<11)   /* DS segment prefix present (0x3E) */
#define PFX_ES     (1<<12)   /* ES segment prefix present (0x26) */
#define PFX_FS     (1<<13)   /* FS segment prefix present (0x64) */
#define PFX_GS     (1<<14)   /* GS segment prefix present (0x65) */
#define PFX_SS     (1<<15)   /* SS segment prefix present (0x36) */
#define PFX_VEX    (1<<16)   /* VEX prefix present (0xC4 or 0xC5) */
#define PFX_VEXL   (1<<17)   /* VEX L bit, if VEX present, else 0 */
/* The extra register field VEX.vvvv is encoded (after not-ing it) as
   PFX_VEXnV3 .. PFX_VEXnV0, so these must occupy adjacent bit
   positions. */
#define PFX_VEXnV0 (1<<18)   /* ~VEX vvvv[0], if VEX present, else 0 */
#define PFX_VEXnV1 (1<<19)   /* ~VEX vvvv[1], if VEX present, else 0 */
#define PFX_VEXnV2 (1<<20)   /* ~VEX vvvv[2], if VEX present, else 0 */
#define PFX_VEXnV3 (1<<21)   /* ~VEX vvvv[3], if VEX present, else 0 */


/* A prefix with no bits set other than the 0x55 marker in the top
   byte. */
#define PFX_EMPTY 0x55000000
sewardjecb94892005-01-21 14:26:37 +0000675
sewardjb3a04292005-01-21 20:33:44 +0000676static Bool IS_VALID_PFX ( Prefix pfx ) {
sewardjc4530ae2012-05-21 10:18:49 +0000677 return toBool((pfx & 0xFF000000) == PFX_EMPTY);
sewardjecb94892005-01-21 14:26:37 +0000678}
679
sewardjb3a04292005-01-21 20:33:44 +0000680static Bool haveREX ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000681 return toBool(pfx & PFX_REX);
sewardjecb94892005-01-21 14:26:37 +0000682}
683
sewardj5e525292005-01-28 15:13:10 +0000684static Int getRexW ( Prefix pfx ) {
685 return (pfx & PFX_REXW) ? 1 : 0;
686}
sewardjdf0e0022005-01-25 15:48:43 +0000687static Int getRexR ( Prefix pfx ) {
688 return (pfx & PFX_REXR) ? 1 : 0;
689}
sewardj5b470602005-02-27 13:10:48 +0000690static Int getRexX ( Prefix pfx ) {
691 return (pfx & PFX_REXX) ? 1 : 0;
692}
sewardjdf0e0022005-01-25 15:48:43 +0000693static Int getRexB ( Prefix pfx ) {
694 return (pfx & PFX_REXB) ? 1 : 0;
695}
696
sewardj3ca55a12005-01-27 16:06:23 +0000697/* Check a prefix doesn't have F2 or F3 set in it, since usually that
698 completely changes what instruction it really is. */
699static Bool haveF2orF3 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000700 return toBool((pfx & (PFX_F2|PFX_F3)) > 0);
sewardj3ca55a12005-01-27 16:06:23 +0000701}
sewardj38b1d692013-10-15 17:21:42 +0000702static Bool haveF2andF3 ( Prefix pfx ) {
703 return toBool((pfx & (PFX_F2|PFX_F3)) == (PFX_F2|PFX_F3));
704}
sewardj55dbb262005-01-28 16:36:51 +0000705static Bool haveF2 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000706 return toBool((pfx & PFX_F2) > 0);
sewardj55dbb262005-01-28 16:36:51 +0000707}
708static Bool haveF3 ( Prefix pfx ) {
sewardj7a240552005-01-28 21:37:12 +0000709 return toBool((pfx & PFX_F3) > 0);
sewardj55dbb262005-01-28 16:36:51 +0000710}
sewardj6359f812005-07-20 10:15:34 +0000711
sewardjc8b26352005-07-20 09:23:13 +0000712static Bool have66 ( Prefix pfx ) {
713 return toBool((pfx & PFX_66) > 0);
714}
sewardj6359f812005-07-20 10:15:34 +0000715static Bool haveASO ( Prefix pfx ) {
716 return toBool((pfx & PFX_ASO) > 0);
717}
sewardj38b1d692013-10-15 17:21:42 +0000718static Bool haveLOCK ( Prefix pfx ) {
719 return toBool((pfx & PFX_LOCK) > 0);
720}
sewardjecb94892005-01-21 14:26:37 +0000721
sewardj1001dc42005-02-21 08:25:55 +0000722/* Return True iff pfx has 66 set and F2 and F3 clear */
723static Bool have66noF2noF3 ( Prefix pfx )
724{
725 return
sewardj8d965312005-02-25 02:48:47 +0000726 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_66);
sewardj1001dc42005-02-21 08:25:55 +0000727}
728
729/* Return True iff pfx has F2 set and 66 and F3 clear */
730static Bool haveF2no66noF3 ( Prefix pfx )
731{
732 return
sewardj8d965312005-02-25 02:48:47 +0000733 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F2);
734}
735
736/* Return True iff pfx has F3 set and 66 and F2 clear */
737static Bool haveF3no66noF2 ( Prefix pfx )
738{
739 return
740 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == PFX_F3);
sewardj1001dc42005-02-21 08:25:55 +0000741}
742
sewardjfd181282010-06-14 21:29:35 +0000743/* Return True iff pfx has F3 set and F2 clear */
744static Bool haveF3noF2 ( Prefix pfx )
745{
746 return
747 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F3);
748}
749
sewardj186f8692011-01-21 17:51:44 +0000750/* Return True iff pfx has F2 set and F3 clear */
751static Bool haveF2noF3 ( Prefix pfx )
752{
753 return
754 toBool((pfx & (PFX_F2|PFX_F3)) == PFX_F2);
755}
756
sewardj1001dc42005-02-21 08:25:55 +0000757/* Return True iff pfx has 66, F2 and F3 clear */
758static Bool haveNo66noF2noF3 ( Prefix pfx )
759{
760 return
sewardj8d965312005-02-25 02:48:47 +0000761 toBool((pfx & (PFX_66|PFX_F2|PFX_F3)) == 0);
sewardj1001dc42005-02-21 08:25:55 +0000762}
763
sewardj8711f662005-05-09 17:52:56 +0000764/* Return True iff pfx has any of 66, F2 and F3 set */
765static Bool have66orF2orF3 ( Prefix pfx )
766{
sewardjca673ab2005-05-11 10:03:08 +0000767 return toBool( ! haveNo66noF2noF3(pfx) );
sewardj8711f662005-05-09 17:52:56 +0000768}
769
mjw67ac3fd2014-05-09 11:41:06 +0000770/* Return True iff pfx has 66 or F3 set */
771static Bool have66orF3 ( Prefix pfx )
sewardj47c2d4d2006-11-14 17:50:16 +0000772{
mjw67ac3fd2014-05-09 11:41:06 +0000773 return toBool((pfx & (PFX_66|PFX_F3)) > 0);
sewardj47c2d4d2006-11-14 17:50:16 +0000774}
775
sewardj1389d4d2005-01-28 13:46:29 +0000776/* Clear all the segment-override bits in a prefix. */
777static Prefix clearSegBits ( Prefix p )
778{
sewardj1001dc42005-02-21 08:25:55 +0000779 return
780 p & ~(PFX_CS | PFX_DS | PFX_ES | PFX_FS | PFX_GS | PFX_SS);
781}
782
sewardjc4530ae2012-05-21 10:18:49 +0000783/* Get the (inverted, hence back to "normal") VEX.vvvv field. */
784static UInt getVexNvvvv ( Prefix pfx ) {
785 UInt r = (UInt)pfx;
786 r /= (UInt)PFX_VEXnV0; /* pray this turns into a shift */
787 return r & 0xF;
788}
789
790static Bool haveVEX ( Prefix pfx ) {
791 return toBool(pfx & PFX_VEX);
792}
793
794static Int getVexL ( Prefix pfx ) {
795 return (pfx & PFX_VEXL) ? 1 : 0;
796}
797
sewardj1389d4d2005-01-28 13:46:29 +0000798
sewardjecb94892005-01-21 14:26:37 +0000799/*------------------------------------------------------------*/
sewardj80611e32012-01-20 13:07:24 +0000800/*--- For dealing with escapes ---*/
801/*------------------------------------------------------------*/
802
803
804/* Escapes come after the prefixes, but before the primary opcode
805 byte. They escape the primary opcode byte into a bigger space.
806 The 0xF0000000 isn't significant, except so as to make it not
807 overlap valid Prefix values, for sanity checking.
808*/
809
/* Which escape byte sequence, if any, precedes the primary opcode.
   Numbering starts at 0xF0000000 and continues consecutively. */
typedef enum {
   ESC_NONE = 0xF0000000,  /* no escape bytes */
   ESC_0F,                 /* 0F */
   ESC_0F38,               /* 0F 38 */
   ESC_0F3A                /* 0F 3A */
} Escape;
818
819
820/*------------------------------------------------------------*/
sewardj5b470602005-02-27 13:10:48 +0000821/*--- For dealing with integer registers ---*/
sewardjecb94892005-01-21 14:26:37 +0000822/*------------------------------------------------------------*/
823
sewardj5b470602005-02-27 13:10:48 +0000824/* This is somewhat complex. The rules are:
825
826 For 64, 32 and 16 bit register references, the e or g fields in the
827 modrm bytes supply the low 3 bits of the register number. The
828 fourth (most-significant) bit of the register number is supplied by
829 the REX byte, if it is present; else that bit is taken to be zero.
830
831 The REX.R bit supplies the high bit corresponding to the g register
832 field, and the REX.B bit supplies the high bit corresponding to the
833 e register field (when the mod part of modrm indicates that modrm's
834 e component refers to a register and not to memory).
835
836 The REX.X bit supplies a high register bit for certain registers
837 in SIB address modes, and is generally rarely used.
838
839 For 8 bit register references, the presence of the REX byte itself
840 has significance. If there is no REX present, then the 3-bit
841 number extracted from the modrm e or g field is treated as an index
842 into the sequence %al %cl %dl %bl %ah %ch %dh %bh -- that is, the
843 old x86 encoding scheme.
844
   But if there is a REX present, the register reference is
   interpreted in the same way as for 64/32/16-bit references: a high
   bit is extracted from REX, giving a 4-bit number, and the denoted
   register is the lowest 8 bits of the 16 integer registers denoted
   by the number.  In particular, values 4 through 7 of this sequence
   do not refer to %ah %ch %dh %bh but instead to the lowest 8 bits of
   %rsp %rbp %rsi %rdi.
852
853 The REX.W bit has no bearing at all on register numbers. Instead
854 its presence indicates that the operand size is to be overridden
855 from its default value (32 bits) to 64 bits instead. This is in
856 the same fashion that an 0x66 prefix indicates the operand size is
857 to be overridden from 32 bits down to 16 bits. When both REX.W and
858 0x66 are present there is a conflict, and REX.W takes precedence.
859
860 Rather than try to handle this complexity using a single huge
861 function, several smaller ones are provided. The aim is to make it
862 as difficult as possible to screw up register decoding in a subtle
863 and hard-to-track-down way.
864
865 Because these routines fish around in the host's memory (that is,
866 in the guest state area) for sub-parts of guest registers, their
867 correctness depends on the host's endianness. So far these
868 routines only work for little-endian hosts. Those for which
869 endianness is important have assertions to ensure sanity.
870*/
sewardjecb94892005-01-21 14:26:37 +0000871
872
sewardj5b470602005-02-27 13:10:48 +0000873/* About the simplest question you can ask: where do the 64-bit
874 integer registers live (in the guest state) ? */
sewardjecb94892005-01-21 14:26:37 +0000875
sewardj3ca55a12005-01-27 16:06:23 +0000876static Int integerGuestReg64Offset ( UInt reg )
sewardjb3a04292005-01-21 20:33:44 +0000877{
878 switch (reg) {
879 case R_RAX: return OFFB_RAX;
880 case R_RCX: return OFFB_RCX;
881 case R_RDX: return OFFB_RDX;
882 case R_RBX: return OFFB_RBX;
883 case R_RSP: return OFFB_RSP;
884 case R_RBP: return OFFB_RBP;
885 case R_RSI: return OFFB_RSI;
886 case R_RDI: return OFFB_RDI;
887 case R_R8: return OFFB_R8;
888 case R_R9: return OFFB_R9;
889 case R_R10: return OFFB_R10;
890 case R_R11: return OFFB_R11;
891 case R_R12: return OFFB_R12;
892 case R_R13: return OFFB_R13;
893 case R_R14: return OFFB_R14;
894 case R_R15: return OFFB_R15;
895 default: vpanic("integerGuestReg64Offset(amd64)");
896 }
897}
898
899
sewardj5b470602005-02-27 13:10:48 +0000900/* Produce the name of an integer register, for printing purposes.
901 reg is a number in the range 0 .. 15 that has been generated from a
902 3-bit reg-field number and a REX extension bit. irregular denotes
903 the case where sz==1 and no REX byte is present. */
sewardjecb94892005-01-21 14:26:37 +0000904
905static
florian55085f82012-11-21 00:36:55 +0000906const HChar* nameIReg ( Int sz, UInt reg, Bool irregular )
sewardjecb94892005-01-21 14:26:37 +0000907{
florian55085f82012-11-21 00:36:55 +0000908 static const HChar* ireg64_names[16]
sewardjecb94892005-01-21 14:26:37 +0000909 = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
910 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
florian55085f82012-11-21 00:36:55 +0000911 static const HChar* ireg32_names[16]
sewardjecb94892005-01-21 14:26:37 +0000912 = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
913 "%r8d", "%r9d", "%r10d","%r11d","%r12d","%r13d","%r14d","%r15d" };
florian55085f82012-11-21 00:36:55 +0000914 static const HChar* ireg16_names[16]
sewardjecb94892005-01-21 14:26:37 +0000915 = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di",
916 "%r8w", "%r9w", "%r10w","%r11w","%r12w","%r13w","%r14w","%r15w" };
florian55085f82012-11-21 00:36:55 +0000917 static const HChar* ireg8_names[16]
sewardjecb94892005-01-21 14:26:37 +0000918 = { "%al", "%cl", "%dl", "%bl", "%spl", "%bpl", "%sil", "%dil",
919 "%r8b", "%r9b", "%r10b","%r11b","%r12b","%r13b","%r14b","%r15b" };
florian55085f82012-11-21 00:36:55 +0000920 static const HChar* ireg8_irregular[8]
sewardjecb94892005-01-21 14:26:37 +0000921 = { "%al", "%cl", "%dl", "%bl", "%ah", "%ch", "%dh", "%bh" };
922
sewardj5b470602005-02-27 13:10:48 +0000923 vassert(reg < 16);
924 if (sz == 1) {
925 if (irregular)
926 vassert(reg < 8);
927 } else {
928 vassert(irregular == False);
929 }
sewardjecb94892005-01-21 14:26:37 +0000930
931 switch (sz) {
sewardj5b470602005-02-27 13:10:48 +0000932 case 8: return ireg64_names[reg];
933 case 4: return ireg32_names[reg];
934 case 2: return ireg16_names[reg];
935 case 1: if (irregular) {
936 return ireg8_irregular[reg];
937 } else {
938 return ireg8_names[reg];
939 }
940 default: vpanic("nameIReg(amd64)");
sewardjecb94892005-01-21 14:26:37 +0000941 }
sewardjecb94892005-01-21 14:26:37 +0000942}
943
sewardj5b470602005-02-27 13:10:48 +0000944/* Using the same argument conventions as nameIReg, produce the
945 guest state offset of an integer register. */
sewardjb3a04292005-01-21 20:33:44 +0000946
sewardjecb94892005-01-21 14:26:37 +0000947static
sewardj5b470602005-02-27 13:10:48 +0000948Int offsetIReg ( Int sz, UInt reg, Bool irregular )
sewardjecb94892005-01-21 14:26:37 +0000949{
sewardj5b470602005-02-27 13:10:48 +0000950 vassert(reg < 16);
951 if (sz == 1) {
952 if (irregular)
953 vassert(reg < 8);
954 } else {
955 vassert(irregular == False);
sewardjecb94892005-01-21 14:26:37 +0000956 }
sewardj5b470602005-02-27 13:10:48 +0000957
958 /* Deal with irregular case -- sz==1 and no REX present */
959 if (sz == 1 && irregular) {
960 switch (reg) {
961 case R_RSP: return 1+ OFFB_RAX;
962 case R_RBP: return 1+ OFFB_RCX;
963 case R_RSI: return 1+ OFFB_RDX;
964 case R_RDI: return 1+ OFFB_RBX;
965 default: break; /* use the normal case */
966 }
sewardjecb94892005-01-21 14:26:37 +0000967 }
sewardj5b470602005-02-27 13:10:48 +0000968
969 /* Normal case */
970 return integerGuestReg64Offset(reg);
sewardjecb94892005-01-21 14:26:37 +0000971}
972
973
sewardj5b470602005-02-27 13:10:48 +0000974/* Read the %CL register :: Ity_I8, for shift/rotate operations. */
975
976static IRExpr* getIRegCL ( void )
977{
sewardj9b769162014-07-24 12:42:03 +0000978 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +0000979 return IRExpr_Get( OFFB_RCX, Ity_I8 );
980}
981
982
983/* Write to the %AH register. */
984
985static void putIRegAH ( IRExpr* e )
986{
sewardj9b769162014-07-24 12:42:03 +0000987 vassert(host_endness == VexEndnessLE);
sewardjdd40fdf2006-12-24 02:20:24 +0000988 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I8);
sewardj5b470602005-02-27 13:10:48 +0000989 stmt( IRStmt_Put( OFFB_RAX+1, e ) );
990}
991
992
993/* Read/write various widths of %RAX, as it has various
994 special-purpose uses. */
995
florian55085f82012-11-21 00:36:55 +0000996static const HChar* nameIRegRAX ( Int sz )
sewardj5b470602005-02-27 13:10:48 +0000997{
998 switch (sz) {
999 case 1: return "%al";
1000 case 2: return "%ax";
1001 case 4: return "%eax";
1002 case 8: return "%rax";
1003 default: vpanic("nameIRegRAX(amd64)");
1004 }
1005}
1006
1007static IRExpr* getIRegRAX ( Int sz )
1008{
sewardj9b769162014-07-24 12:42:03 +00001009 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001010 switch (sz) {
1011 case 1: return IRExpr_Get( OFFB_RAX, Ity_I8 );
1012 case 2: return IRExpr_Get( OFFB_RAX, Ity_I16 );
sewardjef425db2010-01-11 10:46:18 +00001013 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RAX, Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001014 case 8: return IRExpr_Get( OFFB_RAX, Ity_I64 );
1015 default: vpanic("getIRegRAX(amd64)");
1016 }
1017}
1018
1019static void putIRegRAX ( Int sz, IRExpr* e )
1020{
sewardjdd40fdf2006-12-24 02:20:24 +00001021 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj9b769162014-07-24 12:42:03 +00001022 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001023 switch (sz) {
1024 case 8: vassert(ty == Ity_I64);
1025 stmt( IRStmt_Put( OFFB_RAX, e ));
1026 break;
1027 case 4: vassert(ty == Ity_I32);
1028 stmt( IRStmt_Put( OFFB_RAX, unop(Iop_32Uto64,e) ));
1029 break;
1030 case 2: vassert(ty == Ity_I16);
1031 stmt( IRStmt_Put( OFFB_RAX, e ));
1032 break;
1033 case 1: vassert(ty == Ity_I8);
1034 stmt( IRStmt_Put( OFFB_RAX, e ));
1035 break;
1036 default: vpanic("putIRegRAX(amd64)");
1037 }
1038}
1039
1040
1041/* Read/write various widths of %RDX, as it has various
1042 special-purpose uses. */
1043
florian55085f82012-11-21 00:36:55 +00001044static const HChar* nameIRegRDX ( Int sz )
sewardjbb4396c2007-11-20 17:29:08 +00001045{
1046 switch (sz) {
1047 case 1: return "%dl";
1048 case 2: return "%dx";
1049 case 4: return "%edx";
1050 case 8: return "%rdx";
1051 default: vpanic("nameIRegRDX(amd64)");
1052 }
1053}
1054
sewardj5b470602005-02-27 13:10:48 +00001055static IRExpr* getIRegRDX ( Int sz )
1056{
sewardj9b769162014-07-24 12:42:03 +00001057 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001058 switch (sz) {
1059 case 1: return IRExpr_Get( OFFB_RDX, Ity_I8 );
1060 case 2: return IRExpr_Get( OFFB_RDX, Ity_I16 );
sewardjef425db2010-01-11 10:46:18 +00001061 case 4: return unop(Iop_64to32, IRExpr_Get( OFFB_RDX, Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001062 case 8: return IRExpr_Get( OFFB_RDX, Ity_I64 );
1063 default: vpanic("getIRegRDX(amd64)");
1064 }
1065}
1066
1067static void putIRegRDX ( Int sz, IRExpr* e )
1068{
sewardj9b769162014-07-24 12:42:03 +00001069 vassert(host_endness == VexEndnessLE);
sewardjdd40fdf2006-12-24 02:20:24 +00001070 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001071 switch (sz) {
1072 case 8: stmt( IRStmt_Put( OFFB_RDX, e ));
1073 break;
1074 case 4: stmt( IRStmt_Put( OFFB_RDX, unop(Iop_32Uto64,e) ));
1075 break;
1076 case 2: stmt( IRStmt_Put( OFFB_RDX, e ));
1077 break;
1078 case 1: stmt( IRStmt_Put( OFFB_RDX, e ));
1079 break;
1080 default: vpanic("putIRegRDX(amd64)");
1081 }
1082}
1083
1084
1085/* Simplistic functions to deal with the integer registers as a
1086 straightforward bank of 16 64-bit regs. */
sewardjb3a04292005-01-21 20:33:44 +00001087
1088static IRExpr* getIReg64 ( UInt regno )
1089{
1090 return IRExpr_Get( integerGuestReg64Offset(regno),
1091 Ity_I64 );
1092}
1093
sewardj2f959cc2005-01-26 01:19:35 +00001094static void putIReg64 ( UInt regno, IRExpr* e )
1095{
sewardjdd40fdf2006-12-24 02:20:24 +00001096 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj2f959cc2005-01-26 01:19:35 +00001097 stmt( IRStmt_Put( integerGuestReg64Offset(regno), e ) );
1098}
1099
florian55085f82012-11-21 00:36:55 +00001100static const HChar* nameIReg64 ( UInt regno )
sewardjb3a04292005-01-21 20:33:44 +00001101{
sewardj5b470602005-02-27 13:10:48 +00001102 return nameIReg( 8, regno, False );
sewardjb3a04292005-01-21 20:33:44 +00001103}
sewardj5b470602005-02-27 13:10:48 +00001104
1105
1106/* Simplistic functions to deal with the lower halves of integer
1107 registers as a straightforward bank of 16 32-bit regs. */
1108
1109static IRExpr* getIReg32 ( UInt regno )
1110{
sewardj9b769162014-07-24 12:42:03 +00001111 vassert(host_endness == VexEndnessLE);
sewardjef425db2010-01-11 10:46:18 +00001112 return unop(Iop_64to32,
1113 IRExpr_Get( integerGuestReg64Offset(regno),
1114 Ity_I64 ));
sewardj5b470602005-02-27 13:10:48 +00001115}
1116
1117static void putIReg32 ( UInt regno, IRExpr* e )
1118{
sewardjdd40fdf2006-12-24 02:20:24 +00001119 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
sewardj5b470602005-02-27 13:10:48 +00001120 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1121 unop(Iop_32Uto64,e) ) );
1122}
1123
florian55085f82012-11-21 00:36:55 +00001124static const HChar* nameIReg32 ( UInt regno )
sewardj5b470602005-02-27 13:10:48 +00001125{
1126 return nameIReg( 4, regno, False );
1127}
1128
1129
sewardja7ba8c42005-05-10 20:08:34 +00001130/* Simplistic functions to deal with the lower quarters of integer
1131 registers as a straightforward bank of 16 16-bit regs. */
1132
1133static IRExpr* getIReg16 ( UInt regno )
1134{
sewardj9b769162014-07-24 12:42:03 +00001135 vassert(host_endness == VexEndnessLE);
sewardja7ba8c42005-05-10 20:08:34 +00001136 return IRExpr_Get( integerGuestReg64Offset(regno),
1137 Ity_I16 );
1138}
1139
tom0fb4cbd2011-08-10 12:58:03 +00001140static void putIReg16 ( UInt regno, IRExpr* e )
1141{
1142 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I16);
1143 stmt( IRStmt_Put( integerGuestReg64Offset(regno),
1144 unop(Iop_16Uto64,e) ) );
1145}
1146
florian55085f82012-11-21 00:36:55 +00001147static const HChar* nameIReg16 ( UInt regno )
sewardja7ba8c42005-05-10 20:08:34 +00001148{
1149 return nameIReg( 2, regno, False );
1150}
1151
1152
sewardj5b470602005-02-27 13:10:48 +00001153/* Sometimes what we know is a 3-bit register number, a REX byte, and
1154 which field of the REX byte is to be used to extend to a 4-bit
1155 number. These functions cater for that situation.
1156*/
1157static IRExpr* getIReg64rexX ( Prefix pfx, UInt lo3bits )
1158{
1159 vassert(lo3bits < 8);
1160 vassert(IS_VALID_PFX(pfx));
1161 return getIReg64( lo3bits | (getRexX(pfx) << 3) );
1162}
1163
florian55085f82012-11-21 00:36:55 +00001164static const HChar* nameIReg64rexX ( Prefix pfx, UInt lo3bits )
sewardj5b470602005-02-27 13:10:48 +00001165{
1166 vassert(lo3bits < 8);
1167 vassert(IS_VALID_PFX(pfx));
1168 return nameIReg( 8, lo3bits | (getRexX(pfx) << 3), False );
1169}
1170
florian55085f82012-11-21 00:36:55 +00001171static const HChar* nameIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
sewardj5b470602005-02-27 13:10:48 +00001172{
1173 vassert(lo3bits < 8);
1174 vassert(IS_VALID_PFX(pfx));
1175 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1176 return nameIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj397f88b2005-02-27 13:39:25 +00001177 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001178}
1179
1180static IRExpr* getIRegRexB ( Int sz, Prefix pfx, UInt lo3bits )
1181{
1182 vassert(lo3bits < 8);
1183 vassert(IS_VALID_PFX(pfx));
1184 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
sewardjef425db2010-01-11 10:46:18 +00001185 if (sz == 4) {
1186 sz = 8;
1187 return unop(Iop_64to32,
1188 IRExpr_Get(
1189 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj8cef87d2013-01-20 11:39:52 +00001190 False/*!irregular*/ ),
sewardjef425db2010-01-11 10:46:18 +00001191 szToITy(sz)
1192 )
1193 );
1194 } else {
1195 return IRExpr_Get(
1196 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
1197 toBool(sz==1 && !haveREX(pfx)) ),
1198 szToITy(sz)
1199 );
1200 }
sewardj5b470602005-02-27 13:10:48 +00001201}
1202
1203static void putIRegRexB ( Int sz, Prefix pfx, UInt lo3bits, IRExpr* e )
1204{
1205 vassert(lo3bits < 8);
1206 vassert(IS_VALID_PFX(pfx));
sewardj98e9f342005-07-23 12:07:37 +00001207 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
sewardjdd40fdf2006-12-24 02:20:24 +00001208 vassert(typeOfIRExpr(irsb->tyenv, e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001209 stmt( IRStmt_Put(
1210 offsetIReg( sz, lo3bits | (getRexB(pfx) << 3),
sewardj397f88b2005-02-27 13:39:25 +00001211 toBool(sz==1 && !haveREX(pfx)) ),
sewardj5b470602005-02-27 13:10:48 +00001212 sz==4 ? unop(Iop_32Uto64,e) : e
1213 ));
1214}
1215
1216
1217/* Functions for getting register numbers from modrm bytes and REX
1218 when we don't have to consider the complexities of integer subreg
1219 accesses.
1220*/
1221/* Extract the g reg field from a modRM byte, and augment it using the
1222 REX.R bit from the supplied REX byte. The R bit usually is
1223 associated with the g register field.
1224*/
1225static UInt gregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1226{
1227 Int reg = (Int)( (mod_reg_rm >> 3) & 7 );
1228 reg += (pfx & PFX_REXR) ? 8 : 0;
1229 return reg;
1230}
1231
1232/* Extract the e reg field from a modRM byte, and augment it using the
1233 REX.B bit from the supplied REX byte. The B bit usually is
1234 associated with the e register field (when modrm indicates e is a
1235 register, that is).
1236*/
1237static UInt eregOfRexRM ( Prefix pfx, UChar mod_reg_rm )
1238{
1239 Int rm;
1240 vassert(epartIsReg(mod_reg_rm));
1241 rm = (Int)(mod_reg_rm & 0x7);
1242 rm += (pfx & PFX_REXB) ? 8 : 0;
1243 return rm;
1244}
1245
1246
1247/* General functions for dealing with integer register access. */
1248
1249/* Produce the guest state offset for a reference to the 'g' register
1250 field in a modrm byte, taking into account REX (or its absence),
1251 and the size of the access.
1252*/
1253static UInt offsetIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1254{
1255 UInt reg;
sewardj9b769162014-07-24 12:42:03 +00001256 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001257 vassert(IS_VALID_PFX(pfx));
1258 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1259 reg = gregOfRexRM( pfx, mod_reg_rm );
sewardj397f88b2005-02-27 13:39:25 +00001260 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001261}
1262
1263static
1264IRExpr* getIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
1265{
sewardjef425db2010-01-11 10:46:18 +00001266 if (sz == 4) {
1267 sz = 8;
1268 return unop(Iop_64to32,
1269 IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1270 szToITy(sz) ));
1271 } else {
1272 return IRExpr_Get( offsetIRegG( sz, pfx, mod_reg_rm ),
1273 szToITy(sz) );
1274 }
sewardj5b470602005-02-27 13:10:48 +00001275}
1276
1277static
1278void putIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1279{
sewardjdd40fdf2006-12-24 02:20:24 +00001280 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001281 if (sz == 4) {
1282 e = unop(Iop_32Uto64,e);
1283 }
1284 stmt( IRStmt_Put( offsetIRegG( sz, pfx, mod_reg_rm ), e ) );
1285}
1286
1287static
florian55085f82012-11-21 00:36:55 +00001288const HChar* nameIRegG ( Int sz, Prefix pfx, UChar mod_reg_rm )
sewardj5b470602005-02-27 13:10:48 +00001289{
1290 return nameIReg( sz, gregOfRexRM(pfx,mod_reg_rm),
sewardj397f88b2005-02-27 13:39:25 +00001291 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001292}
1293
1294
sewardjcc3d2192013-03-27 11:37:33 +00001295static
1296IRExpr* getIRegV ( Int sz, Prefix pfx )
1297{
1298 if (sz == 4) {
1299 sz = 8;
1300 return unop(Iop_64to32,
1301 IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1302 szToITy(sz) ));
1303 } else {
1304 return IRExpr_Get( offsetIReg( sz, getVexNvvvv(pfx), False ),
1305 szToITy(sz) );
1306 }
1307}
1308
1309static
1310void putIRegV ( Int sz, Prefix pfx, IRExpr* e )
1311{
1312 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
1313 if (sz == 4) {
1314 e = unop(Iop_32Uto64,e);
1315 }
1316 stmt( IRStmt_Put( offsetIReg( sz, getVexNvvvv(pfx), False ), e ) );
1317}
1318
1319static
1320const HChar* nameIRegV ( Int sz, Prefix pfx )
1321{
1322 return nameIReg( sz, getVexNvvvv(pfx), False );
1323}
1324
1325
1326
sewardj5b470602005-02-27 13:10:48 +00001327/* Produce the guest state offset for a reference to the 'e' register
1328 field in a modrm byte, taking into account REX (or its absence),
1329 and the size of the access. eregOfRexRM will assert if mod_reg_rm
1330 denotes a memory access rather than a register access.
1331*/
1332static UInt offsetIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1333{
1334 UInt reg;
sewardj9b769162014-07-24 12:42:03 +00001335 vassert(host_endness == VexEndnessLE);
sewardj5b470602005-02-27 13:10:48 +00001336 vassert(IS_VALID_PFX(pfx));
1337 vassert(sz == 8 || sz == 4 || sz == 2 || sz == 1);
1338 reg = eregOfRexRM( pfx, mod_reg_rm );
sewardj397f88b2005-02-27 13:39:25 +00001339 return offsetIReg( sz, reg, toBool(sz == 1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001340}
1341
1342static
1343IRExpr* getIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
1344{
sewardjef425db2010-01-11 10:46:18 +00001345 if (sz == 4) {
1346 sz = 8;
1347 return unop(Iop_64to32,
1348 IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1349 szToITy(sz) ));
1350 } else {
1351 return IRExpr_Get( offsetIRegE( sz, pfx, mod_reg_rm ),
1352 szToITy(sz) );
1353 }
sewardj5b470602005-02-27 13:10:48 +00001354}
1355
1356static
1357void putIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm, IRExpr* e )
1358{
sewardjdd40fdf2006-12-24 02:20:24 +00001359 vassert(typeOfIRExpr(irsb->tyenv,e) == szToITy(sz));
sewardj5b470602005-02-27 13:10:48 +00001360 if (sz == 4) {
1361 e = unop(Iop_32Uto64,e);
1362 }
1363 stmt( IRStmt_Put( offsetIRegE( sz, pfx, mod_reg_rm ), e ) );
1364}
1365
1366static
florian55085f82012-11-21 00:36:55 +00001367const HChar* nameIRegE ( Int sz, Prefix pfx, UChar mod_reg_rm )
sewardj5b470602005-02-27 13:10:48 +00001368{
1369 return nameIReg( sz, eregOfRexRM(pfx,mod_reg_rm),
sewardj397f88b2005-02-27 13:39:25 +00001370 toBool(sz==1 && !haveREX(pfx)) );
sewardj5b470602005-02-27 13:10:48 +00001371}
1372
1373
1374/*------------------------------------------------------------*/
1375/*--- For dealing with XMM registers ---*/
1376/*------------------------------------------------------------*/
sewardjecb94892005-01-21 14:26:37 +00001377
sewardjc4530ae2012-05-21 10:18:49 +00001378static Int ymmGuestRegOffset ( UInt ymmreg )
1379{
1380 switch (ymmreg) {
1381 case 0: return OFFB_YMM0;
1382 case 1: return OFFB_YMM1;
1383 case 2: return OFFB_YMM2;
1384 case 3: return OFFB_YMM3;
1385 case 4: return OFFB_YMM4;
1386 case 5: return OFFB_YMM5;
1387 case 6: return OFFB_YMM6;
1388 case 7: return OFFB_YMM7;
1389 case 8: return OFFB_YMM8;
1390 case 9: return OFFB_YMM9;
1391 case 10: return OFFB_YMM10;
1392 case 11: return OFFB_YMM11;
1393 case 12: return OFFB_YMM12;
1394 case 13: return OFFB_YMM13;
1395 case 14: return OFFB_YMM14;
1396 case 15: return OFFB_YMM15;
1397 default: vpanic("ymmGuestRegOffset(amd64)");
1398 }
1399}
sewardj1001dc42005-02-21 08:25:55 +00001400
1401static Int xmmGuestRegOffset ( UInt xmmreg )
1402{
sewardjc4530ae2012-05-21 10:18:49 +00001403 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001404 vassert(host_endness == VexEndnessLE);
sewardjc4530ae2012-05-21 10:18:49 +00001405 return ymmGuestRegOffset( xmmreg );
sewardj1001dc42005-02-21 08:25:55 +00001406}
1407
sewardj97628592005-05-10 22:42:54 +00001408/* Lanes of vector registers are always numbered from zero being the
1409 least significant lane (rightmost in the register). */
1410
1411static Int xmmGuestRegLane16offset ( UInt xmmreg, Int laneno )
1412{
1413 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001414 vassert(host_endness == VexEndnessLE);
sewardj97628592005-05-10 22:42:54 +00001415 vassert(laneno >= 0 && laneno < 8);
1416 return xmmGuestRegOffset( xmmreg ) + 2 * laneno;
1417}
sewardj8d965312005-02-25 02:48:47 +00001418
1419static Int xmmGuestRegLane32offset ( UInt xmmreg, Int laneno )
1420{
1421 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001422 vassert(host_endness == VexEndnessLE);
sewardj8d965312005-02-25 02:48:47 +00001423 vassert(laneno >= 0 && laneno < 4);
1424 return xmmGuestRegOffset( xmmreg ) + 4 * laneno;
1425}
sewardj1001dc42005-02-21 08:25:55 +00001426
1427static Int xmmGuestRegLane64offset ( UInt xmmreg, Int laneno )
1428{
1429 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001430 vassert(host_endness == VexEndnessLE);
sewardj1001dc42005-02-21 08:25:55 +00001431 vassert(laneno >= 0 && laneno < 2);
1432 return xmmGuestRegOffset( xmmreg ) + 8 * laneno;
1433}
1434
sewardjc4530ae2012-05-21 10:18:49 +00001435static Int ymmGuestRegLane128offset ( UInt ymmreg, Int laneno )
1436{
1437 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001438 vassert(host_endness == VexEndnessLE);
sewardjc4530ae2012-05-21 10:18:49 +00001439 vassert(laneno >= 0 && laneno < 2);
1440 return ymmGuestRegOffset( ymmreg ) + 16 * laneno;
1441}
sewardj1001dc42005-02-21 08:25:55 +00001442
sewardj66becf32012-06-18 23:15:16 +00001443static Int ymmGuestRegLane64offset ( UInt ymmreg, Int laneno )
1444{
1445 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001446 vassert(host_endness == VexEndnessLE);
sewardj66becf32012-06-18 23:15:16 +00001447 vassert(laneno >= 0 && laneno < 4);
1448 return ymmGuestRegOffset( ymmreg ) + 8 * laneno;
1449}
1450
1451static Int ymmGuestRegLane32offset ( UInt ymmreg, Int laneno )
1452{
1453 /* Correct for little-endian host only. */
sewardj9b769162014-07-24 12:42:03 +00001454 vassert(host_endness == VexEndnessLE);
sewardj66becf32012-06-18 23:15:16 +00001455 vassert(laneno >= 0 && laneno < 8);
1456 return ymmGuestRegOffset( ymmreg ) + 4 * laneno;
1457}
1458
sewardj1001dc42005-02-21 08:25:55 +00001459static IRExpr* getXMMReg ( UInt xmmreg )
1460{
1461 return IRExpr_Get( xmmGuestRegOffset(xmmreg), Ity_V128 );
1462}
1463
1464static IRExpr* getXMMRegLane64 ( UInt xmmreg, Int laneno )
1465{
1466 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_I64 );
1467}
1468
sewardj18303862005-02-21 12:36:54 +00001469static IRExpr* getXMMRegLane64F ( UInt xmmreg, Int laneno )
1470{
1471 return IRExpr_Get( xmmGuestRegLane64offset(xmmreg,laneno), Ity_F64 );
1472}
1473
sewardj8d965312005-02-25 02:48:47 +00001474static IRExpr* getXMMRegLane32 ( UInt xmmreg, Int laneno )
1475{
1476 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_I32 );
1477}
1478
sewardjc49ce232005-02-25 13:03:03 +00001479static IRExpr* getXMMRegLane32F ( UInt xmmreg, Int laneno )
1480{
1481 return IRExpr_Get( xmmGuestRegLane32offset(xmmreg,laneno), Ity_F32 );
1482}
sewardj1001dc42005-02-21 08:25:55 +00001483
de5a70f5c2010-04-01 23:08:59 +00001484static IRExpr* getXMMRegLane16 ( UInt xmmreg, Int laneno )
1485{
1486 return IRExpr_Get( xmmGuestRegLane16offset(xmmreg,laneno), Ity_I16 );
1487}
1488
sewardj1001dc42005-02-21 08:25:55 +00001489static void putXMMReg ( UInt xmmreg, IRExpr* e )
1490{
sewardjdd40fdf2006-12-24 02:20:24 +00001491 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
sewardj1001dc42005-02-21 08:25:55 +00001492 stmt( IRStmt_Put( xmmGuestRegOffset(xmmreg), e ) );
1493}
1494
1495static void putXMMRegLane64 ( UInt xmmreg, Int laneno, IRExpr* e )
1496{
sewardjdd40fdf2006-12-24 02:20:24 +00001497 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj1001dc42005-02-21 08:25:55 +00001498 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1499}
1500
sewardj1a01e652005-02-23 11:39:21 +00001501static void putXMMRegLane64F ( UInt xmmreg, Int laneno, IRExpr* e )
1502{
sewardjdd40fdf2006-12-24 02:20:24 +00001503 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
sewardj1a01e652005-02-23 11:39:21 +00001504 stmt( IRStmt_Put( xmmGuestRegLane64offset(xmmreg,laneno), e ) );
1505}
1506
sewardj8d965312005-02-25 02:48:47 +00001507static void putXMMRegLane32F ( UInt xmmreg, Int laneno, IRExpr* e )
1508{
sewardjdd40fdf2006-12-24 02:20:24 +00001509 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
sewardj8d965312005-02-25 02:48:47 +00001510 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1511}
1512
1513static void putXMMRegLane32 ( UInt xmmreg, Int laneno, IRExpr* e )
1514{
sewardjdd40fdf2006-12-24 02:20:24 +00001515 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00001516 stmt( IRStmt_Put( xmmGuestRegLane32offset(xmmreg,laneno), e ) );
1517}
1518
sewardjc4530ae2012-05-21 10:18:49 +00001519static IRExpr* getYMMReg ( UInt xmmreg )
1520{
1521 return IRExpr_Get( ymmGuestRegOffset(xmmreg), Ity_V256 );
1522}
1523
1524static IRExpr* getYMMRegLane128 ( UInt ymmreg, Int laneno )
1525{
1526 return IRExpr_Get( ymmGuestRegLane128offset(ymmreg,laneno), Ity_V128 );
1527}
1528
sewardj82096922012-06-24 14:57:59 +00001529static IRExpr* getYMMRegLane64 ( UInt ymmreg, Int laneno )
1530{
1531 return IRExpr_Get( ymmGuestRegLane64offset(ymmreg,laneno), Ity_I64 );
1532}
1533
sewardj8eb7ae82012-06-24 14:00:27 +00001534static IRExpr* getYMMRegLane32 ( UInt ymmreg, Int laneno )
1535{
1536 return IRExpr_Get( ymmGuestRegLane32offset(ymmreg,laneno), Ity_I32 );
1537}
1538
sewardjc4530ae2012-05-21 10:18:49 +00001539static void putYMMReg ( UInt ymmreg, IRExpr* e )
1540{
1541 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V256);
1542 stmt( IRStmt_Put( ymmGuestRegOffset(ymmreg), e ) );
1543}
1544
1545static void putYMMRegLane128 ( UInt ymmreg, Int laneno, IRExpr* e )
1546{
1547 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_V128);
1548 stmt( IRStmt_Put( ymmGuestRegLane128offset(ymmreg,laneno), e ) );
1549}
1550
sewardj66becf32012-06-18 23:15:16 +00001551static void putYMMRegLane64F ( UInt ymmreg, Int laneno, IRExpr* e )
1552{
1553 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F64);
1554 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1555}
1556
sewardj82096922012-06-24 14:57:59 +00001557static void putYMMRegLane64 ( UInt ymmreg, Int laneno, IRExpr* e )
1558{
1559 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
1560 stmt( IRStmt_Put( ymmGuestRegLane64offset(ymmreg,laneno), e ) );
1561}
1562
sewardj66becf32012-06-18 23:15:16 +00001563static void putYMMRegLane32F ( UInt ymmreg, Int laneno, IRExpr* e )
1564{
1565 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_F32);
1566 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1567}
1568
1569static void putYMMRegLane32 ( UInt ymmreg, Int laneno, IRExpr* e )
1570{
1571 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I32);
1572 stmt( IRStmt_Put( ymmGuestRegLane32offset(ymmreg,laneno), e ) );
1573}
1574
sewardj1001dc42005-02-21 08:25:55 +00001575static IRExpr* mkV128 ( UShort mask )
1576{
1577 return IRExpr_Const(IRConst_V128(mask));
1578}
sewardjdf0e0022005-01-25 15:48:43 +00001579
sewardjc4530ae2012-05-21 10:18:49 +00001580/* Write the low half of a YMM reg and zero out the upper half. */
1581static void putYMMRegLoAndZU ( UInt ymmreg, IRExpr* e )
1582{
1583 putYMMRegLane128( ymmreg, 0, e );
1584 putYMMRegLane128( ymmreg, 1, mkV128(0) );
1585}
1586
sewardje8f65252005-08-23 23:44:35 +00001587static IRExpr* mkAnd1 ( IRExpr* x, IRExpr* y )
1588{
sewardjdd40fdf2006-12-24 02:20:24 +00001589 vassert(typeOfIRExpr(irsb->tyenv,x) == Ity_I1);
1590 vassert(typeOfIRExpr(irsb->tyenv,y) == Ity_I1);
sewardje8f65252005-08-23 23:44:35 +00001591 return unop(Iop_64to1,
1592 binop(Iop_And64,
1593 unop(Iop_1Uto64,x),
1594 unop(Iop_1Uto64,y)));
1595}
1596
sewardje9d8a262009-07-01 08:06:34 +00001597/* Generate a compare-and-swap operation, operating on memory at
1598 'addr'. The expected value is 'expVal' and the new value is
1599 'newVal'. If the operation fails, then transfer control (with a
1600 no-redir jump (XXX no -- see comment at top of this file)) to
1601 'restart_point', which is presumably the address of the guest
1602 instruction again -- retrying, essentially. */
1603static void casLE ( IRExpr* addr, IRExpr* expVal, IRExpr* newVal,
1604 Addr64 restart_point )
1605{
1606 IRCAS* cas;
1607 IRType tyE = typeOfIRExpr(irsb->tyenv, expVal);
1608 IRType tyN = typeOfIRExpr(irsb->tyenv, newVal);
1609 IRTemp oldTmp = newTemp(tyE);
1610 IRTemp expTmp = newTemp(tyE);
1611 vassert(tyE == tyN);
1612 vassert(tyE == Ity_I64 || tyE == Ity_I32
1613 || tyE == Ity_I16 || tyE == Ity_I8);
1614 assign(expTmp, expVal);
1615 cas = mkIRCAS( IRTemp_INVALID, oldTmp, Iend_LE, addr,
1616 NULL, mkexpr(expTmp), NULL, newVal );
1617 stmt( IRStmt_CAS(cas) );
1618 stmt( IRStmt_Exit(
sewardj1fb8c922009-07-12 12:56:53 +00001619 binop( mkSizedOp(tyE,Iop_CasCmpNE8),
1620 mkexpr(oldTmp), mkexpr(expTmp) ),
sewardje9d8a262009-07-01 08:06:34 +00001621 Ijk_Boring, /*Ijk_NoRedir*/
sewardjc6f970f2012-04-02 21:54:49 +00001622 IRConst_U64( restart_point ),
1623 OFFB_RIP
sewardje9d8a262009-07-01 08:06:34 +00001624 ));
1625}
1626
sewardj5b470602005-02-27 13:10:48 +00001627
sewardj118b23e2005-01-29 02:14:44 +00001628/*------------------------------------------------------------*/
sewardje8f65252005-08-23 23:44:35 +00001629/*--- Helpers for %rflags. ---*/
sewardj118b23e2005-01-29 02:14:44 +00001630/*------------------------------------------------------------*/
1631
1632/* -------------- Evaluating the flags-thunk. -------------- */
1633
1634/* Build IR to calculate all the eflags from stored
1635 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1636 Ity_I64. */
1637static IRExpr* mk_amd64g_calculate_rflags_all ( void )
1638{
1639 IRExpr** args
1640 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1641 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1642 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1643 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1644 IRExpr* call
1645 = mkIRExprCCall(
1646 Ity_I64,
1647 0/*regparm*/,
1648 "amd64g_calculate_rflags_all", &amd64g_calculate_rflags_all,
1649 args
1650 );
1651 /* Exclude OP and NDEP from definedness checking. We're only
1652 interested in DEP1 and DEP2. */
1653 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1654 return call;
1655}
sewardj3ca55a12005-01-27 16:06:23 +00001656
1657/* Build IR to calculate some particular condition from stored
1658 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1659 Ity_Bit. */
1660static IRExpr* mk_amd64g_calculate_condition ( AMD64Condcode cond )
1661{
1662 IRExpr** args
1663 = mkIRExprVec_5( mkU64(cond),
1664 IRExpr_Get(OFFB_CC_OP, Ity_I64),
1665 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1666 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1667 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1668 IRExpr* call
1669 = mkIRExprCCall(
1670 Ity_I64,
1671 0/*regparm*/,
1672 "amd64g_calculate_condition", &amd64g_calculate_condition,
1673 args
1674 );
1675 /* Exclude the requested condition, OP and NDEP from definedness
1676 checking. We're only interested in DEP1 and DEP2. */
1677 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<1) | (1<<4);
sewardje58967e2005-04-27 11:50:56 +00001678 return unop(Iop_64to1, call);
sewardj3ca55a12005-01-27 16:06:23 +00001679}
sewardjdf0e0022005-01-25 15:48:43 +00001680
1681/* Build IR to calculate just the carry flag from stored
1682 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression :: Ity_I64. */
1683static IRExpr* mk_amd64g_calculate_rflags_c ( void )
1684{
1685 IRExpr** args
1686 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1687 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1688 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1689 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1690 IRExpr* call
1691 = mkIRExprCCall(
1692 Ity_I64,
1693 0/*regparm*/,
1694 "amd64g_calculate_rflags_c", &amd64g_calculate_rflags_c,
1695 args
1696 );
1697 /* Exclude OP and NDEP from definedness checking. We're only
1698 interested in DEP1 and DEP2. */
1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1700 return call;
1701}
1702
1703
1704/* -------------- Building the flags-thunk. -------------- */
1705
1706/* The machinery in this section builds the flag-thunk following a
1707 flag-setting operation. Hence the various setFlags_* functions.
1708*/
1709
1710static Bool isAddSub ( IROp op8 )
1711{
sewardj7a240552005-01-28 21:37:12 +00001712 return toBool(op8 == Iop_Add8 || op8 == Iop_Sub8);
sewardjdf0e0022005-01-25 15:48:43 +00001713}
1714
sewardj3ca55a12005-01-27 16:06:23 +00001715static Bool isLogic ( IROp op8 )
1716{
sewardj7a240552005-01-28 21:37:12 +00001717 return toBool(op8 == Iop_And8 || op8 == Iop_Or8 || op8 == Iop_Xor8);
sewardj3ca55a12005-01-27 16:06:23 +00001718}
sewardjdf0e0022005-01-25 15:48:43 +00001719
sewardj656b8f42013-03-27 22:15:36 +00001720/* U-widen 1/8/16/32/64 bit int expr to 64. */
sewardjdf0e0022005-01-25 15:48:43 +00001721static IRExpr* widenUto64 ( IRExpr* e )
1722{
sewardjdd40fdf2006-12-24 02:20:24 +00001723 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardjdf0e0022005-01-25 15:48:43 +00001724 case Ity_I64: return e;
1725 case Ity_I32: return unop(Iop_32Uto64, e);
sewardje58967e2005-04-27 11:50:56 +00001726 case Ity_I16: return unop(Iop_16Uto64, e);
1727 case Ity_I8: return unop(Iop_8Uto64, e);
sewardj656b8f42013-03-27 22:15:36 +00001728 case Ity_I1: return unop(Iop_1Uto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001729 default: vpanic("widenUto64");
sewardjdf0e0022005-01-25 15:48:43 +00001730 }
1731}
1732
sewardj118b23e2005-01-29 02:14:44 +00001733/* S-widen 8/16/32/64 bit int expr to 32. */
1734static IRExpr* widenSto64 ( IRExpr* e )
1735{
sewardjdd40fdf2006-12-24 02:20:24 +00001736 switch (typeOfIRExpr(irsb->tyenv,e)) {
sewardj118b23e2005-01-29 02:14:44 +00001737 case Ity_I64: return e;
1738 case Ity_I32: return unop(Iop_32Sto64, e);
sewardje58967e2005-04-27 11:50:56 +00001739 case Ity_I16: return unop(Iop_16Sto64, e);
1740 case Ity_I8: return unop(Iop_8Sto64, e);
sewardj118b23e2005-01-29 02:14:44 +00001741 default: vpanic("widenSto64");
1742 }
1743}
sewardjdf0e0022005-01-25 15:48:43 +00001744
1745/* Narrow 8/16/32/64 bit int expr to 8/16/32/64. Clearly only some
1746 of these combinations make sense. */
1747static IRExpr* narrowTo ( IRType dst_ty, IRExpr* e )
1748{
sewardjdd40fdf2006-12-24 02:20:24 +00001749 IRType src_ty = typeOfIRExpr(irsb->tyenv,e);
sewardjdf0e0022005-01-25 15:48:43 +00001750 if (src_ty == dst_ty)
1751 return e;
1752 if (src_ty == Ity_I32 && dst_ty == Ity_I16)
1753 return unop(Iop_32to16, e);
1754 if (src_ty == Ity_I32 && dst_ty == Ity_I8)
1755 return unop(Iop_32to8, e);
sewardj118b23e2005-01-29 02:14:44 +00001756 if (src_ty == Ity_I64 && dst_ty == Ity_I32)
1757 return unop(Iop_64to32, e);
1758 if (src_ty == Ity_I64 && dst_ty == Ity_I16)
sewardje58967e2005-04-27 11:50:56 +00001759 return unop(Iop_64to16, e);
sewardj03b07cc2005-01-31 18:09:43 +00001760 if (src_ty == Ity_I64 && dst_ty == Ity_I8)
sewardje58967e2005-04-27 11:50:56 +00001761 return unop(Iop_64to8, e);
sewardjdf0e0022005-01-25 15:48:43 +00001762
1763 vex_printf("\nsrc, dst tys are: ");
1764 ppIRType(src_ty);
1765 vex_printf(", ");
1766 ppIRType(dst_ty);
1767 vex_printf("\n");
1768 vpanic("narrowTo(amd64)");
1769}
1770
1771
1772/* Set the flags thunk OP, DEP1 and DEP2 fields. The supplied op is
1773 auto-sized up to the real op. */
1774
1775static
1776void setFlags_DEP1_DEP2 ( IROp op8, IRTemp dep1, IRTemp dep2, IRType ty )
1777{
1778 Int ccOp = 0;
1779 switch (ty) {
1780 case Ity_I8: ccOp = 0; break;
1781 case Ity_I16: ccOp = 1; break;
1782 case Ity_I32: ccOp = 2; break;
1783 case Ity_I64: ccOp = 3; break;
1784 default: vassert(0);
1785 }
1786 switch (op8) {
1787 case Iop_Add8: ccOp += AMD64G_CC_OP_ADDB; break;
1788 case Iop_Sub8: ccOp += AMD64G_CC_OP_SUBB; break;
1789 default: ppIROp(op8);
1790 vpanic("setFlags_DEP1_DEP2(amd64)");
1791 }
1792 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1793 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1794 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(dep2))) );
1795}
1796
1797
1798/* Set the OP and DEP1 fields only, and write zero to DEP2. */
1799
1800static
1801void setFlags_DEP1 ( IROp op8, IRTemp dep1, IRType ty )
1802{
1803 Int ccOp = 0;
1804 switch (ty) {
1805 case Ity_I8: ccOp = 0; break;
1806 case Ity_I16: ccOp = 1; break;
1807 case Ity_I32: ccOp = 2; break;
1808 case Ity_I64: ccOp = 3; break;
1809 default: vassert(0);
1810 }
1811 switch (op8) {
1812 case Iop_Or8:
1813 case Iop_And8:
1814 case Iop_Xor8: ccOp += AMD64G_CC_OP_LOGICB; break;
1815 default: ppIROp(op8);
1816 vpanic("setFlags_DEP1(amd64)");
1817 }
1818 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
1819 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dep1))) );
1820 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1821}
1822
1823
sewardj118b23e2005-01-29 02:14:44 +00001824/* For shift operations, we put in the result and the undershifted
1825 result. Except if the shift amount is zero, the thunk is left
1826 unchanged. */
1827
1828static void setFlags_DEP1_DEP2_shift ( IROp op64,
1829 IRTemp res,
1830 IRTemp resUS,
1831 IRType ty,
1832 IRTemp guard )
1833{
1834 Int ccOp = 0;
1835 switch (ty) {
1836 case Ity_I8: ccOp = 0; break;
1837 case Ity_I16: ccOp = 1; break;
1838 case Ity_I32: ccOp = 2; break;
1839 case Ity_I64: ccOp = 3; break;
1840 default: vassert(0);
1841 }
1842
1843 vassert(guard);
1844
1845 /* Both kinds of right shifts are handled by the same thunk
1846 operation. */
1847 switch (op64) {
1848 case Iop_Shr64:
1849 case Iop_Sar64: ccOp += AMD64G_CC_OP_SHRB; break;
1850 case Iop_Shl64: ccOp += AMD64G_CC_OP_SHLB; break;
1851 default: ppIROp(op64);
1852 vpanic("setFlags_DEP1_DEP2_shift(amd64)");
1853 }
1854
sewardj009230b2013-01-26 11:47:55 +00001855 /* guard :: Ity_I8. We need to convert it to I1. */
1856 IRTemp guardB = newTemp(Ity_I1);
1857 assign( guardB, binop(Iop_CmpNE8, mkexpr(guard), mkU8(0)) );
1858
sewardj118b23e2005-01-29 02:14:44 +00001859 /* DEP1 contains the result, DEP2 contains the undershifted value. */
1860 stmt( IRStmt_Put( OFFB_CC_OP,
florian99dd03e2013-01-29 03:56:06 +00001861 IRExpr_ITE( mkexpr(guardB),
1862 mkU64(ccOp),
1863 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001864 stmt( IRStmt_Put( OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00001865 IRExpr_ITE( mkexpr(guardB),
1866 widenUto64(mkexpr(res)),
1867 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001868 stmt( IRStmt_Put( OFFB_CC_DEP2,
florian99dd03e2013-01-29 03:56:06 +00001869 IRExpr_ITE( mkexpr(guardB),
1870 widenUto64(mkexpr(resUS)),
1871 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00001872}
sewardj354e5c62005-01-27 20:12:52 +00001873
1874
1875/* For the inc/dec case, we store in DEP1 the result value and in NDEP
1876 the former value of the carry flag, which unfortunately we have to
1877 compute. */
1878
1879static void setFlags_INC_DEC ( Bool inc, IRTemp res, IRType ty )
1880{
1881 Int ccOp = inc ? AMD64G_CC_OP_INCB : AMD64G_CC_OP_DECB;
1882
1883 switch (ty) {
1884 case Ity_I8: ccOp += 0; break;
1885 case Ity_I16: ccOp += 1; break;
1886 case Ity_I32: ccOp += 2; break;
1887 case Ity_I64: ccOp += 3; break;
1888 default: vassert(0);
1889 }
1890
1891 /* This has to come first, because calculating the C flag
1892 may require reading all four thunk fields. */
1893 stmt( IRStmt_Put( OFFB_CC_NDEP, mk_amd64g_calculate_rflags_c()) );
1894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(ccOp)) );
sewardj478646f2008-05-01 20:13:04 +00001895 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(res))) );
sewardj354e5c62005-01-27 20:12:52 +00001896 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
1897}
1898
1899
sewardj32b2bbe2005-01-28 00:50:10 +00001900/* Multiplies are pretty much like add and sub: DEP1 and DEP2 hold the
1901 two arguments. */
1902
1903static
1904void setFlags_MUL ( IRType ty, IRTemp arg1, IRTemp arg2, ULong base_op )
1905{
1906 switch (ty) {
1907 case Ity_I8:
1908 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+0) ) );
1909 break;
1910 case Ity_I16:
1911 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+1) ) );
1912 break;
1913 case Ity_I32:
1914 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+2) ) );
1915 break;
1916 case Ity_I64:
1917 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(base_op+3) ) );
1918 break;
1919 default:
1920 vpanic("setFlags_MUL(amd64)");
1921 }
1922 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(arg1)) ));
1923 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(arg2)) ));
1924}
sewardj3ca55a12005-01-27 16:06:23 +00001925
1926
1927/* -------------- Condition codes. -------------- */
1928
1929/* Condition codes, using the AMD encoding. */
1930
florian55085f82012-11-21 00:36:55 +00001931static const HChar* name_AMD64Condcode ( AMD64Condcode cond )
sewardj3ca55a12005-01-27 16:06:23 +00001932{
1933 switch (cond) {
1934 case AMD64CondO: return "o";
1935 case AMD64CondNO: return "no";
1936 case AMD64CondB: return "b";
sewardje941eea2005-01-30 19:52:28 +00001937 case AMD64CondNB: return "ae"; /*"nb";*/
1938 case AMD64CondZ: return "e"; /*"z";*/
1939 case AMD64CondNZ: return "ne"; /*"nz";*/
sewardj3ca55a12005-01-27 16:06:23 +00001940 case AMD64CondBE: return "be";
sewardje941eea2005-01-30 19:52:28 +00001941 case AMD64CondNBE: return "a"; /*"nbe";*/
sewardj3ca55a12005-01-27 16:06:23 +00001942 case AMD64CondS: return "s";
1943 case AMD64CondNS: return "ns";
1944 case AMD64CondP: return "p";
1945 case AMD64CondNP: return "np";
1946 case AMD64CondL: return "l";
sewardje941eea2005-01-30 19:52:28 +00001947 case AMD64CondNL: return "ge"; /*"nl";*/
sewardj3ca55a12005-01-27 16:06:23 +00001948 case AMD64CondLE: return "le";
sewardje941eea2005-01-30 19:52:28 +00001949 case AMD64CondNLE: return "g"; /*"nle";*/
sewardj3ca55a12005-01-27 16:06:23 +00001950 case AMD64CondAlways: return "ALWAYS";
1951 default: vpanic("name_AMD64Condcode");
1952 }
1953}
1954
sewardj1389d4d2005-01-28 13:46:29 +00001955static
1956AMD64Condcode positiveIse_AMD64Condcode ( AMD64Condcode cond,
1957 /*OUT*/Bool* needInvert )
1958{
1959 vassert(cond >= AMD64CondO && cond <= AMD64CondNLE);
1960 if (cond & 1) {
1961 *needInvert = True;
1962 return cond-1;
1963 } else {
1964 *needInvert = False;
1965 return cond;
1966 }
1967}
sewardjdf0e0022005-01-25 15:48:43 +00001968
1969
1970/* -------------- Helpers for ADD/SUB with carry. -------------- */
1971
1972/* Given ta1, ta2 and tres, compute tres = ADC(ta1,ta2) and set flags
1973 appropriately.
sewardje9d8a262009-07-01 08:06:34 +00001974
1975 Optionally, generate a store for the 'tres' value. This can either
1976 be a normal store, or it can be a cas-with-possible-failure style
1977 store:
1978
1979 if taddr is IRTemp_INVALID, then no store is generated.
1980
1981 if taddr is not IRTemp_INVALID, then a store (using taddr as
1982 the address) is generated:
1983
1984 if texpVal is IRTemp_INVALID then a normal store is
1985 generated, and restart_point must be zero (it is irrelevant).
1986
1987 if texpVal is not IRTemp_INVALID then a cas-style store is
1988 generated. texpVal is the expected value, restart_point
1989 is the restart point if the store fails, and texpVal must
1990 have the same type as tres.
1991
sewardjdf0e0022005-01-25 15:48:43 +00001992*/
1993static void helper_ADC ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00001994 IRTemp tres, IRTemp ta1, IRTemp ta2,
1995 /* info about optional store: */
1996 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00001997{
1998 UInt thunkOp;
1999 IRType ty = szToITy(sz);
2000 IRTemp oldc = newTemp(Ity_I64);
2001 IRTemp oldcn = newTemp(ty);
2002 IROp plus = mkSizedOp(ty, Iop_Add8);
2003 IROp xor = mkSizedOp(ty, Iop_Xor8);
2004
sewardje9d8a262009-07-01 08:06:34 +00002005 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2006
sewardjdf0e0022005-01-25 15:48:43 +00002007 switch (sz) {
2008 case 8: thunkOp = AMD64G_CC_OP_ADCQ; break;
2009 case 4: thunkOp = AMD64G_CC_OP_ADCL; break;
2010 case 2: thunkOp = AMD64G_CC_OP_ADCW; break;
2011 case 1: thunkOp = AMD64G_CC_OP_ADCB; break;
2012 default: vassert(0);
2013 }
2014
2015 /* oldc = old carry flag, 0 or 1 */
2016 assign( oldc, binop(Iop_And64,
2017 mk_amd64g_calculate_rflags_c(),
2018 mkU64(1)) );
2019
2020 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2021
2022 assign( tres, binop(plus,
2023 binop(plus,mkexpr(ta1),mkexpr(ta2)),
2024 mkexpr(oldcn)) );
2025
sewardje9d8a262009-07-01 08:06:34 +00002026 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2027 start of this function. */
2028 if (taddr != IRTemp_INVALID) {
2029 if (texpVal == IRTemp_INVALID) {
2030 vassert(restart_point == 0);
2031 storeLE( mkexpr(taddr), mkexpr(tres) );
2032 } else {
2033 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2034 /* .. and hence 'texpVal' has the same type as 'tres'. */
2035 casLE( mkexpr(taddr),
2036 mkexpr(texpVal), mkexpr(tres), restart_point );
2037 }
2038 }
2039
sewardjdf0e0022005-01-25 15:48:43 +00002040 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002041 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1)) ));
2042 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2043 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002044 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2045}
2046
2047
2048/* Given ta1, ta2 and tres, compute tres = SBB(ta1,ta2) and set flags
sewardje9d8a262009-07-01 08:06:34 +00002049 appropriately. As with helper_ADC, possibly generate a store of
2050 the result -- see comments on helper_ADC for details.
sewardjdf0e0022005-01-25 15:48:43 +00002051*/
2052static void helper_SBB ( Int sz,
sewardje9d8a262009-07-01 08:06:34 +00002053 IRTemp tres, IRTemp ta1, IRTemp ta2,
2054 /* info about optional store: */
2055 IRTemp taddr, IRTemp texpVal, Addr32 restart_point )
sewardjdf0e0022005-01-25 15:48:43 +00002056{
2057 UInt thunkOp;
2058 IRType ty = szToITy(sz);
2059 IRTemp oldc = newTemp(Ity_I64);
2060 IRTemp oldcn = newTemp(ty);
2061 IROp minus = mkSizedOp(ty, Iop_Sub8);
2062 IROp xor = mkSizedOp(ty, Iop_Xor8);
2063
sewardje9d8a262009-07-01 08:06:34 +00002064 vassert(typeOfIRTemp(irsb->tyenv, tres) == ty);
2065
sewardjdf0e0022005-01-25 15:48:43 +00002066 switch (sz) {
2067 case 8: thunkOp = AMD64G_CC_OP_SBBQ; break;
2068 case 4: thunkOp = AMD64G_CC_OP_SBBL; break;
2069 case 2: thunkOp = AMD64G_CC_OP_SBBW; break;
2070 case 1: thunkOp = AMD64G_CC_OP_SBBB; break;
2071 default: vassert(0);
2072 }
2073
2074 /* oldc = old carry flag, 0 or 1 */
2075 assign( oldc, binop(Iop_And64,
2076 mk_amd64g_calculate_rflags_c(),
2077 mkU64(1)) );
2078
2079 assign( oldcn, narrowTo(ty, mkexpr(oldc)) );
2080
2081 assign( tres, binop(minus,
2082 binop(minus,mkexpr(ta1),mkexpr(ta2)),
2083 mkexpr(oldcn)) );
2084
sewardje9d8a262009-07-01 08:06:34 +00002085 /* Possibly generate a store of 'tres' to 'taddr'. See comment at
2086 start of this function. */
2087 if (taddr != IRTemp_INVALID) {
2088 if (texpVal == IRTemp_INVALID) {
2089 vassert(restart_point == 0);
2090 storeLE( mkexpr(taddr), mkexpr(tres) );
2091 } else {
2092 vassert(typeOfIRTemp(irsb->tyenv, texpVal) == ty);
2093 /* .. and hence 'texpVal' has the same type as 'tres'. */
2094 casLE( mkexpr(taddr),
2095 mkexpr(texpVal), mkexpr(tres), restart_point );
2096 }
2097 }
2098
sewardjdf0e0022005-01-25 15:48:43 +00002099 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(thunkOp) ) );
sewardj820611e2005-08-24 10:56:01 +00002100 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(ta1) )) );
2101 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(binop(xor, mkexpr(ta2),
2102 mkexpr(oldcn)) )) );
sewardjdf0e0022005-01-25 15:48:43 +00002103 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(oldc) ) );
2104}
2105
2106
sewardj3ca55a12005-01-27 16:06:23 +00002107/* -------------- Helpers for disassembly printing. -------------- */
2108
florian55085f82012-11-21 00:36:55 +00002109static const HChar* nameGrp1 ( Int opc_aux )
sewardj3ca55a12005-01-27 16:06:23 +00002110{
florian55085f82012-11-21 00:36:55 +00002111 static const HChar* grp1_names[8]
sewardj3ca55a12005-01-27 16:06:23 +00002112 = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" };
2113 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp1(amd64)");
2114 return grp1_names[opc_aux];
2115}
2116
florian55085f82012-11-21 00:36:55 +00002117static const HChar* nameGrp2 ( Int opc_aux )
sewardj118b23e2005-01-29 02:14:44 +00002118{
florian55085f82012-11-21 00:36:55 +00002119 static const HChar* grp2_names[8]
sewardj118b23e2005-01-29 02:14:44 +00002120 = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" };
sewardje941eea2005-01-30 19:52:28 +00002121 if (opc_aux < 0 || opc_aux > 7) vpanic("nameGrp2(amd64)");
sewardj118b23e2005-01-29 02:14:44 +00002122 return grp2_names[opc_aux];
2123}
2124
florian55085f82012-11-21 00:36:55 +00002125static const HChar* nameGrp4 ( Int opc_aux )
sewardj03b07cc2005-01-31 18:09:43 +00002126{
florian55085f82012-11-21 00:36:55 +00002127 static const HChar* grp4_names[8]
sewardj03b07cc2005-01-31 18:09:43 +00002128 = { "inc", "dec", "???", "???", "???", "???", "???", "???" };
2129 if (opc_aux < 0 || opc_aux > 1) vpanic("nameGrp4(amd64)");
2130 return grp4_names[opc_aux];
2131}
sewardj354e5c62005-01-27 20:12:52 +00002132
florian55085f82012-11-21 00:36:55 +00002133static const HChar* nameGrp5 ( Int opc_aux )
sewardj354e5c62005-01-27 20:12:52 +00002134{
florian55085f82012-11-21 00:36:55 +00002135 static const HChar* grp5_names[8]
sewardj354e5c62005-01-27 20:12:52 +00002136 = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" };
2137 if (opc_aux < 0 || opc_aux > 6) vpanic("nameGrp5(amd64)");
2138 return grp5_names[opc_aux];
2139}
2140
florian55085f82012-11-21 00:36:55 +00002141static const HChar* nameGrp8 ( Int opc_aux )
sewardj1d511802005-03-27 17:59:45 +00002142{
florian55085f82012-11-21 00:36:55 +00002143 static const HChar* grp8_names[8]
sewardj1d511802005-03-27 17:59:45 +00002144 = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" };
2145 if (opc_aux < 4 || opc_aux > 7) vpanic("nameGrp8(amd64)");
2146 return grp8_names[opc_aux];
2147}
2148
florian55085f82012-11-21 00:36:55 +00002149//.. static const HChar* nameSReg ( UInt sreg )
sewardjd20c8852005-01-20 20:04:07 +00002150//.. {
2151//.. switch (sreg) {
2152//.. case R_ES: return "%es";
2153//.. case R_CS: return "%cs";
2154//.. case R_SS: return "%ss";
2155//.. case R_DS: return "%ds";
2156//.. case R_FS: return "%fs";
2157//.. case R_GS: return "%gs";
2158//.. default: vpanic("nameSReg(x86)");
2159//.. }
2160//.. }
sewardj8711f662005-05-09 17:52:56 +00002161
florian55085f82012-11-21 00:36:55 +00002162static const HChar* nameMMXReg ( Int mmxreg )
sewardj8711f662005-05-09 17:52:56 +00002163{
florian55085f82012-11-21 00:36:55 +00002164 static const HChar* mmx_names[8]
sewardj8711f662005-05-09 17:52:56 +00002165 = { "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7" };
2166 if (mmxreg < 0 || mmxreg > 7) vpanic("nameMMXReg(amd64,guest)");
2167 return mmx_names[mmxreg];
2168}
sewardj1001dc42005-02-21 08:25:55 +00002169
florian55085f82012-11-21 00:36:55 +00002170static const HChar* nameXMMReg ( Int xmmreg )
sewardj1001dc42005-02-21 08:25:55 +00002171{
florian55085f82012-11-21 00:36:55 +00002172 static const HChar* xmm_names[16]
sewardj1001dc42005-02-21 08:25:55 +00002173 = { "%xmm0", "%xmm1", "%xmm2", "%xmm3",
2174 "%xmm4", "%xmm5", "%xmm6", "%xmm7",
2175 "%xmm8", "%xmm9", "%xmm10", "%xmm11",
2176 "%xmm12", "%xmm13", "%xmm14", "%xmm15" };
2177 if (xmmreg < 0 || xmmreg > 15) vpanic("nameXMMReg(amd64)");
2178 return xmm_names[xmmreg];
2179}
2180
florian55085f82012-11-21 00:36:55 +00002181static const HChar* nameMMXGran ( Int gran )
sewardj8711f662005-05-09 17:52:56 +00002182{
2183 switch (gran) {
2184 case 0: return "b";
2185 case 1: return "w";
2186 case 2: return "d";
2187 case 3: return "q";
2188 default: vpanic("nameMMXGran(amd64,guest)");
2189 }
2190}
sewardjdf0e0022005-01-25 15:48:43 +00002191
florianf8883992013-01-15 03:25:17 +00002192static HChar nameISize ( Int size )
sewardjdf0e0022005-01-25 15:48:43 +00002193{
2194 switch (size) {
2195 case 8: return 'q';
2196 case 4: return 'l';
2197 case 2: return 'w';
2198 case 1: return 'b';
2199 default: vpanic("nameISize(amd64)");
2200 }
2201}
2202
florian55085f82012-11-21 00:36:55 +00002203static const HChar* nameYMMReg ( Int ymmreg )
sewardjc4530ae2012-05-21 10:18:49 +00002204{
florian55085f82012-11-21 00:36:55 +00002205 static const HChar* ymm_names[16]
sewardjc4530ae2012-05-21 10:18:49 +00002206 = { "%ymm0", "%ymm1", "%ymm2", "%ymm3",
2207 "%ymm4", "%ymm5", "%ymm6", "%ymm7",
2208 "%ymm8", "%ymm9", "%ymm10", "%ymm11",
2209 "%ymm12", "%ymm13", "%ymm14", "%ymm15" };
2210 if (ymmreg < 0 || ymmreg > 15) vpanic("nameYMMReg(amd64)");
2211 return ymm_names[ymmreg];
2212}
2213
sewardjdf0e0022005-01-25 15:48:43 +00002214
2215/*------------------------------------------------------------*/
2216/*--- JMP helpers ---*/
2217/*------------------------------------------------------------*/
2218
sewardjc6f970f2012-04-02 21:54:49 +00002219static void jmp_lit( /*MOD*/DisResult* dres,
2220 IRJumpKind kind, Addr64 d64 )
sewardjdf0e0022005-01-25 15:48:43 +00002221{
sewardjc6f970f2012-04-02 21:54:49 +00002222 vassert(dres->whatNext == Dis_Continue);
2223 vassert(dres->len == 0);
2224 vassert(dres->continueAt == 0);
2225 vassert(dres->jk_StopHere == Ijk_INVALID);
2226 dres->whatNext = Dis_StopHere;
2227 dres->jk_StopHere = kind;
2228 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
sewardjdf0e0022005-01-25 15:48:43 +00002229}
2230
sewardjc6f970f2012-04-02 21:54:49 +00002231static void jmp_treg( /*MOD*/DisResult* dres,
2232 IRJumpKind kind, IRTemp t )
sewardj2f959cc2005-01-26 01:19:35 +00002233{
sewardjc6f970f2012-04-02 21:54:49 +00002234 vassert(dres->whatNext == Dis_Continue);
2235 vassert(dres->len == 0);
2236 vassert(dres->continueAt == 0);
2237 vassert(dres->jk_StopHere == Ijk_INVALID);
2238 dres->whatNext = Dis_StopHere;
2239 dres->jk_StopHere = kind;
2240 stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
sewardj2f959cc2005-01-26 01:19:35 +00002241}
2242
sewardj1389d4d2005-01-28 13:46:29 +00002243static
sewardjc6f970f2012-04-02 21:54:49 +00002244void jcc_01 ( /*MOD*/DisResult* dres,
2245 AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
sewardj1389d4d2005-01-28 13:46:29 +00002246{
2247 Bool invert;
2248 AMD64Condcode condPos;
sewardjc6f970f2012-04-02 21:54:49 +00002249 vassert(dres->whatNext == Dis_Continue);
2250 vassert(dres->len == 0);
2251 vassert(dres->continueAt == 0);
2252 vassert(dres->jk_StopHere == Ijk_INVALID);
2253 dres->whatNext = Dis_StopHere;
2254 dres->jk_StopHere = Ijk_Boring;
sewardj1389d4d2005-01-28 13:46:29 +00002255 condPos = positiveIse_AMD64Condcode ( cond, &invert );
2256 if (invert) {
2257 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2258 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002259 IRConst_U64(d64_false),
2260 OFFB_RIP ) );
2261 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002262 } else {
2263 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
2264 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00002265 IRConst_U64(d64_true),
2266 OFFB_RIP ) );
2267 stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
sewardj1389d4d2005-01-28 13:46:29 +00002268 }
2269}
sewardjb3a04292005-01-21 20:33:44 +00002270
sewardj478646f2008-05-01 20:13:04 +00002271/* Let new_rsp be the %rsp value after a call/return. Let nia be the
2272 guest address of the next instruction to be executed.
2273
2274 This function generates an AbiHint to say that -128(%rsp)
2275 .. -1(%rsp) should now be regarded as uninitialised.
sewardj5a9ffab2005-05-12 17:55:01 +00002276*/
sewardjaca070a2006-10-17 00:28:22 +00002277static
sewardj478646f2008-05-01 20:13:04 +00002278void make_redzone_AbiHint ( VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +00002279 IRTemp new_rsp, IRTemp nia, const HChar* who )
sewardj5a9ffab2005-05-12 17:55:01 +00002280{
sewardjdd40fdf2006-12-24 02:20:24 +00002281 Int szB = vbi->guest_stack_redzone_size;
sewardjaca070a2006-10-17 00:28:22 +00002282 vassert(szB >= 0);
2283
2284 /* A bit of a kludge. Currently the only AbI we've guested AMD64
2285 for is ELF. So just check it's the expected 128 value
2286 (paranoia). */
2287 vassert(szB == 128);
2288
sewardj5a9ffab2005-05-12 17:55:01 +00002289 if (0) vex_printf("AbiHint: %s\n", who);
sewardjdd40fdf2006-12-24 02:20:24 +00002290 vassert(typeOfIRTemp(irsb->tyenv, new_rsp) == Ity_I64);
sewardj478646f2008-05-01 20:13:04 +00002291 vassert(typeOfIRTemp(irsb->tyenv, nia) == Ity_I64);
sewardjaca070a2006-10-17 00:28:22 +00002292 if (szB > 0)
2293 stmt( IRStmt_AbiHint(
2294 binop(Iop_Sub64, mkexpr(new_rsp), mkU64(szB)),
sewardj478646f2008-05-01 20:13:04 +00002295 szB,
2296 mkexpr(nia)
sewardjaca070a2006-10-17 00:28:22 +00002297 ));
sewardj5a9ffab2005-05-12 17:55:01 +00002298}
2299
sewardjb3a04292005-01-21 20:33:44 +00002300
2301/*------------------------------------------------------------*/
2302/*--- Disassembling addressing modes ---*/
2303/*------------------------------------------------------------*/
2304
2305static
florian55085f82012-11-21 00:36:55 +00002306const HChar* segRegTxt ( Prefix pfx )
sewardjb3a04292005-01-21 20:33:44 +00002307{
2308 if (pfx & PFX_CS) return "%cs:";
2309 if (pfx & PFX_DS) return "%ds:";
2310 if (pfx & PFX_ES) return "%es:";
2311 if (pfx & PFX_FS) return "%fs:";
2312 if (pfx & PFX_GS) return "%gs:";
2313 if (pfx & PFX_SS) return "%ss:";
2314 return ""; /* no override */
2315}
2316
2317
2318/* 'virtual' is an IRExpr* holding a virtual address. Convert it to a
2319 linear address by adding any required segment override as indicated
sewardj42561ef2005-11-04 14:18:31 +00002320 by sorb, and also dealing with any address size override
2321 present. */
sewardjb3a04292005-01-21 20:33:44 +00002322static
sewardj2e28ac42008-12-04 00:05:12 +00002323IRExpr* handleAddrOverrides ( VexAbiInfo* vbi,
2324 Prefix pfx, IRExpr* virtual )
sewardjb3a04292005-01-21 20:33:44 +00002325{
sewardj42561ef2005-11-04 14:18:31 +00002326 /* --- segment overrides --- */
sewardja6b93d12005-02-17 09:28:28 +00002327 if (pfx & PFX_FS) {
sewardj2e28ac42008-12-04 00:05:12 +00002328 if (vbi->guest_amd64_assume_fs_is_zero) {
2329 /* Note that this is a linux-kernel specific hack that relies
2330 on the assumption that %fs is always zero. */
2331 /* return virtual + guest_FS_ZERO. */
2332 virtual = binop(Iop_Add64, virtual,
2333 IRExpr_Get(OFFB_FS_ZERO, Ity_I64));
2334 } else {
2335 unimplemented("amd64 %fs segment override");
2336 }
sewardja6b93d12005-02-17 09:28:28 +00002337 }
sewardjb3a04292005-01-21 20:33:44 +00002338
sewardja6b93d12005-02-17 09:28:28 +00002339 if (pfx & PFX_GS) {
sewardj2e28ac42008-12-04 00:05:12 +00002340 if (vbi->guest_amd64_assume_gs_is_0x60) {
2341 /* Note that this is a darwin-kernel specific hack that relies
2342 on the assumption that %gs is always 0x60. */
2343 /* return virtual + guest_GS_0x60. */
2344 virtual = binop(Iop_Add64, virtual,
2345 IRExpr_Get(OFFB_GS_0x60, Ity_I64));
2346 } else {
2347 unimplemented("amd64 %gs segment override");
2348 }
sewardja6b93d12005-02-17 09:28:28 +00002349 }
2350
2351 /* cs, ds, es and ss are simply ignored in 64-bit mode. */
sewardj42561ef2005-11-04 14:18:31 +00002352
2353 /* --- address size override --- */
2354 if (haveASO(pfx))
2355 virtual = unop(Iop_32Uto64, unop(Iop_64to32, virtual));
2356
sewardja6b93d12005-02-17 09:28:28 +00002357 return virtual;
sewardjb3a04292005-01-21 20:33:44 +00002358}
sewardja6b93d12005-02-17 09:28:28 +00002359
sewardjd20c8852005-01-20 20:04:07 +00002360//.. {
2361//.. Int sreg;
2362//.. IRType hWordTy;
2363//.. IRTemp ldt_ptr, gdt_ptr, seg_selector, r64;
2364//..
2365//.. if (sorb == 0)
2366//.. /* the common case - no override */
2367//.. return virtual;
2368//..
2369//.. switch (sorb) {
2370//.. case 0x3E: sreg = R_DS; break;
2371//.. case 0x26: sreg = R_ES; break;
2372//.. case 0x64: sreg = R_FS; break;
2373//.. case 0x65: sreg = R_GS; break;
sewardj42561ef2005-11-04 14:18:31 +00002374//.. default: vpanic("handleAddrOverrides(x86,guest)");
sewardjd20c8852005-01-20 20:04:07 +00002375//.. }
2376//..
2377//.. hWordTy = sizeof(HWord)==4 ? Ity_I32 : Ity_I64;
2378//..
2379//.. seg_selector = newTemp(Ity_I32);
2380//.. ldt_ptr = newTemp(hWordTy);
2381//.. gdt_ptr = newTemp(hWordTy);
2382//.. r64 = newTemp(Ity_I64);
2383//..
2384//.. assign( seg_selector, unop(Iop_16Uto32, getSReg(sreg)) );
2385//.. assign( ldt_ptr, IRExpr_Get( OFFB_LDT, hWordTy ));
2386//.. assign( gdt_ptr, IRExpr_Get( OFFB_GDT, hWordTy ));
2387//..
2388//.. /*
2389//.. Call this to do the translation and limit checks:
2390//.. ULong x86g_use_seg_selector ( HWord ldt, HWord gdt,
2391//.. UInt seg_selector, UInt virtual_addr )
2392//.. */
2393//.. assign(
2394//.. r64,
2395//.. mkIRExprCCall(
2396//.. Ity_I64,
2397//.. 0/*regparms*/,
2398//.. "x86g_use_seg_selector",
2399//.. &x86g_use_seg_selector,
2400//.. mkIRExprVec_4( mkexpr(ldt_ptr), mkexpr(gdt_ptr),
2401//.. mkexpr(seg_selector), virtual)
2402//.. )
2403//.. );
2404//..
2405//.. /* If the high 32 of the result are non-zero, there was a
2406//.. failure in address translation. In which case, make a
2407//.. quick exit.
2408//.. */
2409//.. stmt(
2410//.. IRStmt_Exit(
2411//.. binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
2412//.. Ijk_MapFail,
2413//.. IRConst_U32( guest_eip_curr_instr )
2414//.. )
2415//.. );
2416//..
2417//.. /* otherwise, here's the translated result. */
2418//.. return unop(Iop_64to32, mkexpr(r64));
2419//.. }
sewardjb3a04292005-01-21 20:33:44 +00002420
2421
2422/* Generate IR to calculate an address indicated by a ModRM and
2423 following SIB bytes. The expression, and the number of bytes in
2424 the address mode, are returned (the latter in *len). Note that
2425 this fn should not be called if the R/M part of the address denotes
2426 a register instead of memory. If print_codegen is true, text of
2427 the addressing mode is placed in buf.
2428
2429 The computed address is stored in a new tempreg, and the
sewardje1698952005-02-08 15:02:39 +00002430 identity of the tempreg is returned.
2431
2432 extra_bytes holds the number of bytes after the amode, as supplied
2433 by the caller. This is needed to make sense of %rip-relative
2434 addresses. Note that the value that *len is set to is only the
2435 length of the amode itself and does not include the value supplied
sewardj09717342005-05-05 21:34:02 +00002436 in extra_bytes.
sewardje1698952005-02-08 15:02:39 +00002437 */
sewardjb3a04292005-01-21 20:33:44 +00002438
2439static IRTemp disAMode_copy2tmp ( IRExpr* addr64 )
2440{
2441 IRTemp tmp = newTemp(Ity_I64);
2442 assign( tmp, addr64 );
2443 return tmp;
2444}
2445
2446static
sewardj2e28ac42008-12-04 00:05:12 +00002447IRTemp disAMode ( /*OUT*/Int* len,
2448 VexAbiInfo* vbi, Prefix pfx, Long delta,
2449 /*OUT*/HChar* buf, Int extra_bytes )
sewardjb3a04292005-01-21 20:33:44 +00002450{
sewardj8c332e22005-01-28 01:36:56 +00002451 UChar mod_reg_rm = getUChar(delta);
sewardjb3a04292005-01-21 20:33:44 +00002452 delta++;
2453
2454 buf[0] = (UChar)0;
sewardje1698952005-02-08 15:02:39 +00002455 vassert(extra_bytes >= 0 && extra_bytes < 10);
sewardjb3a04292005-01-21 20:33:44 +00002456
2457 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2458 jump table seems a bit excessive.
2459 */
sewardj7a240552005-01-28 21:37:12 +00002460 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002461 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2462 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002463 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardjb3a04292005-01-21 20:33:44 +00002464 switch (mod_reg_rm) {
2465
2466 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2467 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2468 */
2469 case 0x00: case 0x01: case 0x02: case 0x03:
2470 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj1027dc22005-02-26 01:55:02 +00002471 { UChar rm = toUChar(mod_reg_rm & 7);
sewardjc4356f02007-11-09 21:15:04 +00002472 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002473 *len = 1;
2474 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002475 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,rm)));
sewardjb3a04292005-01-21 20:33:44 +00002476 }
2477
2478 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2479 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2480 */
2481 case 0x08: case 0x09: case 0x0A: case 0x0B:
2482 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj1027dc22005-02-26 01:55:02 +00002483 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj8c332e22005-01-28 01:36:56 +00002484 Long d = getSDisp8(delta);
sewardj7eaa7cf2005-01-31 18:55:22 +00002485 if (d == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002486 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002487 } else {
sewardjc4356f02007-11-09 21:15:04 +00002488 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardj7eaa7cf2005-01-31 18:55:22 +00002489 }
sewardjb3a04292005-01-21 20:33:44 +00002490 *len = 2;
2491 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002492 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002493 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002494 }
2495
2496 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2497 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2498 */
2499 case 0x10: case 0x11: case 0x12: case 0x13:
2500 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj1027dc22005-02-26 01:55:02 +00002501 { UChar rm = toUChar(mod_reg_rm & 7);
sewardj227458e2005-01-31 19:04:50 +00002502 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002503 DIS(buf, "%s%lld(%s)", segRegTxt(pfx), d, nameIRegRexB(8,pfx,rm));
sewardjb3a04292005-01-21 20:33:44 +00002504 *len = 5;
2505 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002506 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002507 binop(Iop_Add64,getIRegRexB(8,pfx,rm),mkU64(d))));
sewardjb3a04292005-01-21 20:33:44 +00002508 }
2509
2510 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2511 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2512 case 0x18: case 0x19: case 0x1A: case 0x1B:
2513 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
sewardj354e5c62005-01-27 20:12:52 +00002514 vpanic("disAMode(amd64): not an addr!");
sewardjb3a04292005-01-21 20:33:44 +00002515
sewardj9e6491a2005-07-02 19:24:10 +00002516 /* RIP + disp32. This assumes that guest_RIP_curr_instr is set
sewardjb3a04292005-01-21 20:33:44 +00002517 correctly at the start of handling each instruction. */
2518 case 0x05:
sewardj227458e2005-01-31 19:04:50 +00002519 { Long d = getSDisp32(delta);
sewardjb3a04292005-01-21 20:33:44 +00002520 *len = 5;
sewardjc4356f02007-11-09 21:15:04 +00002521 DIS(buf, "%s%lld(%%rip)", segRegTxt(pfx), d);
sewardj4b744762005-02-07 15:02:25 +00002522 /* We need to know the next instruction's start address.
2523 Try and figure out what it is, record the guess, and ask
2524 the top-level driver logic (bbToIR_AMD64) to check we
2525 guessed right, after the instruction is completely
2526 decoded. */
sewardj9e6491a2005-07-02 19:24:10 +00002527 guest_RIP_next_mustcheck = True;
2528 guest_RIP_next_assumed = guest_RIP_bbstart
sewardje1698952005-02-08 15:02:39 +00002529 + delta+4 + extra_bytes;
sewardjb3a04292005-01-21 20:33:44 +00002530 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002531 handleAddrOverrides(vbi, pfx,
sewardj9e6491a2005-07-02 19:24:10 +00002532 binop(Iop_Add64, mkU64(guest_RIP_next_assumed),
sewardjb3a04292005-01-21 20:33:44 +00002533 mkU64(d))));
2534 }
sewardj3ca55a12005-01-27 16:06:23 +00002535
sewardj2f959cc2005-01-26 01:19:35 +00002536 case 0x04: {
2537 /* SIB, with no displacement. Special cases:
sewardj3ca55a12005-01-27 16:06:23 +00002538 -- %rsp cannot act as an index value.
2539 If index_r indicates %rsp, zero is used for the index.
2540 -- when mod is zero and base indicates RBP or R13, base is
2541 instead a 32-bit sign-extended literal.
sewardj2f959cc2005-01-26 01:19:35 +00002542 It's all madness, I tell you. Extract %index, %base and
2543 scale from the SIB byte. The value denoted is then:
sewardj3ca55a12005-01-27 16:06:23 +00002544 | %index == %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002545 = d32 following SIB byte
sewardj3ca55a12005-01-27 16:06:23 +00002546 | %index == %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002547 = %base
sewardj3ca55a12005-01-27 16:06:23 +00002548 | %index != %RSP && (%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002549 = d32 following SIB byte + (%index << scale)
sewardj3ca55a12005-01-27 16:06:23 +00002550 | %index != %RSP && !(%base == %RBP || %base == %R13)
sewardj2f959cc2005-01-26 01:19:35 +00002551 = %base + (%index << scale)
2552 */
sewardj8c332e22005-01-28 01:36:56 +00002553 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002554 UChar scale = toUChar((sib >> 6) & 3);
2555 UChar index_r = toUChar((sib >> 3) & 7);
2556 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002557 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002558 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2559 Bool index_is_SP = toBool(index_r == R_RSP && 0==getRexX(pfx));
sewardj2f959cc2005-01-26 01:19:35 +00002560 delta++;
sewardjb3a04292005-01-21 20:33:44 +00002561
sewardj3ca55a12005-01-27 16:06:23 +00002562 if ((!index_is_SP) && (!base_is_BPor13)) {
sewardje941eea2005-01-30 19:52:28 +00002563 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002564 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002565 nameIRegRexB(8,pfx,base_r),
2566 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002567 } else {
sewardjc4356f02007-11-09 21:15:04 +00002568 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002569 nameIRegRexB(8,pfx,base_r),
2570 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002571 }
sewardj2f959cc2005-01-26 01:19:35 +00002572 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002573 return
2574 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002575 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002576 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002577 getIRegRexB(8,pfx,base_r),
2578 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj2f959cc2005-01-26 01:19:35 +00002579 mkU8(scale)))));
2580 }
2581
sewardj3ca55a12005-01-27 16:06:23 +00002582 if ((!index_is_SP) && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002583 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002584 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002585 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardj2f959cc2005-01-26 01:19:35 +00002586 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002587 return
2588 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002589 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002590 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002591 binop(Iop_Shl64, getIReg64rexX(pfx,index_r),
sewardj3ca55a12005-01-27 16:06:23 +00002592 mkU8(scale)),
2593 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002594 }
2595
sewardj3ca55a12005-01-27 16:06:23 +00002596 if (index_is_SP && (!base_is_BPor13)) {
sewardjc4356f02007-11-09 21:15:04 +00002597 DIS(buf, "%s(%s)", segRegTxt(pfx), nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002598 *len = 2;
sewardj2f959cc2005-01-26 01:19:35 +00002599 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002600 handleAddrOverrides(vbi, pfx, getIRegRexB(8,pfx,base_r)));
sewardj2f959cc2005-01-26 01:19:35 +00002601 }
2602
sewardj3ca55a12005-01-27 16:06:23 +00002603 if (index_is_SP && base_is_BPor13) {
sewardj227458e2005-01-31 19:04:50 +00002604 Long d = getSDisp32(delta);
sewardjc4356f02007-11-09 21:15:04 +00002605 DIS(buf, "%s%lld", segRegTxt(pfx), d);
sewardj2f959cc2005-01-26 01:19:35 +00002606 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002607 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002608 handleAddrOverrides(vbi, pfx, mkU64(d)));
sewardj2f959cc2005-01-26 01:19:35 +00002609 }
2610
2611 vassert(0);
2612 }
sewardj3ca55a12005-01-27 16:06:23 +00002613
sewardj2f959cc2005-01-26 01:19:35 +00002614 /* SIB, with 8-bit displacement. Special cases:
2615 -- %esp cannot act as an index value.
2616 If index_r indicates %esp, zero is used for the index.
2617 Denoted value is:
2618 | %index == %ESP
2619 = d8 + %base
2620 | %index != %ESP
2621 = d8 + %base + (%index << scale)
2622 */
2623 case 0x0C: {
sewardj8c332e22005-01-28 01:36:56 +00002624 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002625 UChar scale = toUChar((sib >> 6) & 3);
2626 UChar index_r = toUChar((sib >> 3) & 7);
2627 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002628 Long d = getSDisp8(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002629
sewardj3ca55a12005-01-27 16:06:23 +00002630 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002631 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002632 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002633 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002634 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002635 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002636 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002637 } else {
sewardje941eea2005-01-30 19:52:28 +00002638 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002639 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002640 nameIRegRexB(8,pfx,base_r),
2641 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002642 } else {
sewardjc4356f02007-11-09 21:15:04 +00002643 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002644 nameIRegRexB(8,pfx,base_r),
2645 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002646 }
sewardj2f959cc2005-01-26 01:19:35 +00002647 *len = 3;
sewardj2f959cc2005-01-26 01:19:35 +00002648 return
2649 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002650 handleAddrOverrides(vbi, pfx,
sewardj3ca55a12005-01-27 16:06:23 +00002651 binop(Iop_Add64,
2652 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002653 getIRegRexB(8,pfx,base_r),
sewardj3ca55a12005-01-27 16:06:23 +00002654 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002655 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj3ca55a12005-01-27 16:06:23 +00002656 mkU64(d))));
sewardj2f959cc2005-01-26 01:19:35 +00002657 }
sewardj3ca55a12005-01-27 16:06:23 +00002658 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002659 }
sewardj3ca55a12005-01-27 16:06:23 +00002660
sewardj2f959cc2005-01-26 01:19:35 +00002661 /* SIB, with 32-bit displacement. Special cases:
2662 -- %rsp cannot act as an index value.
2663 If index_r indicates %rsp, zero is used for the index.
2664 Denoted value is:
2665 | %index == %RSP
2666 = d32 + %base
2667 | %index != %RSP
2668 = d32 + %base + (%index << scale)
2669 */
2670 case 0x14: {
sewardj8c332e22005-01-28 01:36:56 +00002671 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002672 UChar scale = toUChar((sib >> 6) & 3);
2673 UChar index_r = toUChar((sib >> 3) & 7);
2674 UChar base_r = toUChar(sib & 7);
sewardj8c332e22005-01-28 01:36:56 +00002675 Long d = getSDisp32(delta+1);
sewardj2f959cc2005-01-26 01:19:35 +00002676
2677 if (index_r == R_RSP && 0==getRexX(pfx)) {
sewardjc4356f02007-11-09 21:15:04 +00002678 DIS(buf, "%s%lld(%s)", segRegTxt(pfx),
sewardj5b470602005-02-27 13:10:48 +00002679 d, nameIRegRexB(8,pfx,base_r));
sewardj2f959cc2005-01-26 01:19:35 +00002680 *len = 6;
sewardj2f959cc2005-01-26 01:19:35 +00002681 return disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002682 handleAddrOverrides(vbi, pfx,
sewardj5b470602005-02-27 13:10:48 +00002683 binop(Iop_Add64, getIRegRexB(8,pfx,base_r), mkU64(d)) ));
sewardj2f959cc2005-01-26 01:19:35 +00002684 } else {
sewardje941eea2005-01-30 19:52:28 +00002685 if (scale == 0) {
sewardjc4356f02007-11-09 21:15:04 +00002686 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002687 nameIRegRexB(8,pfx,base_r),
2688 nameIReg64rexX(pfx,index_r));
sewardje941eea2005-01-30 19:52:28 +00002689 } else {
sewardjc4356f02007-11-09 21:15:04 +00002690 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
sewardj5b470602005-02-27 13:10:48 +00002691 nameIRegRexB(8,pfx,base_r),
2692 nameIReg64rexX(pfx,index_r), 1<<scale);
sewardje941eea2005-01-30 19:52:28 +00002693 }
sewardj2f959cc2005-01-26 01:19:35 +00002694 *len = 6;
2695 return
2696 disAMode_copy2tmp(
sewardj2e28ac42008-12-04 00:05:12 +00002697 handleAddrOverrides(vbi, pfx,
sewardj2f959cc2005-01-26 01:19:35 +00002698 binop(Iop_Add64,
2699 binop(Iop_Add64,
sewardj5b470602005-02-27 13:10:48 +00002700 getIRegRexB(8,pfx,base_r),
sewardj2f959cc2005-01-26 01:19:35 +00002701 binop(Iop_Shl64,
sewardj5b470602005-02-27 13:10:48 +00002702 getIReg64rexX(pfx,index_r), mkU8(scale))),
sewardj2f959cc2005-01-26 01:19:35 +00002703 mkU64(d))));
2704 }
sewardj3ca55a12005-01-27 16:06:23 +00002705 vassert(0); /*NOTREACHED*/
sewardj2f959cc2005-01-26 01:19:35 +00002706 }
2707
sewardjb3a04292005-01-21 20:33:44 +00002708 default:
2709 vpanic("disAMode(amd64)");
2710 return 0; /*notreached*/
2711 }
2712}
2713
2714
sewardjcc3d2192013-03-27 11:37:33 +00002715/* Similarly for VSIB addressing. This returns just the addend,
2716 and fills in *rI and *vscale with the register number of the vector
2717 index and its multiplicand. */
2718static
2719IRTemp disAVSIBMode ( /*OUT*/Int* len,
2720 VexAbiInfo* vbi, Prefix pfx, Long delta,
2721 /*OUT*/HChar* buf, /*OUT*/UInt* rI,
2722 IRType ty, /*OUT*/Int* vscale )
2723{
2724 UChar mod_reg_rm = getUChar(delta);
2725 const HChar *vindex;
2726
2727 *len = 0;
2728 *rI = 0;
2729 *vscale = 0;
2730 buf[0] = (UChar)0;
2731 if ((mod_reg_rm & 7) != 4 || epartIsReg(mod_reg_rm))
2732 return IRTemp_INVALID;
2733
2734 UChar sib = getUChar(delta+1);
2735 UChar scale = toUChar((sib >> 6) & 3);
2736 UChar index_r = toUChar((sib >> 3) & 7);
2737 UChar base_r = toUChar(sib & 7);
2738 Long d = 0;
2739 /* correct since #(R13) == 8 + #(RBP) */
2740 Bool base_is_BPor13 = toBool(base_r == R_RBP);
2741 delta += 2;
2742 *len = 2;
2743
2744 *rI = index_r | (getRexX(pfx) << 3);
2745 if (ty == Ity_V128)
2746 vindex = nameXMMReg(*rI);
2747 else
2748 vindex = nameYMMReg(*rI);
2749 *vscale = 1<<scale;
2750
2751 switch (mod_reg_rm >> 6) {
2752 case 0:
2753 if (base_is_BPor13) {
2754 d = getSDisp32(delta);
2755 *len += 4;
2756 if (scale == 0) {
2757 DIS(buf, "%s%lld(,%s)", segRegTxt(pfx), d, vindex);
2758 } else {
2759 DIS(buf, "%s%lld(,%s,%d)", segRegTxt(pfx), d, vindex, 1<<scale);
2760 }
2761 return disAMode_copy2tmp( mkU64(d) );
2762 } else {
2763 if (scale == 0) {
2764 DIS(buf, "%s(%s,%s)", segRegTxt(pfx),
2765 nameIRegRexB(8,pfx,base_r), vindex);
2766 } else {
2767 DIS(buf, "%s(%s,%s,%d)", segRegTxt(pfx),
2768 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2769 }
2770 }
2771 break;
2772 case 1:
2773 d = getSDisp8(delta);
2774 *len += 1;
2775 goto have_disp;
2776 case 2:
2777 d = getSDisp32(delta);
2778 *len += 4;
2779 have_disp:
2780 if (scale == 0) {
2781 DIS(buf, "%s%lld(%s,%s)", segRegTxt(pfx), d,
2782 nameIRegRexB(8,pfx,base_r), vindex);
2783 } else {
2784 DIS(buf, "%s%lld(%s,%s,%d)", segRegTxt(pfx), d,
2785 nameIRegRexB(8,pfx,base_r), vindex, 1<<scale);
2786 }
2787 break;
2788 }
2789
2790 if (!d)
2791 return disAMode_copy2tmp( getIRegRexB(8,pfx,base_r) );
2792 return disAMode_copy2tmp( binop(Iop_Add64, getIRegRexB(8,pfx,base_r),
2793 mkU64(d)) );
2794}
2795
2796
sewardj3ca55a12005-01-27 16:06:23 +00002797/* Figure out the number of (insn-stream) bytes constituting the amode
2798 beginning at delta. Is useful for getting hold of literals beyond
2799 the end of the amode before it has been disassembled. */
2800
sewardj270def42005-07-03 01:03:01 +00002801static UInt lengthAMode ( Prefix pfx, Long delta )
sewardj3ca55a12005-01-27 16:06:23 +00002802{
sewardj8c332e22005-01-28 01:36:56 +00002803 UChar mod_reg_rm = getUChar(delta);
sewardj3ca55a12005-01-27 16:06:23 +00002804 delta++;
2805
2806 /* squeeze out the reg field from mod_reg_rm, since a 256-entry
2807 jump table seems a bit excessive.
2808 */
sewardj7a240552005-01-28 21:37:12 +00002809 mod_reg_rm &= 0xC7; /* is now XX000YYY */
sewardj1027dc22005-02-26 01:55:02 +00002810 mod_reg_rm = toUChar(mod_reg_rm | (mod_reg_rm >> 3));
2811 /* is now XX0XXYYY */
sewardj7a240552005-01-28 21:37:12 +00002812 mod_reg_rm &= 0x1F; /* is now 000XXYYY */
sewardj3ca55a12005-01-27 16:06:23 +00002813 switch (mod_reg_rm) {
2814
2815 /* REX.B==0: (%rax) .. (%rdi), not including (%rsp) or (%rbp).
2816 REX.B==1: (%r8) .. (%r15), not including (%r12) or (%r13).
2817 */
2818 case 0x00: case 0x01: case 0x02: case 0x03:
2819 /* ! 04 */ /* ! 05 */ case 0x06: case 0x07:
sewardj3ca55a12005-01-27 16:06:23 +00002820 return 1;
2821
2822 /* REX.B==0: d8(%rax) ... d8(%rdi), not including d8(%rsp)
2823 REX.B==1: d8(%r8) ... d8(%r15), not including d8(%r12)
2824 */
2825 case 0x08: case 0x09: case 0x0A: case 0x0B:
2826 /* ! 0C */ case 0x0D: case 0x0E: case 0x0F:
sewardj3ca55a12005-01-27 16:06:23 +00002827 return 2;
2828
2829 /* REX.B==0: d32(%rax) ... d32(%rdi), not including d32(%rsp)
2830 REX.B==1: d32(%r8) ... d32(%r15), not including d32(%r12)
2831 */
2832 case 0x10: case 0x11: case 0x12: case 0x13:
2833 /* ! 14 */ case 0x15: case 0x16: case 0x17:
sewardj3ca55a12005-01-27 16:06:23 +00002834 return 5;
2835
2836 /* REX.B==0: a register, %rax .. %rdi. This shouldn't happen. */
2837 /* REX.B==1: a register, %r8 .. %r16. This shouldn't happen. */
2838 /* Not an address, but still handled. */
2839 case 0x18: case 0x19: case 0x1A: case 0x1B:
2840 case 0x1C: case 0x1D: case 0x1E: case 0x1F:
2841 return 1;
2842
2843 /* RIP + disp32. */
2844 case 0x05:
sewardj3ca55a12005-01-27 16:06:23 +00002845 return 5;
2846
2847 case 0x04: {
2848 /* SIB, with no displacement. */
sewardj8c332e22005-01-28 01:36:56 +00002849 UChar sib = getUChar(delta);
sewardj1027dc22005-02-26 01:55:02 +00002850 UChar base_r = toUChar(sib & 7);
sewardj3ca55a12005-01-27 16:06:23 +00002851 /* correct since #(R13) == 8 + #(RBP) */
sewardj7a240552005-01-28 21:37:12 +00002852 Bool base_is_BPor13 = toBool(base_r == R_RBP);
sewardj3ca55a12005-01-27 16:06:23 +00002853
2854 if (base_is_BPor13) {
sewardj3ca55a12005-01-27 16:06:23 +00002855 return 6;
2856 } else {
sewardj3ca55a12005-01-27 16:06:23 +00002857 return 2;
2858 }
2859 }
2860
2861 /* SIB, with 8-bit displacement. */
2862 case 0x0C:
2863 return 3;
2864
2865 /* SIB, with 32-bit displacement. */
2866 case 0x14:
2867 return 6;
2868
2869 default:
2870 vpanic("lengthAMode(amd64)");
2871 return 0; /*notreached*/
2872 }
2873}
2874
2875
sewardjdf0e0022005-01-25 15:48:43 +00002876/*------------------------------------------------------------*/
2877/*--- Disassembling common idioms ---*/
2878/*------------------------------------------------------------*/
2879
sewardjdf0e0022005-01-25 15:48:43 +00002880/* Handle binary integer instructions of the form
2881 op E, G meaning
2882 op reg-or-mem, reg
2883 Is passed the a ptr to the modRM byte, the actual operation, and the
2884 data size. Returns the address advanced completely over this
2885 instruction.
2886
2887 E(src) is reg-or-mem
2888 G(dst) is reg.
2889
2890 If E is reg, --> GET %G, tmp
2891 OP %E, tmp
2892 PUT tmp, %G
2893
2894 If E is mem and OP is not reversible,
2895 --> (getAddr E) -> tmpa
2896 LD (tmpa), tmpa
2897 GET %G, tmp2
2898 OP tmpa, tmp2
2899 PUT tmp2, %G
2900
2901 If E is mem and OP is reversible
2902 --> (getAddr E) -> tmpa
2903 LD (tmpa), tmpa
2904 OP %G, tmpa
2905 PUT tmpa, %G
2906*/
2907static
sewardj2e28ac42008-12-04 00:05:12 +00002908ULong dis_op2_E_G ( VexAbiInfo* vbi,
2909 Prefix pfx,
sewardjdf0e0022005-01-25 15:48:43 +00002910 Bool addSubCarry,
2911 IROp op8,
2912 Bool keep,
2913 Int size,
sewardj270def42005-07-03 01:03:01 +00002914 Long delta0,
florian55085f82012-11-21 00:36:55 +00002915 const HChar* t_amd64opc )
sewardjdf0e0022005-01-25 15:48:43 +00002916{
2917 HChar dis_buf[50];
2918 Int len;
2919 IRType ty = szToITy(size);
2920 IRTemp dst1 = newTemp(ty);
2921 IRTemp src = newTemp(ty);
2922 IRTemp dst0 = newTemp(ty);
2923 UChar rm = getUChar(delta0);
2924 IRTemp addr = IRTemp_INVALID;
2925
2926 /* addSubCarry == True indicates the intended operation is
2927 add-with-carry or subtract-with-borrow. */
2928 if (addSubCarry) {
2929 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
2930 vassert(keep);
2931 }
2932
2933 if (epartIsReg(rm)) {
sewardjdf0e0022005-01-25 15:48:43 +00002934 /* Specially handle XOR reg,reg, because that doesn't really
2935 depend on reg, and doing the obvious thing potentially
2936 generates a spurious value check failure due to the bogus
2937 dependency. */
sewardj5b470602005-02-27 13:10:48 +00002938 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
2939 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
sewardj4f9847d2005-07-25 11:58:34 +00002940 if (False && op8 == Iop_Sub8)
sewardj5b470602005-02-27 13:10:48 +00002941 vex_printf("vex amd64->IR: sbb %%r,%%r optimisation(1)\n");
sewardjcc3d2192013-03-27 11:37:33 +00002942 putIRegG(size,pfx,rm, mkU(ty,0));
sewardjdf0e0022005-01-25 15:48:43 +00002943 }
sewardj5b470602005-02-27 13:10:48 +00002944
2945 assign( dst0, getIRegG(size,pfx,rm) );
2946 assign( src, getIRegE(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002947
2948 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002949 helper_ADC( size, dst1, dst0, src,
2950 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002951 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002952 } else
2953 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002954 helper_SBB( size, dst1, dst0, src,
2955 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002956 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002957 } else {
2958 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2959 if (isAddSub(op8))
2960 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2961 else
2962 setFlags_DEP1(op8, dst1, ty);
2963 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002964 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002965 }
2966
2967 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002968 nameIRegE(size,pfx,rm),
2969 nameIRegG(size,pfx,rm));
sewardjdf0e0022005-01-25 15:48:43 +00002970 return 1+delta0;
2971 } else {
2972 /* E refers to memory */
sewardj2e28ac42008-12-04 00:05:12 +00002973 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00002974 assign( dst0, getIRegG(size,pfx,rm) );
sewardjdf0e0022005-01-25 15:48:43 +00002975 assign( src, loadLE(szToITy(size), mkexpr(addr)) );
2976
2977 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00002978 helper_ADC( size, dst1, dst0, src,
2979 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002980 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002981 } else
2982 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00002983 helper_SBB( size, dst1, dst0, src,
2984 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00002985 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002986 } else {
2987 assign( dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
2988 if (isAddSub(op8))
2989 setFlags_DEP1_DEP2(op8, dst0, src, ty);
2990 else
2991 setFlags_DEP1(op8, dst1, ty);
2992 if (keep)
sewardj5b470602005-02-27 13:10:48 +00002993 putIRegG(size, pfx, rm, mkexpr(dst1));
sewardjdf0e0022005-01-25 15:48:43 +00002994 }
2995
2996 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00002997 dis_buf, nameIRegG(size, pfx, rm));
sewardjdf0e0022005-01-25 15:48:43 +00002998 return len+delta0;
2999 }
3000}
3001
3002
3003
sewardj3ca55a12005-01-27 16:06:23 +00003004/* Handle binary integer instructions of the form
3005 op G, E meaning
3006 op reg, reg-or-mem
3007 Is passed the a ptr to the modRM byte, the actual operation, and the
3008 data size. Returns the address advanced completely over this
3009 instruction.
3010
3011 G(src) is reg.
3012 E(dst) is reg-or-mem
3013
3014 If E is reg, --> GET %E, tmp
3015 OP %G, tmp
3016 PUT tmp, %E
3017
3018 If E is mem, --> (getAddr E) -> tmpa
3019 LD (tmpa), tmpv
3020 OP %G, tmpv
3021 ST tmpv, (tmpa)
3022*/
3023static
sewardj2e28ac42008-12-04 00:05:12 +00003024ULong dis_op2_G_E ( VexAbiInfo* vbi,
3025 Prefix pfx,
sewardj8c332e22005-01-28 01:36:56 +00003026 Bool addSubCarry,
3027 IROp op8,
3028 Bool keep,
3029 Int size,
sewardj270def42005-07-03 01:03:01 +00003030 Long delta0,
florian55085f82012-11-21 00:36:55 +00003031 const HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00003032{
3033 HChar dis_buf[50];
3034 Int len;
3035 IRType ty = szToITy(size);
3036 IRTemp dst1 = newTemp(ty);
3037 IRTemp src = newTemp(ty);
3038 IRTemp dst0 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00003039 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00003040 IRTemp addr = IRTemp_INVALID;
3041
3042 /* addSubCarry == True indicates the intended operation is
3043 add-with-carry or subtract-with-borrow. */
3044 if (addSubCarry) {
3045 vassert(op8 == Iop_Add8 || op8 == Iop_Sub8);
3046 vassert(keep);
3047 }
3048
3049 if (epartIsReg(rm)) {
3050 /* Specially handle XOR reg,reg, because that doesn't really
3051 depend on reg, and doing the obvious thing potentially
3052 generates a spurious value check failure due to the bogus
sewardj5b470602005-02-27 13:10:48 +00003053 dependency. Ditto SBB reg,reg. */
3054 if ((op8 == Iop_Xor8 || (op8 == Iop_Sub8 && addSubCarry))
3055 && offsetIRegG(size,pfx,rm) == offsetIRegE(size,pfx,rm)) {
3056 putIRegE(size,pfx,rm, mkU(ty,0));
sewardj3ca55a12005-01-27 16:06:23 +00003057 }
sewardj5b470602005-02-27 13:10:48 +00003058
3059 assign(dst0, getIRegE(size,pfx,rm));
3060 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003061
3062 if (addSubCarry && op8 == Iop_Add8) {
sewardje9d8a262009-07-01 08:06:34 +00003063 helper_ADC( size, dst1, dst0, src,
3064 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00003065 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003066 } else
3067 if (addSubCarry && op8 == Iop_Sub8) {
sewardje9d8a262009-07-01 08:06:34 +00003068 helper_SBB( size, dst1, dst0, src,
3069 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5b470602005-02-27 13:10:48 +00003070 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003071 } else {
3072 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3073 if (isAddSub(op8))
3074 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3075 else
3076 setFlags_DEP1(op8, dst1, ty);
3077 if (keep)
sewardj5b470602005-02-27 13:10:48 +00003078 putIRegE(size, pfx, rm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003079 }
3080
3081 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003082 nameIRegG(size,pfx,rm),
3083 nameIRegE(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003084 return 1+delta0;
3085 }
3086
3087 /* E refers to memory */
3088 {
sewardj2e28ac42008-12-04 00:05:12 +00003089 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003090 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj5b470602005-02-27 13:10:48 +00003091 assign(src, getIRegG(size,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00003092
3093 if (addSubCarry && op8 == Iop_Add8) {
sewardj6491f862013-10-15 17:29:19 +00003094 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003095 /* cas-style store */
3096 helper_ADC( size, dst1, dst0, src,
3097 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3098 } else {
3099 /* normal store */
3100 helper_ADC( size, dst1, dst0, src,
3101 /*store*/addr, IRTemp_INVALID, 0 );
3102 }
sewardj3ca55a12005-01-27 16:06:23 +00003103 } else
3104 if (addSubCarry && op8 == Iop_Sub8) {
sewardj6491f862013-10-15 17:29:19 +00003105 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003106 /* cas-style store */
3107 helper_SBB( size, dst1, dst0, src,
3108 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3109 } else {
3110 /* normal store */
3111 helper_SBB( size, dst1, dst0, src,
3112 /*store*/addr, IRTemp_INVALID, 0 );
3113 }
sewardj3ca55a12005-01-27 16:06:23 +00003114 } else {
3115 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003116 if (keep) {
sewardj6491f862013-10-15 17:29:19 +00003117 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003118 if (0) vex_printf("locked case\n" );
3119 casLE( mkexpr(addr),
3120 mkexpr(dst0)/*expval*/,
3121 mkexpr(dst1)/*newval*/, guest_RIP_curr_instr );
3122 } else {
3123 if (0) vex_printf("nonlocked case\n");
3124 storeLE(mkexpr(addr), mkexpr(dst1));
3125 }
3126 }
sewardj3ca55a12005-01-27 16:06:23 +00003127 if (isAddSub(op8))
3128 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3129 else
3130 setFlags_DEP1(op8, dst1, ty);
sewardj3ca55a12005-01-27 16:06:23 +00003131 }
3132
3133 DIP("%s%c %s,%s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003134 nameIRegG(size,pfx,rm), dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003135 return len+delta0;
3136 }
3137}
3138
3139
sewardj1389d4d2005-01-28 13:46:29 +00003140/* Handle move instructions of the form
3141 mov E, G meaning
3142 mov reg-or-mem, reg
3143 Is passed the a ptr to the modRM byte, and the data size. Returns
3144 the address advanced completely over this instruction.
3145
3146 E(src) is reg-or-mem
3147 G(dst) is reg.
3148
3149 If E is reg, --> GET %E, tmpv
3150 PUT tmpv, %G
3151
3152 If E is mem --> (getAddr E) -> tmpa
3153 LD (tmpa), tmpb
3154 PUT tmpb, %G
3155*/
3156static
sewardj2e28ac42008-12-04 00:05:12 +00003157ULong dis_mov_E_G ( VexAbiInfo* vbi,
3158 Prefix pfx,
sewardj1389d4d2005-01-28 13:46:29 +00003159 Int size,
sewardj270def42005-07-03 01:03:01 +00003160 Long delta0 )
sewardj1389d4d2005-01-28 13:46:29 +00003161{
3162 Int len;
3163 UChar rm = getUChar(delta0);
3164 HChar dis_buf[50];
3165
3166 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003167 putIRegG(size, pfx, rm, getIRegE(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003168 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003169 nameIRegE(size,pfx,rm),
3170 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003171 return 1+delta0;
3172 }
3173
3174 /* E refers to memory */
3175 {
sewardj2e28ac42008-12-04 00:05:12 +00003176 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003177 putIRegG(size, pfx, rm, loadLE(szToITy(size), mkexpr(addr)));
sewardj1389d4d2005-01-28 13:46:29 +00003178 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003179 dis_buf,
3180 nameIRegG(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003181 return delta0+len;
3182 }
3183}
3184
3185
3186/* Handle move instructions of the form
3187 mov G, E meaning
3188 mov reg, reg-or-mem
3189 Is passed the a ptr to the modRM byte, and the data size. Returns
3190 the address advanced completely over this instruction.
sewardj38b1d692013-10-15 17:21:42 +00003191 We have to decide here whether F2 or F3 are acceptable. F2 never is.
sewardj1389d4d2005-01-28 13:46:29 +00003192
3193 G(src) is reg.
3194 E(dst) is reg-or-mem
3195
3196 If E is reg, --> GET %G, tmp
3197 PUT tmp, %E
3198
3199 If E is mem, --> (getAddr E) -> tmpa
3200 GET %G, tmpv
3201 ST tmpv, (tmpa)
3202*/
3203static
sewardj38b1d692013-10-15 17:21:42 +00003204ULong dis_mov_G_E ( VexAbiInfo* vbi,
3205 Prefix pfx,
3206 Int size,
3207 Long delta0,
3208 /*OUT*/Bool* ok )
sewardj1389d4d2005-01-28 13:46:29 +00003209{
sewardj38b1d692013-10-15 17:21:42 +00003210 Int len;
sewardj1389d4d2005-01-28 13:46:29 +00003211 UChar rm = getUChar(delta0);
3212 HChar dis_buf[50];
3213
sewardj38b1d692013-10-15 17:21:42 +00003214 *ok = True;
3215
sewardj1389d4d2005-01-28 13:46:29 +00003216 if (epartIsReg(rm)) {
sewardj38b1d692013-10-15 17:21:42 +00003217 if (haveF2orF3(pfx)) { *ok = False; return delta0; }
sewardj5b470602005-02-27 13:10:48 +00003218 putIRegE(size, pfx, rm, getIRegG(size, pfx, rm));
sewardje941eea2005-01-30 19:52:28 +00003219 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003220 nameIRegG(size,pfx,rm),
3221 nameIRegE(size,pfx,rm));
sewardj1389d4d2005-01-28 13:46:29 +00003222 return 1+delta0;
3223 }
3224
3225 /* E refers to memory */
3226 {
sewardj38b1d692013-10-15 17:21:42 +00003227 if (haveF2(pfx)) { *ok = False; return delta0; }
3228 /* F3(XRELEASE) is acceptable, though. */
sewardj2e28ac42008-12-04 00:05:12 +00003229 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003230 storeLE( mkexpr(addr), getIRegG(size, pfx, rm) );
sewardj1389d4d2005-01-28 13:46:29 +00003231 DIP("mov%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003232 nameIRegG(size,pfx,rm),
3233 dis_buf);
sewardj1389d4d2005-01-28 13:46:29 +00003234 return len+delta0;
3235 }
3236}
sewardj3ca55a12005-01-27 16:06:23 +00003237
3238
3239/* op $immediate, AL/AX/EAX/RAX. */
3240static
sewardj8c332e22005-01-28 01:36:56 +00003241ULong dis_op_imm_A ( Int size,
sewardj41c01092005-07-23 13:50:32 +00003242 Bool carrying,
sewardj8c332e22005-01-28 01:36:56 +00003243 IROp op8,
3244 Bool keep,
sewardj270def42005-07-03 01:03:01 +00003245 Long delta,
florian55085f82012-11-21 00:36:55 +00003246 const HChar* t_amd64opc )
sewardj3ca55a12005-01-27 16:06:23 +00003247{
3248 Int size4 = imin(size,4);
3249 IRType ty = szToITy(size);
3250 IRTemp dst0 = newTemp(ty);
3251 IRTemp src = newTemp(ty);
3252 IRTemp dst1 = newTemp(ty);
sewardj8c332e22005-01-28 01:36:56 +00003253 Long lit = getSDisp(size4,delta);
sewardj5b470602005-02-27 13:10:48 +00003254 assign(dst0, getIRegRAX(size));
sewardj1389d4d2005-01-28 13:46:29 +00003255 assign(src, mkU(ty,lit & mkSizeMask(size)));
sewardj41c01092005-07-23 13:50:32 +00003256
3257 if (isAddSub(op8) && !carrying) {
3258 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003259 setFlags_DEP1_DEP2(op8, dst0, src, ty);
sewardj41c01092005-07-23 13:50:32 +00003260 }
sewardj3ca55a12005-01-27 16:06:23 +00003261 else
sewardj41c01092005-07-23 13:50:32 +00003262 if (isLogic(op8)) {
3263 vassert(!carrying);
3264 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)) );
sewardj3ca55a12005-01-27 16:06:23 +00003265 setFlags_DEP1(op8, dst1, ty);
sewardj41c01092005-07-23 13:50:32 +00003266 }
sewardj3ca55a12005-01-27 16:06:23 +00003267 else
sewardj41c01092005-07-23 13:50:32 +00003268 if (op8 == Iop_Add8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003269 helper_ADC( size, dst1, dst0, src,
3270 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj41c01092005-07-23 13:50:32 +00003271 }
3272 else
sewardj5fadaf92006-05-12 20:45:59 +00003273 if (op8 == Iop_Sub8 && carrying) {
sewardje9d8a262009-07-01 08:06:34 +00003274 helper_SBB( size, dst1, dst0, src,
3275 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj5fadaf92006-05-12 20:45:59 +00003276 }
3277 else
sewardj41c01092005-07-23 13:50:32 +00003278 vpanic("dis_op_imm_A(amd64,guest)");
sewardj3ca55a12005-01-27 16:06:23 +00003279
3280 if (keep)
sewardj5b470602005-02-27 13:10:48 +00003281 putIRegRAX(size, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003282
3283 DIP("%s%c $%lld, %s\n", t_amd64opc, nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00003284 lit, nameIRegRAX(size));
sewardj3ca55a12005-01-27 16:06:23 +00003285 return delta+size4;
3286}
3287
3288
sewardj5e525292005-01-28 15:13:10 +00003289/* Sign- and Zero-extending moves. */
3290static
sewardj2e28ac42008-12-04 00:05:12 +00003291ULong dis_movx_E_G ( VexAbiInfo* vbi,
3292 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003293 Long delta, Int szs, Int szd, Bool sign_extend )
sewardj5e525292005-01-28 15:13:10 +00003294{
3295 UChar rm = getUChar(delta);
3296 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00003297 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003298 doScalarWidening(
3299 szs,szd,sign_extend,
sewardj5b470602005-02-27 13:10:48 +00003300 getIRegE(szs,pfx,rm)));
sewardj5e525292005-01-28 15:13:10 +00003301 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3302 nameISize(szs),
3303 nameISize(szd),
sewardj5b470602005-02-27 13:10:48 +00003304 nameIRegE(szs,pfx,rm),
3305 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003306 return 1+delta;
3307 }
3308
3309 /* E refers to memory */
3310 {
3311 Int len;
3312 HChar dis_buf[50];
sewardj2e28ac42008-12-04 00:05:12 +00003313 IRTemp addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj5b470602005-02-27 13:10:48 +00003314 putIRegG(szd, pfx, rm,
sewardj5e525292005-01-28 15:13:10 +00003315 doScalarWidening(
3316 szs,szd,sign_extend,
3317 loadLE(szToITy(szs),mkexpr(addr))));
3318 DIP("mov%c%c%c %s,%s\n", sign_extend ? 's' : 'z',
3319 nameISize(szs),
3320 nameISize(szd),
3321 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00003322 nameIRegG(szd,pfx,rm));
sewardj5e525292005-01-28 15:13:10 +00003323 return len+delta;
3324 }
3325}
sewardj32b2bbe2005-01-28 00:50:10 +00003326
3327
sewardj03b07cc2005-01-31 18:09:43 +00003328/* Generate code to divide ArchRegs RDX:RAX / EDX:EAX / DX:AX / AX by
3329 the 64 / 32 / 16 / 8 bit quantity in the given IRTemp. */
sewardj32b2bbe2005-01-28 00:50:10 +00003330static
3331void codegen_div ( Int sz, IRTemp t, Bool signed_divide )
3332{
sewardj03b07cc2005-01-31 18:09:43 +00003333 /* special-case the 64-bit case */
3334 if (sz == 8) {
3335 IROp op = signed_divide ? Iop_DivModS128to64
3336 : Iop_DivModU128to64;
sewardja6b93d12005-02-17 09:28:28 +00003337 IRTemp src128 = newTemp(Ity_I128);
3338 IRTemp dst128 = newTemp(Ity_I128);
sewardj03b07cc2005-01-31 18:09:43 +00003339 assign( src128, binop(Iop_64HLto128,
sewardja6b93d12005-02-17 09:28:28 +00003340 getIReg64(R_RDX),
3341 getIReg64(R_RAX)) );
sewardj03b07cc2005-01-31 18:09:43 +00003342 assign( dst128, binop(op, mkexpr(src128), mkexpr(t)) );
sewardja6b93d12005-02-17 09:28:28 +00003343 putIReg64( R_RAX, unop(Iop_128to64,mkexpr(dst128)) );
3344 putIReg64( R_RDX, unop(Iop_128HIto64,mkexpr(dst128)) );
sewardj03b07cc2005-01-31 18:09:43 +00003345 } else {
3346 IROp op = signed_divide ? Iop_DivModS64to32
3347 : Iop_DivModU64to32;
3348 IRTemp src64 = newTemp(Ity_I64);
3349 IRTemp dst64 = newTemp(Ity_I64);
3350 switch (sz) {
sewardj85520e42005-02-19 15:22:38 +00003351 case 4:
sewardj5b470602005-02-27 13:10:48 +00003352 assign( src64,
3353 binop(Iop_32HLto64, getIRegRDX(4), getIRegRAX(4)) );
3354 assign( dst64,
3355 binop(op, mkexpr(src64), mkexpr(t)) );
3356 putIRegRAX( 4, unop(Iop_64to32,mkexpr(dst64)) );
3357 putIRegRDX( 4, unop(Iop_64HIto32,mkexpr(dst64)) );
sewardj85520e42005-02-19 15:22:38 +00003358 break;
3359 case 2: {
3360 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3361 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3362 assign( src64, unop(widen3264,
3363 binop(Iop_16HLto32,
sewardj5b470602005-02-27 13:10:48 +00003364 getIRegRDX(2),
3365 getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003366 assign( dst64, binop(op, mkexpr(src64), unop(widen1632,mkexpr(t))) );
sewardj5b470602005-02-27 13:10:48 +00003367 putIRegRAX( 2, unop(Iop_32to16,unop(Iop_64to32,mkexpr(dst64))) );
3368 putIRegRDX( 2, unop(Iop_32to16,unop(Iop_64HIto32,mkexpr(dst64))) );
sewardj85520e42005-02-19 15:22:38 +00003369 break;
3370 }
3371 case 1: {
3372 IROp widen3264 = signed_divide ? Iop_32Sto64 : Iop_32Uto64;
3373 IROp widen1632 = signed_divide ? Iop_16Sto32 : Iop_16Uto32;
3374 IROp widen816 = signed_divide ? Iop_8Sto16 : Iop_8Uto16;
3375 assign( src64, unop(widen3264,
sewardj5b470602005-02-27 13:10:48 +00003376 unop(widen1632, getIRegRAX(2))) );
sewardj85520e42005-02-19 15:22:38 +00003377 assign( dst64,
3378 binop(op, mkexpr(src64),
3379 unop(widen1632, unop(widen816, mkexpr(t)))) );
sewardj5b470602005-02-27 13:10:48 +00003380 putIRegRAX( 1, unop(Iop_16to8,
3381 unop(Iop_32to16,
3382 unop(Iop_64to32,mkexpr(dst64)))) );
3383 putIRegAH( unop(Iop_16to8,
3384 unop(Iop_32to16,
3385 unop(Iop_64HIto32,mkexpr(dst64)))) );
sewardj85520e42005-02-19 15:22:38 +00003386 break;
3387 }
3388 default:
3389 vpanic("codegen_div(amd64)");
sewardj03b07cc2005-01-31 18:09:43 +00003390 }
sewardj32b2bbe2005-01-28 00:50:10 +00003391 }
3392}
sewardj3ca55a12005-01-27 16:06:23 +00003393
3394static
sewardj2e28ac42008-12-04 00:05:12 +00003395ULong dis_Grp1 ( VexAbiInfo* vbi,
3396 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003397 Long delta, UChar modrm,
sewardj227458e2005-01-31 19:04:50 +00003398 Int am_sz, Int d_sz, Int sz, Long d64 )
sewardj3ca55a12005-01-27 16:06:23 +00003399{
3400 Int len;
3401 HChar dis_buf[50];
3402 IRType ty = szToITy(sz);
3403 IRTemp dst1 = newTemp(ty);
3404 IRTemp src = newTemp(ty);
3405 IRTemp dst0 = newTemp(ty);
3406 IRTemp addr = IRTemp_INVALID;
3407 IROp op8 = Iop_INVALID;
sewardj1389d4d2005-01-28 13:46:29 +00003408 ULong mask = mkSizeMask(sz);
sewardj3ca55a12005-01-27 16:06:23 +00003409
sewardj901ed122005-02-27 13:25:31 +00003410 switch (gregLO3ofRM(modrm)) {
sewardj3ca55a12005-01-27 16:06:23 +00003411 case 0: op8 = Iop_Add8; break; case 1: op8 = Iop_Or8; break;
3412 case 2: break; // ADC
3413 case 3: break; // SBB
3414 case 4: op8 = Iop_And8; break; case 5: op8 = Iop_Sub8; break;
3415 case 6: op8 = Iop_Xor8; break; case 7: op8 = Iop_Sub8; break;
sewardjfd4203c2007-03-21 00:21:56 +00003416 /*NOTREACHED*/
sewardj3ca55a12005-01-27 16:06:23 +00003417 default: vpanic("dis_Grp1(amd64): unhandled case");
3418 }
3419
3420 if (epartIsReg(modrm)) {
3421 vassert(am_sz == 1);
3422
sewardj5b470602005-02-27 13:10:48 +00003423 assign(dst0, getIRegE(sz,pfx,modrm));
sewardj227458e2005-01-31 19:04:50 +00003424 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003425
sewardj901ed122005-02-27 13:25:31 +00003426 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardje9d8a262009-07-01 08:06:34 +00003427 helper_ADC( sz, dst1, dst0, src,
3428 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003429 } else
sewardj901ed122005-02-27 13:25:31 +00003430 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardje9d8a262009-07-01 08:06:34 +00003431 helper_SBB( sz, dst1, dst0, src,
3432 /*no store*/IRTemp_INVALID, IRTemp_INVALID, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00003433 } else {
3434 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
3435 if (isAddSub(op8))
3436 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3437 else
3438 setFlags_DEP1(op8, dst1, ty);
3439 }
3440
sewardj901ed122005-02-27 13:25:31 +00003441 if (gregLO3ofRM(modrm) < 7)
sewardj5b470602005-02-27 13:10:48 +00003442 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj3ca55a12005-01-27 16:06:23 +00003443
3444 delta += (am_sz + d_sz);
sewardje941eea2005-01-30 19:52:28 +00003445 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003446 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00003447 nameIRegE(sz,pfx,modrm));
sewardj3ca55a12005-01-27 16:06:23 +00003448 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003449 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj3ca55a12005-01-27 16:06:23 +00003450
3451 assign(dst0, loadLE(ty,mkexpr(addr)));
sewardj227458e2005-01-31 19:04:50 +00003452 assign(src, mkU(ty,d64 & mask));
sewardj3ca55a12005-01-27 16:06:23 +00003453
sewardj901ed122005-02-27 13:25:31 +00003454 if (gregLO3ofRM(modrm) == 2 /* ADC */) {
sewardj6491f862013-10-15 17:29:19 +00003455 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003456 /* cas-style store */
3457 helper_ADC( sz, dst1, dst0, src,
3458 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3459 } else {
3460 /* normal store */
3461 helper_ADC( sz, dst1, dst0, src,
3462 /*store*/addr, IRTemp_INVALID, 0 );
3463 }
sewardj3ca55a12005-01-27 16:06:23 +00003464 } else
sewardj901ed122005-02-27 13:25:31 +00003465 if (gregLO3ofRM(modrm) == 3 /* SBB */) {
sewardj6491f862013-10-15 17:29:19 +00003466 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003467 /* cas-style store */
3468 helper_SBB( sz, dst1, dst0, src,
3469 /*store*/addr, dst0/*expVal*/, guest_RIP_curr_instr );
3470 } else {
3471 /* normal store */
3472 helper_SBB( sz, dst1, dst0, src,
3473 /*store*/addr, IRTemp_INVALID, 0 );
3474 }
sewardj3ca55a12005-01-27 16:06:23 +00003475 } else {
3476 assign(dst1, binop(mkSizedOp(ty,op8), mkexpr(dst0), mkexpr(src)));
sewardje9d8a262009-07-01 08:06:34 +00003477 if (gregLO3ofRM(modrm) < 7) {
sewardj6491f862013-10-15 17:29:19 +00003478 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003479 casLE( mkexpr(addr), mkexpr(dst0)/*expVal*/,
3480 mkexpr(dst1)/*newVal*/,
3481 guest_RIP_curr_instr );
3482 } else {
3483 storeLE(mkexpr(addr), mkexpr(dst1));
3484 }
3485 }
sewardj3ca55a12005-01-27 16:06:23 +00003486 if (isAddSub(op8))
3487 setFlags_DEP1_DEP2(op8, dst0, src, ty);
3488 else
3489 setFlags_DEP1(op8, dst1, ty);
3490 }
3491
sewardj3ca55a12005-01-27 16:06:23 +00003492 delta += (len+d_sz);
sewardje941eea2005-01-30 19:52:28 +00003493 DIP("%s%c $%lld, %s\n",
sewardj901ed122005-02-27 13:25:31 +00003494 nameGrp1(gregLO3ofRM(modrm)), nameISize(sz),
sewardj227458e2005-01-31 19:04:50 +00003495 d64, dis_buf);
sewardj3ca55a12005-01-27 16:06:23 +00003496 }
3497 return delta;
3498}
3499
3500
sewardj118b23e2005-01-29 02:14:44 +00003501/* Group 2 extended opcodes. shift_expr must be an 8-bit typed
3502 expression. */
3503
3504static
sewardj2e28ac42008-12-04 00:05:12 +00003505ULong dis_Grp2 ( VexAbiInfo* vbi,
3506 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003507 Long delta, UChar modrm,
sewardj118b23e2005-01-29 02:14:44 +00003508 Int am_sz, Int d_sz, Int sz, IRExpr* shift_expr,
florian55085f82012-11-21 00:36:55 +00003509 const HChar* shift_expr_txt, Bool* decode_OK )
sewardj118b23e2005-01-29 02:14:44 +00003510{
3511 /* delta on entry points at the modrm byte. */
3512 HChar dis_buf[50];
3513 Int len;
sewardjb5e5c6d2007-01-12 20:29:01 +00003514 Bool isShift, isRotate, isRotateC;
sewardj118b23e2005-01-29 02:14:44 +00003515 IRType ty = szToITy(sz);
3516 IRTemp dst0 = newTemp(ty);
3517 IRTemp dst1 = newTemp(ty);
3518 IRTemp addr = IRTemp_INVALID;
3519
sewardjfd4203c2007-03-21 00:21:56 +00003520 *decode_OK = True;
3521
sewardj118b23e2005-01-29 02:14:44 +00003522 vassert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
3523
3524 /* Put value to shift/rotate in dst0. */
3525 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003526 assign(dst0, getIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00003527 delta += (am_sz + d_sz);
3528 } else {
sewardj2e28ac42008-12-04 00:05:12 +00003529 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, /*xtra*/d_sz );
sewardj118b23e2005-01-29 02:14:44 +00003530 assign(dst0, loadLE(ty,mkexpr(addr)));
3531 delta += len + d_sz;
3532 }
3533
3534 isShift = False;
tomd6b43fd2011-08-19 16:06:52 +00003535 switch (gregLO3ofRM(modrm)) { case 4: case 5: case 6: case 7: isShift = True; }
sewardj118b23e2005-01-29 02:14:44 +00003536
3537 isRotate = False;
sewardj901ed122005-02-27 13:25:31 +00003538 switch (gregLO3ofRM(modrm)) { case 0: case 1: isRotate = True; }
sewardj118b23e2005-01-29 02:14:44 +00003539
sewardjb5e5c6d2007-01-12 20:29:01 +00003540 isRotateC = False;
3541 switch (gregLO3ofRM(modrm)) { case 2: case 3: isRotateC = True; }
sewardj118b23e2005-01-29 02:14:44 +00003542
sewardjb5e5c6d2007-01-12 20:29:01 +00003543 if (!isShift && !isRotate && !isRotateC) {
sewardjfd4203c2007-03-21 00:21:56 +00003544 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003545 vpanic("dis_Grp2(Reg): unhandled case(amd64)");
3546 }
3547
sewardjb5e5c6d2007-01-12 20:29:01 +00003548 if (isRotateC) {
sewardj112b0992005-07-23 13:19:32 +00003549 /* Call a helper; this insn is so ridiculous it does not deserve
3550 better. One problem is, the helper has to calculate both the
3551 new value and the new flags. This is more than 64 bits, and
3552 there is no way to return more than 64 bits from the helper.
3553 Hence the crude and obvious solution is to call it twice,
3554 using the sign of the sz field to indicate whether it is the
3555 value or rflags result we want.
3556 */
sewardjb5e5c6d2007-01-12 20:29:01 +00003557 Bool left = toBool(gregLO3ofRM(modrm) == 2);
sewardj112b0992005-07-23 13:19:32 +00003558 IRExpr** argsVALUE;
3559 IRExpr** argsRFLAGS;
3560
3561 IRTemp new_value = newTemp(Ity_I64);
3562 IRTemp new_rflags = newTemp(Ity_I64);
3563 IRTemp old_rflags = newTemp(Ity_I64);
3564
3565 assign( old_rflags, widenUto64(mk_amd64g_calculate_rflags_all()) );
3566
3567 argsVALUE
3568 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3569 widenUto64(shift_expr), /* rotate amount */
3570 mkexpr(old_rflags),
3571 mkU64(sz) );
3572 assign( new_value,
3573 mkIRExprCCall(
3574 Ity_I64,
3575 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003576 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3577 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003578 argsVALUE
3579 )
3580 );
3581
3582 argsRFLAGS
3583 = mkIRExprVec_4( widenUto64(mkexpr(dst0)), /* thing to rotate */
3584 widenUto64(shift_expr), /* rotate amount */
3585 mkexpr(old_rflags),
3586 mkU64(-sz) );
3587 assign( new_rflags,
3588 mkIRExprCCall(
3589 Ity_I64,
3590 0/*regparm*/,
sewardjb5e5c6d2007-01-12 20:29:01 +00003591 left ? "amd64g_calculate_RCL" : "amd64g_calculate_RCR",
3592 left ? &amd64g_calculate_RCL : &amd64g_calculate_RCR,
sewardj112b0992005-07-23 13:19:32 +00003593 argsRFLAGS
3594 )
3595 );
3596
3597 assign( dst1, narrowTo(ty, mkexpr(new_value)) );
3598 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3599 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(new_rflags) ));
3600 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3601 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
sewardj118b23e2005-01-29 02:14:44 +00003602 }
3603
sewardj112b0992005-07-23 13:19:32 +00003604 else
sewardj118b23e2005-01-29 02:14:44 +00003605 if (isShift) {
3606
3607 IRTemp pre64 = newTemp(Ity_I64);
3608 IRTemp res64 = newTemp(Ity_I64);
3609 IRTemp res64ss = newTemp(Ity_I64);
3610 IRTemp shift_amt = newTemp(Ity_I8);
sewardj1027dc22005-02-26 01:55:02 +00003611 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003612 IROp op64;
3613
sewardj901ed122005-02-27 13:25:31 +00003614 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00003615 case 4: op64 = Iop_Shl64; break;
3616 case 5: op64 = Iop_Shr64; break;
tomd6b43fd2011-08-19 16:06:52 +00003617 case 6: op64 = Iop_Shl64; break;
sewardj118b23e2005-01-29 02:14:44 +00003618 case 7: op64 = Iop_Sar64; break;
sewardjfd4203c2007-03-21 00:21:56 +00003619 /*NOTREACHED*/
sewardj118b23e2005-01-29 02:14:44 +00003620 default: vpanic("dis_Grp2:shift"); break;
3621 }
3622
3623 /* Widen the value to be shifted to 64 bits, do the shift, and
3624 narrow back down. This seems surprisingly long-winded, but
3625 unfortunately the AMD semantics requires that 8/16/32-bit
3626 shifts give defined results for shift values all the way up
sewardj03c96e82005-02-19 18:12:45 +00003627 to 32, and this seems the simplest way to do it. It has the
sewardj118b23e2005-01-29 02:14:44 +00003628 advantage that the only IR level shifts generated are of 64
3629 bit values, and the shift amount is guaranteed to be in the
3630 range 0 .. 63, thereby observing the IR semantics requiring
sewardj03c96e82005-02-19 18:12:45 +00003631 all shift values to be in the range 0 .. 2^word_size-1.
sewardj118b23e2005-01-29 02:14:44 +00003632
sewardj03c96e82005-02-19 18:12:45 +00003633 Therefore the shift amount is masked with 63 for 64-bit shifts
3634 and 31 for all others.
3635 */
3636 /* shift_amt = shift_expr & MASK, regardless of operation size */
3637 assign( shift_amt, binop(Iop_And8, shift_expr, mkU8(mask)) );
sewardj118b23e2005-01-29 02:14:44 +00003638
sewardj03c96e82005-02-19 18:12:45 +00003639 /* suitably widen the value to be shifted to 64 bits. */
sewardj118b23e2005-01-29 02:14:44 +00003640 assign( pre64, op64==Iop_Sar64 ? widenSto64(mkexpr(dst0))
3641 : widenUto64(mkexpr(dst0)) );
3642
3643 /* res64 = pre64 `shift` shift_amt */
3644 assign( res64, binop(op64, mkexpr(pre64), mkexpr(shift_amt)) );
3645
sewardj03c96e82005-02-19 18:12:45 +00003646 /* res64ss = pre64 `shift` ((shift_amt - 1) & MASK) */
sewardj118b23e2005-01-29 02:14:44 +00003647 assign( res64ss,
3648 binop(op64,
3649 mkexpr(pre64),
3650 binop(Iop_And8,
3651 binop(Iop_Sub8,
3652 mkexpr(shift_amt), mkU8(1)),
sewardj03c96e82005-02-19 18:12:45 +00003653 mkU8(mask))) );
sewardj118b23e2005-01-29 02:14:44 +00003654
3655 /* Build the flags thunk. */
3656 setFlags_DEP1_DEP2_shift(op64, res64, res64ss, ty, shift_amt);
3657
3658 /* Narrow the result back down. */
3659 assign( dst1, narrowTo(ty, mkexpr(res64)) );
3660
3661 } /* if (isShift) */
3662
3663 else
3664 if (isRotate) {
3665 Int ccOp = ty==Ity_I8 ? 0 : (ty==Ity_I16 ? 1
3666 : (ty==Ity_I32 ? 2 : 3));
sewardj901ed122005-02-27 13:25:31 +00003667 Bool left = toBool(gregLO3ofRM(modrm) == 0);
sewardj118b23e2005-01-29 02:14:44 +00003668 IRTemp rot_amt = newTemp(Ity_I8);
3669 IRTemp rot_amt64 = newTemp(Ity_I8);
3670 IRTemp oldFlags = newTemp(Ity_I64);
sewardj1027dc22005-02-26 01:55:02 +00003671 UChar mask = toUChar(sz==8 ? 63 : 31);
sewardj118b23e2005-01-29 02:14:44 +00003672
3673 /* rot_amt = shift_expr & mask */
3674 /* By masking the rotate amount thusly, the IR-level Shl/Shr
3675 expressions never shift beyond the word size and thus remain
3676 well defined. */
sewardj03c96e82005-02-19 18:12:45 +00003677 assign(rot_amt64, binop(Iop_And8, shift_expr, mkU8(mask)));
sewardj118b23e2005-01-29 02:14:44 +00003678
3679 if (ty == Ity_I64)
3680 assign(rot_amt, mkexpr(rot_amt64));
3681 else
3682 assign(rot_amt, binop(Iop_And8, mkexpr(rot_amt64), mkU8(8*sz-1)));
3683
3684 if (left) {
3685
3686 /* dst1 = (dst0 << rot_amt) | (dst0 >>u (wordsize-rot_amt)) */
3687 assign(dst1,
3688 binop( mkSizedOp(ty,Iop_Or8),
3689 binop( mkSizedOp(ty,Iop_Shl8),
3690 mkexpr(dst0),
3691 mkexpr(rot_amt)
3692 ),
3693 binop( mkSizedOp(ty,Iop_Shr8),
3694 mkexpr(dst0),
3695 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3696 )
3697 )
3698 );
3699 ccOp += AMD64G_CC_OP_ROLB;
3700
3701 } else { /* right */
3702
3703 /* dst1 = (dst0 >>u rot_amt) | (dst0 << (wordsize-rot_amt)) */
3704 assign(dst1,
3705 binop( mkSizedOp(ty,Iop_Or8),
3706 binop( mkSizedOp(ty,Iop_Shr8),
3707 mkexpr(dst0),
3708 mkexpr(rot_amt)
3709 ),
3710 binop( mkSizedOp(ty,Iop_Shl8),
3711 mkexpr(dst0),
3712 binop(Iop_Sub8,mkU8(8*sz), mkexpr(rot_amt))
3713 )
3714 )
3715 );
3716 ccOp += AMD64G_CC_OP_RORB;
3717
3718 }
3719
3720 /* dst1 now holds the rotated value. Build flag thunk. We
3721 need the resulting value for this, and the previous flags.
3722 Except don't set it if the rotate count is zero. */
3723
3724 assign(oldFlags, mk_amd64g_calculate_rflags_all());
3725
sewardj009230b2013-01-26 11:47:55 +00003726 /* rot_amt64 :: Ity_I8. We need to convert it to I1. */
3727 IRTemp rot_amt64b = newTemp(Ity_I1);
3728 assign(rot_amt64b, binop(Iop_CmpNE8, mkexpr(rot_amt64), mkU8(0)) );
3729
sewardj118b23e2005-01-29 02:14:44 +00003730 /* CC_DEP1 is the rotated value. CC_NDEP is flags before. */
3731 stmt( IRStmt_Put( OFFB_CC_OP,
florian99dd03e2013-01-29 03:56:06 +00003732 IRExpr_ITE( mkexpr(rot_amt64b),
3733 mkU64(ccOp),
3734 IRExpr_Get(OFFB_CC_OP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003735 stmt( IRStmt_Put( OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00003736 IRExpr_ITE( mkexpr(rot_amt64b),
3737 widenUto64(mkexpr(dst1)),
3738 IRExpr_Get(OFFB_CC_DEP1,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003739 stmt( IRStmt_Put( OFFB_CC_DEP2,
florian99dd03e2013-01-29 03:56:06 +00003740 IRExpr_ITE( mkexpr(rot_amt64b),
3741 mkU64(0),
3742 IRExpr_Get(OFFB_CC_DEP2,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003743 stmt( IRStmt_Put( OFFB_CC_NDEP,
florian99dd03e2013-01-29 03:56:06 +00003744 IRExpr_ITE( mkexpr(rot_amt64b),
3745 mkexpr(oldFlags),
3746 IRExpr_Get(OFFB_CC_NDEP,Ity_I64) ) ));
sewardj118b23e2005-01-29 02:14:44 +00003747 } /* if (isRotate) */
3748
3749 /* Save result, and finish up. */
3750 if (epartIsReg(modrm)) {
sewardj5b470602005-02-27 13:10:48 +00003751 putIRegE(sz, pfx, modrm, mkexpr(dst1));
sewardj118b23e2005-01-29 02:14:44 +00003752 if (vex_traceflags & VEX_TRACE_FE) {
3753 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003754 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003755 if (shift_expr_txt)
3756 vex_printf("%s", shift_expr_txt);
3757 else
3758 ppIRExpr(shift_expr);
sewardj5b470602005-02-27 13:10:48 +00003759 vex_printf(", %s\n", nameIRegE(sz,pfx,modrm));
sewardj118b23e2005-01-29 02:14:44 +00003760 }
3761 } else {
3762 storeLE(mkexpr(addr), mkexpr(dst1));
3763 if (vex_traceflags & VEX_TRACE_FE) {
3764 vex_printf("%s%c ",
sewardj901ed122005-02-27 13:25:31 +00003765 nameGrp2(gregLO3ofRM(modrm)), nameISize(sz) );
sewardj118b23e2005-01-29 02:14:44 +00003766 if (shift_expr_txt)
3767 vex_printf("%s", shift_expr_txt);
3768 else
3769 ppIRExpr(shift_expr);
3770 vex_printf(", %s\n", dis_buf);
3771 }
3772 }
3773 return delta;
3774}
3775
3776
sewardj1d511802005-03-27 17:59:45 +00003777/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */
3778static
sewardj2e28ac42008-12-04 00:05:12 +00003779ULong dis_Grp8_Imm ( VexAbiInfo* vbi,
3780 Prefix pfx,
sewardj270def42005-07-03 01:03:01 +00003781 Long delta, UChar modrm,
sewardj1d511802005-03-27 17:59:45 +00003782 Int am_sz, Int sz, ULong src_val,
3783 Bool* decode_OK )
3784{
3785 /* src_val denotes a d8.
3786 And delta on entry points at the modrm byte. */
sewardj118b23e2005-01-29 02:14:44 +00003787
sewardj1d511802005-03-27 17:59:45 +00003788 IRType ty = szToITy(sz);
3789 IRTemp t2 = newTemp(Ity_I64);
3790 IRTemp t2m = newTemp(Ity_I64);
3791 IRTemp t_addr = IRTemp_INVALID;
3792 HChar dis_buf[50];
3793 ULong mask;
sewardj9b967672005-02-08 11:13:09 +00003794
sewardj1d511802005-03-27 17:59:45 +00003795 /* we're optimists :-) */
3796 *decode_OK = True;
3797
sewardj38b1d692013-10-15 17:21:42 +00003798 /* Check whether F2 or F3 are acceptable. */
3799 if (epartIsReg(modrm)) {
3800 /* F2 or F3 are not allowed in the register case. */
3801 if (haveF2orF3(pfx)) {
3802 *decode_OK = False;
3803 return delta;
3804 }
3805 } else {
3806 /* F2 or F3 (but not both) are allowable provided LOCK is also
3807 present. */
3808 if (haveF2orF3(pfx)) {
3809 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
3810 *decode_OK = False;
3811 return delta;
3812 }
3813 }
3814 }
3815
sewardj1d511802005-03-27 17:59:45 +00003816 /* Limit src_val -- the bit offset -- to something within a word.
3817 The Intel docs say that literal offsets larger than a word are
3818 masked in this way. */
3819 switch (sz) {
3820 case 2: src_val &= 15; break;
3821 case 4: src_val &= 31; break;
sewardj537cab02005-04-07 02:03:52 +00003822 case 8: src_val &= 63; break;
sewardj1d511802005-03-27 17:59:45 +00003823 default: *decode_OK = False; return delta;
3824 }
3825
3826 /* Invent a mask suitable for the operation. */
3827 switch (gregLO3ofRM(modrm)) {
sewardj74b4f892005-05-06 01:43:56 +00003828 case 4: /* BT */ mask = 0; break;
3829 case 5: /* BTS */ mask = 1ULL << src_val; break;
3830 case 6: /* BTR */ mask = ~(1ULL << src_val); break;
3831 case 7: /* BTC */ mask = 1ULL << src_val; break;
sewardj1d511802005-03-27 17:59:45 +00003832 /* If this needs to be extended, probably simplest to make a
3833 new function to handle the other cases (0 .. 3). The
3834 Intel docs do however not indicate any use for 0 .. 3, so
3835 we don't expect this to happen. */
3836 default: *decode_OK = False; return delta;
3837 }
3838
3839 /* Fetch the value to be tested and modified into t2, which is
3840 64-bits wide regardless of sz. */
3841 if (epartIsReg(modrm)) {
3842 vassert(am_sz == 1);
3843 assign( t2, widenUto64(getIRegE(sz, pfx, modrm)) );
3844 delta += (am_sz + 1);
3845 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3846 nameISize(sz),
3847 src_val, nameIRegE(sz,pfx,modrm));
3848 } else {
3849 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00003850 t_addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 1 );
sewardj1d511802005-03-27 17:59:45 +00003851 delta += (len+1);
3852 assign( t2, widenUto64(loadLE(ty, mkexpr(t_addr))) );
3853 DIP("%s%c $0x%llx, %s\n", nameGrp8(gregLO3ofRM(modrm)),
3854 nameISize(sz),
3855 src_val, dis_buf);
3856 }
3857
sewardj1d511802005-03-27 17:59:45 +00003858 /* Compute the new value into t2m, if non-BT. */
3859 switch (gregLO3ofRM(modrm)) {
3860 case 4: /* BT */
3861 break;
3862 case 5: /* BTS */
3863 assign( t2m, binop(Iop_Or64, mkU64(mask), mkexpr(t2)) );
3864 break;
3865 case 6: /* BTR */
3866 assign( t2m, binop(Iop_And64, mkU64(mask), mkexpr(t2)) );
3867 break;
3868 case 7: /* BTC */
3869 assign( t2m, binop(Iop_Xor64, mkU64(mask), mkexpr(t2)) );
3870 break;
3871 default:
sewardje9d8a262009-07-01 08:06:34 +00003872 /*NOTREACHED*/ /*the previous switch guards this*/
sewardj1d511802005-03-27 17:59:45 +00003873 vassert(0);
3874 }
3875
3876 /* Write the result back, if non-BT. */
3877 if (gregLO3ofRM(modrm) != 4 /* BT */) {
3878 if (epartIsReg(modrm)) {
sewardjcc3d2192013-03-27 11:37:33 +00003879 putIRegE(sz, pfx, modrm, narrowTo(ty, mkexpr(t2m)));
sewardj1d511802005-03-27 17:59:45 +00003880 } else {
sewardj6491f862013-10-15 17:29:19 +00003881 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00003882 casLE( mkexpr(t_addr),
3883 narrowTo(ty, mkexpr(t2))/*expd*/,
3884 narrowTo(ty, mkexpr(t2m))/*new*/,
3885 guest_RIP_curr_instr );
3886 } else {
3887 storeLE(mkexpr(t_addr), narrowTo(ty, mkexpr(t2m)));
3888 }
sewardj1d511802005-03-27 17:59:45 +00003889 }
3890 }
3891
sewardje9d8a262009-07-01 08:06:34 +00003892 /* Copy relevant bit from t2 into the carry flag. */
3893 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
3894 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
3895 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
3896 stmt( IRStmt_Put(
3897 OFFB_CC_DEP1,
3898 binop(Iop_And64,
3899 binop(Iop_Shr64, mkexpr(t2), mkU8(src_val)),
3900 mkU64(1))
3901 ));
3902 /* Set NDEP even though it isn't used. This makes redundant-PUT
3903 elimination of previous stores to this field work better. */
3904 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
3905
sewardj1d511802005-03-27 17:59:45 +00003906 return delta;
3907}
sewardj9b967672005-02-08 11:13:09 +00003908
3909
3910/* Signed/unsigned widening multiply. Generate IR to multiply the
3911 value in RAX/EAX/AX/AL by the given IRTemp, and park the result in
3912 RDX:RAX/EDX:EAX/DX:AX/AX.
3913*/
3914static void codegen_mulL_A_D ( Int sz, Bool syned,
florian55085f82012-11-21 00:36:55 +00003915 IRTemp tmp, const HChar* tmp_txt )
sewardj9b967672005-02-08 11:13:09 +00003916{
3917 IRType ty = szToITy(sz);
3918 IRTemp t1 = newTemp(ty);
3919
sewardj5b470602005-02-27 13:10:48 +00003920 assign( t1, getIRegRAX(sz) );
sewardj9b967672005-02-08 11:13:09 +00003921
3922 switch (ty) {
3923 case Ity_I64: {
3924 IRTemp res128 = newTemp(Ity_I128);
3925 IRTemp resHi = newTemp(Ity_I64);
3926 IRTemp resLo = newTemp(Ity_I64);
3927 IROp mulOp = syned ? Iop_MullS64 : Iop_MullU64;
sewardj8bdb89a2005-05-05 21:46:50 +00003928 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
sewardj9b967672005-02-08 11:13:09 +00003929 setFlags_MUL ( Ity_I64, t1, tmp, tBaseOp );
3930 assign( res128, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3931 assign( resHi, unop(Iop_128HIto64,mkexpr(res128)));
3932 assign( resLo, unop(Iop_128to64,mkexpr(res128)));
sewardj5b470602005-02-27 13:10:48 +00003933 putIReg64(R_RDX, mkexpr(resHi));
3934 putIReg64(R_RAX, mkexpr(resLo));
sewardj9b967672005-02-08 11:13:09 +00003935 break;
3936 }
sewardj85520e42005-02-19 15:22:38 +00003937 case Ity_I32: {
3938 IRTemp res64 = newTemp(Ity_I64);
3939 IRTemp resHi = newTemp(Ity_I32);
3940 IRTemp resLo = newTemp(Ity_I32);
3941 IROp mulOp = syned ? Iop_MullS32 : Iop_MullU32;
3942 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3943 setFlags_MUL ( Ity_I32, t1, tmp, tBaseOp );
3944 assign( res64, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3945 assign( resHi, unop(Iop_64HIto32,mkexpr(res64)));
3946 assign( resLo, unop(Iop_64to32,mkexpr(res64)));
sewardj5b470602005-02-27 13:10:48 +00003947 putIRegRDX(4, mkexpr(resHi));
3948 putIRegRAX(4, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003949 break;
3950 }
3951 case Ity_I16: {
3952 IRTemp res32 = newTemp(Ity_I32);
3953 IRTemp resHi = newTemp(Ity_I16);
3954 IRTemp resLo = newTemp(Ity_I16);
3955 IROp mulOp = syned ? Iop_MullS16 : Iop_MullU16;
3956 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3957 setFlags_MUL ( Ity_I16, t1, tmp, tBaseOp );
3958 assign( res32, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3959 assign( resHi, unop(Iop_32HIto16,mkexpr(res32)));
3960 assign( resLo, unop(Iop_32to16,mkexpr(res32)));
sewardj5b470602005-02-27 13:10:48 +00003961 putIRegRDX(2, mkexpr(resHi));
3962 putIRegRAX(2, mkexpr(resLo));
sewardj85520e42005-02-19 15:22:38 +00003963 break;
3964 }
3965 case Ity_I8: {
3966 IRTemp res16 = newTemp(Ity_I16);
3967 IRTemp resHi = newTemp(Ity_I8);
3968 IRTemp resLo = newTemp(Ity_I8);
3969 IROp mulOp = syned ? Iop_MullS8 : Iop_MullU8;
3970 UInt tBaseOp = syned ? AMD64G_CC_OP_SMULB : AMD64G_CC_OP_UMULB;
3971 setFlags_MUL ( Ity_I8, t1, tmp, tBaseOp );
3972 assign( res16, binop(mulOp, mkexpr(t1), mkexpr(tmp)) );
3973 assign( resHi, unop(Iop_16HIto8,mkexpr(res16)));
3974 assign( resLo, unop(Iop_16to8,mkexpr(res16)));
sewardj5b470602005-02-27 13:10:48 +00003975 putIRegRAX(2, mkexpr(res16));
sewardj85520e42005-02-19 15:22:38 +00003976 break;
3977 }
sewardj9b967672005-02-08 11:13:09 +00003978 default:
sewardj85520e42005-02-19 15:22:38 +00003979 ppIRType(ty);
sewardj9b967672005-02-08 11:13:09 +00003980 vpanic("codegen_mulL_A_D(amd64)");
3981 }
3982 DIP("%s%c %s\n", syned ? "imul" : "mul", nameISize(sz), tmp_txt);
3983}
sewardj32b2bbe2005-01-28 00:50:10 +00003984
3985
sewardj38b1d692013-10-15 17:21:42 +00003986/* Group 3 extended opcodes. We have to decide here whether F2 and F3
3987 might be valid.*/
sewardj32b2bbe2005-01-28 00:50:10 +00003988static
sewardj2e28ac42008-12-04 00:05:12 +00003989ULong dis_Grp3 ( VexAbiInfo* vbi,
3990 Prefix pfx, Int sz, Long delta, Bool* decode_OK )
sewardj32b2bbe2005-01-28 00:50:10 +00003991{
sewardj227458e2005-01-31 19:04:50 +00003992 Long d64;
sewardj32b2bbe2005-01-28 00:50:10 +00003993 UChar modrm;
3994 HChar dis_buf[50];
3995 Int len;
3996 IRTemp addr;
3997 IRType ty = szToITy(sz);
3998 IRTemp t1 = newTemp(ty);
sewardj55dbb262005-01-28 16:36:51 +00003999 IRTemp dst1, src, dst0;
sewardjfd4203c2007-03-21 00:21:56 +00004000 *decode_OK = True;
sewardj8c332e22005-01-28 01:36:56 +00004001 modrm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00004002 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +00004003 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4004 if (haveF2orF3(pfx)) goto unhandled;
sewardj901ed122005-02-27 13:25:31 +00004005 switch (gregLO3ofRM(modrm)) {
sewardj118b23e2005-01-29 02:14:44 +00004006 case 0: { /* TEST */
4007 delta++;
4008 d64 = getSDisp(imin(4,sz), delta);
4009 delta += imin(4,sz);
4010 dst1 = newTemp(ty);
4011 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
sewardj5b470602005-02-27 13:10:48 +00004012 getIRegE(sz,pfx,modrm),
sewardj03b07cc2005-01-31 18:09:43 +00004013 mkU(ty, d64 & mkSizeMask(sz))));
sewardj118b23e2005-01-29 02:14:44 +00004014 setFlags_DEP1( Iop_And8, dst1, ty );
sewardj7eaa7cf2005-01-31 18:55:22 +00004015 DIP("test%c $%lld, %s\n",
sewardj118b23e2005-01-29 02:14:44 +00004016 nameISize(sz), d64,
sewardj5b470602005-02-27 13:10:48 +00004017 nameIRegE(sz, pfx, modrm));
sewardj118b23e2005-01-29 02:14:44 +00004018 break;
4019 }
sewardjfd4203c2007-03-21 00:21:56 +00004020 case 1:
4021 *decode_OK = False;
4022 return delta;
sewardj55dbb262005-01-28 16:36:51 +00004023 case 2: /* NOT */
4024 delta++;
sewardj5b470602005-02-27 13:10:48 +00004025 putIRegE(sz, pfx, modrm,
4026 unop(mkSizedOp(ty,Iop_Not8),
4027 getIRegE(sz, pfx, modrm)));
sewardj55dbb262005-01-28 16:36:51 +00004028 DIP("not%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004029 nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00004030 break;
4031 case 3: /* NEG */
4032 delta++;
4033 dst0 = newTemp(ty);
4034 src = newTemp(ty);
4035 dst1 = newTemp(ty);
4036 assign(dst0, mkU(ty,0));
sewardj5b470602005-02-27 13:10:48 +00004037 assign(src, getIRegE(sz, pfx, modrm));
sewardj2e28ac42008-12-04 00:05:12 +00004038 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4039 mkexpr(src)));
sewardj55dbb262005-01-28 16:36:51 +00004040 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj5b470602005-02-27 13:10:48 +00004041 putIRegE(sz, pfx, modrm, mkexpr(dst1));
4042 DIP("neg%c %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm));
sewardj55dbb262005-01-28 16:36:51 +00004043 break;
sewardj9b967672005-02-08 11:13:09 +00004044 case 4: /* MUL (unsigned widening) */
4045 delta++;
4046 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00004047 assign(src, getIRegE(sz,pfx,modrm));
sewardj9b967672005-02-08 11:13:09 +00004048 codegen_mulL_A_D ( sz, False, src,
sewardj5b470602005-02-27 13:10:48 +00004049 nameIRegE(sz,pfx,modrm) );
sewardj9b967672005-02-08 11:13:09 +00004050 break;
sewardj85520e42005-02-19 15:22:38 +00004051 case 5: /* IMUL (signed widening) */
4052 delta++;
4053 src = newTemp(ty);
sewardj5b470602005-02-27 13:10:48 +00004054 assign(src, getIRegE(sz,pfx,modrm));
sewardj85520e42005-02-19 15:22:38 +00004055 codegen_mulL_A_D ( sz, True, src,
sewardj5b470602005-02-27 13:10:48 +00004056 nameIRegE(sz,pfx,modrm) );
sewardj85520e42005-02-19 15:22:38 +00004057 break;
sewardj03b07cc2005-01-31 18:09:43 +00004058 case 6: /* DIV */
4059 delta++;
sewardj5b470602005-02-27 13:10:48 +00004060 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj03b07cc2005-01-31 18:09:43 +00004061 codegen_div ( sz, t1, False );
4062 DIP("div%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004063 nameIRegE(sz, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004064 break;
sewardj32b2bbe2005-01-28 00:50:10 +00004065 case 7: /* IDIV */
4066 delta++;
sewardj5b470602005-02-27 13:10:48 +00004067 assign( t1, getIRegE(sz, pfx, modrm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004068 codegen_div ( sz, t1, True );
4069 DIP("idiv%c %s\n", nameISize(sz),
sewardj5b470602005-02-27 13:10:48 +00004070 nameIRegE(sz, pfx, modrm));
sewardj32b2bbe2005-01-28 00:50:10 +00004071 break;
4072 default:
sewardjfd4203c2007-03-21 00:21:56 +00004073 /*NOTREACHED*/
4074 vpanic("Grp3(amd64,R)");
sewardj32b2bbe2005-01-28 00:50:10 +00004075 }
4076 } else {
sewardj38b1d692013-10-15 17:21:42 +00004077 /* Decide if F2/XACQ or F3/XREL might be valid. */
4078 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4079 if ((gregLO3ofRM(modrm) == 3/*NEG*/ || gregLO3ofRM(modrm) == 2/*NOT*/)
4080 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4081 validF2orF3 = True;
4082 }
4083 if (!validF2orF3) goto unhandled;
4084 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004085 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00004086 /* we have to inform disAMode of any immediate
sewardjcc3d2192013-03-27 11:37:33 +00004087 bytes used */
sewardj901ed122005-02-27 13:25:31 +00004088 gregLO3ofRM(modrm)==0/*TEST*/
sewardj7de0d3c2005-02-13 02:26:41 +00004089 ? imin(4,sz)
4090 : 0
4091 );
sewardj32b2bbe2005-01-28 00:50:10 +00004092 t1 = newTemp(ty);
4093 delta += len;
4094 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj901ed122005-02-27 13:25:31 +00004095 switch (gregLO3ofRM(modrm)) {
sewardj03b07cc2005-01-31 18:09:43 +00004096 case 0: { /* TEST */
4097 d64 = getSDisp(imin(4,sz), delta);
4098 delta += imin(4,sz);
4099 dst1 = newTemp(ty);
4100 assign(dst1, binop(mkSizedOp(ty,Iop_And8),
4101 mkexpr(t1),
4102 mkU(ty, d64 & mkSizeMask(sz))));
4103 setFlags_DEP1( Iop_And8, dst1, ty );
4104 DIP("test%c $%lld, %s\n", nameISize(sz), d64, dis_buf);
4105 break;
4106 }
sewardjfd4203c2007-03-21 00:21:56 +00004107 case 1:
4108 *decode_OK = False;
4109 return delta;
sewardj82c9f2f2005-03-02 16:05:13 +00004110 case 2: /* NOT */
sewardje9d8a262009-07-01 08:06:34 +00004111 dst1 = newTemp(ty);
4112 assign(dst1, unop(mkSizedOp(ty,Iop_Not8), mkexpr(t1)));
sewardj6491f862013-10-15 17:29:19 +00004113 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004114 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4115 guest_RIP_curr_instr );
4116 } else {
4117 storeLE( mkexpr(addr), mkexpr(dst1) );
4118 }
sewardj82c9f2f2005-03-02 16:05:13 +00004119 DIP("not%c %s\n", nameISize(sz), dis_buf);
4120 break;
sewardj7de0d3c2005-02-13 02:26:41 +00004121 case 3: /* NEG */
4122 dst0 = newTemp(ty);
4123 src = newTemp(ty);
4124 dst1 = newTemp(ty);
4125 assign(dst0, mkU(ty,0));
4126 assign(src, mkexpr(t1));
sewardj2e28ac42008-12-04 00:05:12 +00004127 assign(dst1, binop(mkSizedOp(ty,Iop_Sub8), mkexpr(dst0),
4128 mkexpr(src)));
sewardj6491f862013-10-15 17:29:19 +00004129 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004130 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(dst1)/*new*/,
4131 guest_RIP_curr_instr );
4132 } else {
4133 storeLE( mkexpr(addr), mkexpr(dst1) );
4134 }
sewardj7de0d3c2005-02-13 02:26:41 +00004135 setFlags_DEP1_DEP2(Iop_Sub8, dst0, src, ty);
sewardj7de0d3c2005-02-13 02:26:41 +00004136 DIP("neg%c %s\n", nameISize(sz), dis_buf);
4137 break;
sewardj31eecde2005-03-23 03:39:55 +00004138 case 4: /* MUL (unsigned widening) */
4139 codegen_mulL_A_D ( sz, False, t1, dis_buf );
4140 break;
sewardj3aba9eb2005-03-30 23:20:47 +00004141 case 5: /* IMUL */
4142 codegen_mulL_A_D ( sz, True, t1, dis_buf );
4143 break;
sewardj1001dc42005-02-21 08:25:55 +00004144 case 6: /* DIV */
4145 codegen_div ( sz, t1, False );
4146 DIP("div%c %s\n", nameISize(sz), dis_buf);
4147 break;
sewardj82c9f2f2005-03-02 16:05:13 +00004148 case 7: /* IDIV */
4149 codegen_div ( sz, t1, True );
4150 DIP("idiv%c %s\n", nameISize(sz), dis_buf);
4151 break;
sewardj32b2bbe2005-01-28 00:50:10 +00004152 default:
sewardjfd4203c2007-03-21 00:21:56 +00004153 /*NOTREACHED*/
4154 vpanic("Grp3(amd64,M)");
sewardj32b2bbe2005-01-28 00:50:10 +00004155 }
4156 }
4157 return delta;
sewardj38b1d692013-10-15 17:21:42 +00004158 unhandled:
4159 *decode_OK = False;
4160 return delta;
sewardj32b2bbe2005-01-28 00:50:10 +00004161}
4162
4163
sewardj38b1d692013-10-15 17:21:42 +00004164/* Group 4 extended opcodes. We have to decide here whether F2 and F3
4165 might be valid. */
sewardj03b07cc2005-01-31 18:09:43 +00004166static
sewardj2e28ac42008-12-04 00:05:12 +00004167ULong dis_Grp4 ( VexAbiInfo* vbi,
4168 Prefix pfx, Long delta, Bool* decode_OK )
sewardj03b07cc2005-01-31 18:09:43 +00004169{
4170 Int alen;
4171 UChar modrm;
4172 HChar dis_buf[50];
4173 IRType ty = Ity_I8;
4174 IRTemp t1 = newTemp(ty);
4175 IRTemp t2 = newTemp(ty);
4176
sewardjfd4203c2007-03-21 00:21:56 +00004177 *decode_OK = True;
4178
sewardj03b07cc2005-01-31 18:09:43 +00004179 modrm = getUChar(delta);
4180 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +00004181 /* F2/XACQ and F3/XREL are always invalid in the non-mem case. */
4182 if (haveF2orF3(pfx)) goto unhandled;
sewardj5b470602005-02-27 13:10:48 +00004183 assign(t1, getIRegE(1, pfx, modrm));
sewardj901ed122005-02-27 13:25:31 +00004184 switch (gregLO3ofRM(modrm)) {
sewardj85520e42005-02-19 15:22:38 +00004185 case 0: /* INC */
4186 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004187 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj85520e42005-02-19 15:22:38 +00004188 setFlags_INC_DEC( True, t2, ty );
4189 break;
sewardj03b07cc2005-01-31 18:09:43 +00004190 case 1: /* DEC */
4191 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardj5b470602005-02-27 13:10:48 +00004192 putIRegE(1, pfx, modrm, mkexpr(t2));
sewardj03b07cc2005-01-31 18:09:43 +00004193 setFlags_INC_DEC( False, t2, ty );
4194 break;
4195 default:
sewardjfd4203c2007-03-21 00:21:56 +00004196 *decode_OK = False;
4197 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004198 }
4199 delta++;
sewardj901ed122005-02-27 13:25:31 +00004200 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)),
sewardj5b470602005-02-27 13:10:48 +00004201 nameIRegE(1, pfx, modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004202 } else {
sewardj38b1d692013-10-15 17:21:42 +00004203 /* Decide if F2/XACQ or F3/XREL might be valid. */
4204 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4205 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4206 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4207 validF2orF3 = True;
4208 }
4209 if (!validF2orF3) goto unhandled;
4210 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004211 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj03b07cc2005-01-31 18:09:43 +00004212 assign( t1, loadLE(ty, mkexpr(addr)) );
sewardj901ed122005-02-27 13:25:31 +00004213 switch (gregLO3ofRM(modrm)) {
sewardj007e9ec2005-03-23 11:36:48 +00004214 case 0: /* INC */
4215 assign(t2, binop(Iop_Add8, mkexpr(t1), mkU8(1)));
sewardj6491f862013-10-15 17:29:19 +00004216 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004217 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4218 guest_RIP_curr_instr );
4219 } else {
4220 storeLE( mkexpr(addr), mkexpr(t2) );
4221 }
sewardj007e9ec2005-03-23 11:36:48 +00004222 setFlags_INC_DEC( True, t2, ty );
4223 break;
4224 case 1: /* DEC */
4225 assign(t2, binop(Iop_Sub8, mkexpr(t1), mkU8(1)));
sewardj6491f862013-10-15 17:29:19 +00004226 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004227 casLE( mkexpr(addr), mkexpr(t1)/*expd*/, mkexpr(t2)/*new*/,
4228 guest_RIP_curr_instr );
4229 } else {
4230 storeLE( mkexpr(addr), mkexpr(t2) );
4231 }
sewardj007e9ec2005-03-23 11:36:48 +00004232 setFlags_INC_DEC( False, t2, ty );
4233 break;
sewardj03b07cc2005-01-31 18:09:43 +00004234 default:
sewardjfd4203c2007-03-21 00:21:56 +00004235 *decode_OK = False;
4236 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004237 }
4238 delta += alen;
sewardj901ed122005-02-27 13:25:31 +00004239 DIP("%sb %s\n", nameGrp4(gregLO3ofRM(modrm)), dis_buf);
sewardj03b07cc2005-01-31 18:09:43 +00004240 }
4241 return delta;
sewardj38b1d692013-10-15 17:21:42 +00004242 unhandled:
4243 *decode_OK = False;
4244 return delta;
sewardj03b07cc2005-01-31 18:09:43 +00004245}
sewardj354e5c62005-01-27 20:12:52 +00004246
4247
sewardj38b1d692013-10-15 17:21:42 +00004248/* Group 5 extended opcodes. We have to decide here whether F2 and F3
4249 might be valid. */
sewardj354e5c62005-01-27 20:12:52 +00004250static
sewardjdd40fdf2006-12-24 02:20:24 +00004251ULong dis_Grp5 ( VexAbiInfo* vbi,
sewardjfd4203c2007-03-21 00:21:56 +00004252 Prefix pfx, Int sz, Long delta,
sewardjc6f970f2012-04-02 21:54:49 +00004253 /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
sewardj354e5c62005-01-27 20:12:52 +00004254{
4255 Int len;
4256 UChar modrm;
4257 HChar dis_buf[50];
4258 IRTemp addr = IRTemp_INVALID;
4259 IRType ty = szToITy(sz);
4260 IRTemp t1 = newTemp(ty);
4261 IRTemp t2 = IRTemp_INVALID;
sewardj03b07cc2005-01-31 18:09:43 +00004262 IRTemp t3 = IRTemp_INVALID;
sewardj7eaa7cf2005-01-31 18:55:22 +00004263 Bool showSz = True;
sewardj354e5c62005-01-27 20:12:52 +00004264
sewardjfd4203c2007-03-21 00:21:56 +00004265 *decode_OK = True;
4266
sewardj8c332e22005-01-28 01:36:56 +00004267 modrm = getUChar(delta);
sewardj354e5c62005-01-27 20:12:52 +00004268 if (epartIsReg(modrm)) {
mjw67ac3fd2014-05-09 11:41:06 +00004269 /* F2/XACQ and F3/XREL are always invalid in the non-mem case.
4270 F2/CALL and F2/JMP may have bnd prefix. */
4271 if (haveF2orF3(pfx)
4272 && ! (haveF2(pfx)
4273 && (gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)))
4274 goto unhandledR;
sewardj5b470602005-02-27 13:10:48 +00004275 assign(t1, getIRegE(sz,pfx,modrm));
sewardj901ed122005-02-27 13:25:31 +00004276 switch (gregLO3ofRM(modrm)) {
sewardj32b2bbe2005-01-28 00:50:10 +00004277 case 0: /* INC */
4278 t2 = newTemp(ty);
4279 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4280 mkexpr(t1), mkU(ty,1)));
4281 setFlags_INC_DEC( True, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004282 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004283 break;
4284 case 1: /* DEC */
4285 t2 = newTemp(ty);
4286 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4287 mkexpr(t1), mkU(ty,1)));
4288 setFlags_INC_DEC( False, t2, ty );
sewardj5b470602005-02-27 13:10:48 +00004289 putIRegE(sz,pfx,modrm, mkexpr(t2));
sewardj32b2bbe2005-01-28 00:50:10 +00004290 break;
sewardj354e5c62005-01-27 20:12:52 +00004291 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004292 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004293 if (!(sz == 4 || sz == 8)) goto unhandledR;
mjw67ac3fd2014-05-09 11:41:06 +00004294 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj03b07cc2005-01-31 18:09:43 +00004295 sz = 8;
4296 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004297 assign(t3, getIRegE(sz,pfx,modrm));
sewardj03b07cc2005-01-31 18:09:43 +00004298 t2 = newTemp(Ity_I64);
4299 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4300 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004301 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
sewardj478646f2008-05-01 20:13:04 +00004302 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
sewardjc6f970f2012-04-02 21:54:49 +00004303 jmp_treg(dres, Ijk_Call, t3);
4304 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004305 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004306 break;
sewardj354e5c62005-01-27 20:12:52 +00004307 case 4: /* jmp Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004308 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004309 if (!(sz == 4 || sz == 8)) goto unhandledR;
mjw67ac3fd2014-05-09 11:41:06 +00004310 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj03b07cc2005-01-31 18:09:43 +00004311 sz = 8;
4312 t3 = newTemp(Ity_I64);
sewardj5b470602005-02-27 13:10:48 +00004313 assign(t3, getIRegE(sz,pfx,modrm));
sewardjc6f970f2012-04-02 21:54:49 +00004314 jmp_treg(dres, Ijk_Boring, t3);
4315 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004316 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004317 break;
sewardj66e40ae2013-10-14 21:47:14 +00004318 case 6: /* PUSH Ev */
4319 /* There is no encoding for 32-bit operand size; hence ... */
4320 if (sz == 4) sz = 8;
4321 if (sz == 8 || sz == 2) {
4322 ty = szToITy(sz); /* redo it, since sz might have changed */
4323 t3 = newTemp(ty);
4324 assign(t3, getIRegE(sz,pfx,modrm));
4325 t2 = newTemp(Ity_I64);
4326 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4327 putIReg64(R_RSP, mkexpr(t2) );
4328 storeLE( mkexpr(t2), mkexpr(t3) );
4329 break;
4330 } else {
4331 goto unhandledR; /* awaiting test case */
4332 }
4333 default:
4334 unhandledR:
sewardjfd4203c2007-03-21 00:21:56 +00004335 *decode_OK = False;
4336 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004337 }
4338 delta++;
sewardj901ed122005-02-27 13:25:31 +00004339 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004340 showSz ? nameISize(sz) : ' ',
sewardj5b470602005-02-27 13:10:48 +00004341 nameIRegE(sz, pfx, modrm));
sewardj354e5c62005-01-27 20:12:52 +00004342 } else {
mjw67ac3fd2014-05-09 11:41:06 +00004343 /* Decide if F2/XACQ, F3/XREL, F2/CALL or F2/JMP might be valid. */
sewardj38b1d692013-10-15 17:21:42 +00004344 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
4345 if ((gregLO3ofRM(modrm) == 0/*INC*/ || gregLO3ofRM(modrm) == 1/*DEC*/)
4346 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
4347 validF2orF3 = True;
mjw67ac3fd2014-05-09 11:41:06 +00004348 } else if ((gregLO3ofRM(modrm) == 2 || gregLO3ofRM(modrm) == 4)
4349 && (haveF2(pfx) && !haveF3(pfx))) {
4350 validF2orF3 = True;
sewardj38b1d692013-10-15 17:21:42 +00004351 }
4352 if (!validF2orF3) goto unhandledM;
4353 /* */
sewardj2e28ac42008-12-04 00:05:12 +00004354 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj901ed122005-02-27 13:25:31 +00004355 if (gregLO3ofRM(modrm) != 2 && gregLO3ofRM(modrm) != 4
4356 && gregLO3ofRM(modrm) != 6) {
sewardj03b07cc2005-01-31 18:09:43 +00004357 assign(t1, loadLE(ty,mkexpr(addr)));
sewardj909c06d2005-02-19 22:47:41 +00004358 }
sewardj901ed122005-02-27 13:25:31 +00004359 switch (gregLO3ofRM(modrm)) {
sewardj354e5c62005-01-27 20:12:52 +00004360 case 0: /* INC */
sewardj354e5c62005-01-27 20:12:52 +00004361 t2 = newTemp(ty);
4362 assign(t2, binop(mkSizedOp(ty,Iop_Add8),
4363 mkexpr(t1), mkU(ty,1)));
sewardj6491f862013-10-15 17:29:19 +00004364 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004365 casLE( mkexpr(addr),
4366 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4367 } else {
4368 storeLE(mkexpr(addr),mkexpr(t2));
4369 }
sewardj354e5c62005-01-27 20:12:52 +00004370 setFlags_INC_DEC( True, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004371 break;
sewardj354e5c62005-01-27 20:12:52 +00004372 case 1: /* DEC */
4373 t2 = newTemp(ty);
4374 assign(t2, binop(mkSizedOp(ty,Iop_Sub8),
4375 mkexpr(t1), mkU(ty,1)));
sewardj6491f862013-10-15 17:29:19 +00004376 if (haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00004377 casLE( mkexpr(addr),
4378 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
4379 } else {
4380 storeLE(mkexpr(addr),mkexpr(t2));
4381 }
sewardj354e5c62005-01-27 20:12:52 +00004382 setFlags_INC_DEC( False, t2, ty );
sewardj354e5c62005-01-27 20:12:52 +00004383 break;
4384 case 2: /* call Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004385 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004386 if (!(sz == 4 || sz == 8)) goto unhandledM;
mjw67ac3fd2014-05-09 11:41:06 +00004387 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj7eaa7cf2005-01-31 18:55:22 +00004388 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004389 t3 = newTemp(Ity_I64);
4390 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
4391 t2 = newTemp(Ity_I64);
4392 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
4393 putIReg64(R_RSP, mkexpr(t2));
sewardj9e6491a2005-07-02 19:24:10 +00004394 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
sewardj478646f2008-05-01 20:13:04 +00004395 make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
sewardjc6f970f2012-04-02 21:54:49 +00004396 jmp_treg(dres, Ijk_Call, t3);
4397 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004398 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004399 break;
sewardj354e5c62005-01-27 20:12:52 +00004400 case 4: /* JMP Ev */
sewardj03b07cc2005-01-31 18:09:43 +00004401 /* Ignore any sz value and operate as if sz==8. */
sewardj66e40ae2013-10-14 21:47:14 +00004402 if (!(sz == 4 || sz == 8)) goto unhandledM;
mjw67ac3fd2014-05-09 11:41:06 +00004403 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj7eaa7cf2005-01-31 18:55:22 +00004404 sz = 8;
sewardj03b07cc2005-01-31 18:09:43 +00004405 t3 = newTemp(Ity_I64);
4406 assign(t3, loadLE(Ity_I64,mkexpr(addr)));
sewardjc6f970f2012-04-02 21:54:49 +00004407 jmp_treg(dres, Ijk_Boring, t3);
4408 vassert(dres->whatNext == Dis_StopHere);
sewardj7eaa7cf2005-01-31 18:55:22 +00004409 showSz = False;
sewardj354e5c62005-01-27 20:12:52 +00004410 break;
sewardj354e5c62005-01-27 20:12:52 +00004411 case 6: /* PUSH Ev */
sewardja6b93d12005-02-17 09:28:28 +00004412 /* There is no encoding for 32-bit operand size; hence ... */
4413 if (sz == 4) sz = 8;
sewardj66e40ae2013-10-14 21:47:14 +00004414 if (sz == 8 || sz == 2) {
4415 ty = szToITy(sz); /* redo it, since sz might have changed */
4416 t3 = newTemp(ty);
4417 assign(t3, loadLE(ty,mkexpr(addr)));
sewardj909c06d2005-02-19 22:47:41 +00004418 t2 = newTemp(Ity_I64);
4419 assign( t2, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
4420 putIReg64(R_RSP, mkexpr(t2) );
4421 storeLE( mkexpr(t2), mkexpr(t3) );
4422 break;
sewardjcc3d2192013-03-27 11:37:33 +00004423 } else {
sewardj66e40ae2013-10-14 21:47:14 +00004424 goto unhandledM; /* awaiting test case */
sewardjcc3d2192013-03-27 11:37:33 +00004425 }
sewardj354e5c62005-01-27 20:12:52 +00004426 default:
sewardj66e40ae2013-10-14 21:47:14 +00004427 unhandledM:
sewardjfd4203c2007-03-21 00:21:56 +00004428 *decode_OK = False;
4429 return delta;
sewardj354e5c62005-01-27 20:12:52 +00004430 }
4431 delta += len;
sewardj901ed122005-02-27 13:25:31 +00004432 DIP("%s%c %s\n", nameGrp5(gregLO3ofRM(modrm)),
sewardj7eaa7cf2005-01-31 18:55:22 +00004433 showSz ? nameISize(sz) : ' ',
4434 dis_buf);
sewardj354e5c62005-01-27 20:12:52 +00004435 }
4436 return delta;
4437}
4438
4439
sewardjd0a12df2005-02-10 02:07:43 +00004440/*------------------------------------------------------------*/
4441/*--- Disassembling string ops (including REP prefixes) ---*/
4442/*------------------------------------------------------------*/
4443
4444/* Code shared by all the string ops */
4445static
4446void dis_string_op_increment ( Int sz, IRTemp t_inc )
4447{
4448 UChar logSz;
4449 if (sz == 8 || sz == 4 || sz == 2) {
4450 logSz = 1;
4451 if (sz == 4) logSz = 2;
4452 if (sz == 8) logSz = 3;
4453 assign( t_inc,
4454 binop(Iop_Shl64, IRExpr_Get( OFFB_DFLAG, Ity_I64 ),
4455 mkU8(logSz) ) );
4456 } else {
4457 assign( t_inc,
4458 IRExpr_Get( OFFB_DFLAG, Ity_I64 ) );
4459 }
4460}
4461
sewardj909c06d2005-02-19 22:47:41 +00004462static
tom5cdf4e32011-08-12 15:42:56 +00004463void dis_string_op( void (*dis_OP)( Int, IRTemp, Prefix pfx ),
florian55085f82012-11-21 00:36:55 +00004464 Int sz, const HChar* name, Prefix pfx )
sewardj909c06d2005-02-19 22:47:41 +00004465{
4466 IRTemp t_inc = newTemp(Ity_I64);
4467 /* Really we ought to inspect the override prefixes, but we don't.
4468 The following assertion catches any resulting sillyness. */
4469 vassert(pfx == clearSegBits(pfx));
4470 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004471 dis_OP( sz, t_inc, pfx );
sewardj909c06d2005-02-19 22:47:41 +00004472 DIP("%s%c\n", name, nameISize(sz));
4473}
4474
4475static
tom5cdf4e32011-08-12 15:42:56 +00004476void dis_MOVS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj909c06d2005-02-19 22:47:41 +00004477{
4478 IRType ty = szToITy(sz);
4479 IRTemp td = newTemp(Ity_I64); /* RDI */
4480 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004481 IRExpr *incd, *incs;
sewardj909c06d2005-02-19 22:47:41 +00004482
tom5cdf4e32011-08-12 15:42:56 +00004483 if (haveASO(pfx)) {
4484 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4485 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4486 } else {
4487 assign( td, getIReg64(R_RDI) );
4488 assign( ts, getIReg64(R_RSI) );
4489 }
sewardj909c06d2005-02-19 22:47:41 +00004490
4491 storeLE( mkexpr(td), loadLE(ty,mkexpr(ts)) );
4492
tom5cdf4e32011-08-12 15:42:56 +00004493 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4494 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4495 if (haveASO(pfx)) {
4496 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4497 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4498 }
4499 putIReg64( R_RDI, incd );
4500 putIReg64( R_RSI, incs );
sewardj909c06d2005-02-19 22:47:41 +00004501}
4502
sewardj0f99be62009-07-22 09:29:13 +00004503static
tom5cdf4e32011-08-12 15:42:56 +00004504void dis_LODS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj0f99be62009-07-22 09:29:13 +00004505{
4506 IRType ty = szToITy(sz);
4507 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004508 IRExpr *incs;
sewardj0f99be62009-07-22 09:29:13 +00004509
tom5cdf4e32011-08-12 15:42:56 +00004510 if (haveASO(pfx))
4511 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4512 else
4513 assign( ts, getIReg64(R_RSI) );
sewardj0f99be62009-07-22 09:29:13 +00004514
4515 putIRegRAX ( sz, loadLE(ty, mkexpr(ts)) );
4516
tom5cdf4e32011-08-12 15:42:56 +00004517 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4518 if (haveASO(pfx))
4519 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4520 putIReg64( R_RSI, incs );
sewardj0f99be62009-07-22 09:29:13 +00004521}
sewardja6b93d12005-02-17 09:28:28 +00004522
4523static
tom5cdf4e32011-08-12 15:42:56 +00004524void dis_STOS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardja6b93d12005-02-17 09:28:28 +00004525{
4526 IRType ty = szToITy(sz);
4527 IRTemp ta = newTemp(ty); /* rAX */
4528 IRTemp td = newTemp(Ity_I64); /* RDI */
tom5cdf4e32011-08-12 15:42:56 +00004529 IRExpr *incd;
sewardja6b93d12005-02-17 09:28:28 +00004530
sewardj5b470602005-02-27 13:10:48 +00004531 assign( ta, getIRegRAX(sz) );
sewardja6b93d12005-02-17 09:28:28 +00004532
tom5cdf4e32011-08-12 15:42:56 +00004533 if (haveASO(pfx))
4534 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4535 else
4536 assign( td, getIReg64(R_RDI) );
sewardja6b93d12005-02-17 09:28:28 +00004537
4538 storeLE( mkexpr(td), mkexpr(ta) );
4539
tom5cdf4e32011-08-12 15:42:56 +00004540 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4541 if (haveASO(pfx))
4542 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4543 putIReg64( R_RDI, incd );
sewardja6b93d12005-02-17 09:28:28 +00004544}
sewardjd0a12df2005-02-10 02:07:43 +00004545
4546static
tom5cdf4e32011-08-12 15:42:56 +00004547void dis_CMPS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004548{
4549 IRType ty = szToITy(sz);
4550 IRTemp tdv = newTemp(ty); /* (RDI) */
4551 IRTemp tsv = newTemp(ty); /* (RSI) */
4552 IRTemp td = newTemp(Ity_I64); /* RDI */
4553 IRTemp ts = newTemp(Ity_I64); /* RSI */
tom5cdf4e32011-08-12 15:42:56 +00004554 IRExpr *incd, *incs;
sewardjd0a12df2005-02-10 02:07:43 +00004555
tom5cdf4e32011-08-12 15:42:56 +00004556 if (haveASO(pfx)) {
4557 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4558 assign( ts, unop(Iop_32Uto64, getIReg32(R_RSI)) );
4559 } else {
4560 assign( td, getIReg64(R_RDI) );
4561 assign( ts, getIReg64(R_RSI) );
4562 }
sewardjd0a12df2005-02-10 02:07:43 +00004563
4564 assign( tdv, loadLE(ty,mkexpr(td)) );
4565
4566 assign( tsv, loadLE(ty,mkexpr(ts)) );
4567
4568 setFlags_DEP1_DEP2 ( Iop_Sub8, tsv, tdv, ty );
4569
tom5cdf4e32011-08-12 15:42:56 +00004570 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4571 incs = binop(Iop_Add64, mkexpr(ts), mkexpr(t_inc));
4572 if (haveASO(pfx)) {
4573 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4574 incs = unop(Iop_32Uto64, unop(Iop_64to32, incs));
4575 }
4576 putIReg64( R_RDI, incd );
4577 putIReg64( R_RSI, incs );
sewardjd0a12df2005-02-10 02:07:43 +00004578}
4579
sewardj85520e42005-02-19 15:22:38 +00004580static
tom5cdf4e32011-08-12 15:42:56 +00004581void dis_SCAS ( Int sz, IRTemp t_inc, Prefix pfx )
sewardj85520e42005-02-19 15:22:38 +00004582{
4583 IRType ty = szToITy(sz);
4584 IRTemp ta = newTemp(ty); /* rAX */
4585 IRTemp td = newTemp(Ity_I64); /* RDI */
4586 IRTemp tdv = newTemp(ty); /* (RDI) */
tom5cdf4e32011-08-12 15:42:56 +00004587 IRExpr *incd;
sewardj85520e42005-02-19 15:22:38 +00004588
sewardj5b470602005-02-27 13:10:48 +00004589 assign( ta, getIRegRAX(sz) );
sewardj85520e42005-02-19 15:22:38 +00004590
tom5cdf4e32011-08-12 15:42:56 +00004591 if (haveASO(pfx))
4592 assign( td, unop(Iop_32Uto64, getIReg32(R_RDI)) );
4593 else
4594 assign( td, getIReg64(R_RDI) );
sewardj85520e42005-02-19 15:22:38 +00004595
4596 assign( tdv, loadLE(ty,mkexpr(td)) );
4597
4598 setFlags_DEP1_DEP2 ( Iop_Sub8, ta, tdv, ty );
4599
tom5cdf4e32011-08-12 15:42:56 +00004600 incd = binop(Iop_Add64, mkexpr(td), mkexpr(t_inc));
4601 if (haveASO(pfx))
4602 incd = unop(Iop_32Uto64, unop(Iop_64to32, incd));
4603 putIReg64( R_RDI, incd );
sewardj85520e42005-02-19 15:22:38 +00004604}
sewardjd0a12df2005-02-10 02:07:43 +00004605
4606
4607/* Wrap the appropriate string op inside a REP/REPE/REPNE. We assume
4608 the insn is the last one in the basic block, and so emit a jump to
4609 the next insn, rather than just falling through. */
4610static
sewardjc6f970f2012-04-02 21:54:49 +00004611void dis_REP_op ( /*MOD*/DisResult* dres,
4612 AMD64Condcode cond,
tom5cdf4e32011-08-12 15:42:56 +00004613 void (*dis_OP)(Int, IRTemp, Prefix),
florian55085f82012-11-21 00:36:55 +00004614 Int sz, Addr64 rip, Addr64 rip_next, const HChar* name,
sewardj909c06d2005-02-19 22:47:41 +00004615 Prefix pfx )
sewardjd0a12df2005-02-10 02:07:43 +00004616{
4617 IRTemp t_inc = newTemp(Ity_I64);
tom5cdf4e32011-08-12 15:42:56 +00004618 IRTemp tc;
4619 IRExpr* cmp;
sewardjd0a12df2005-02-10 02:07:43 +00004620
sewardj909c06d2005-02-19 22:47:41 +00004621 /* Really we ought to inspect the override prefixes, but we don't.
4622 The following assertion catches any resulting sillyness. */
4623 vassert(pfx == clearSegBits(pfx));
4624
tom5cdf4e32011-08-12 15:42:56 +00004625 if (haveASO(pfx)) {
4626 tc = newTemp(Ity_I32); /* ECX */
4627 assign( tc, getIReg32(R_RCX) );
4628 cmp = binop(Iop_CmpEQ32, mkexpr(tc), mkU32(0));
4629 } else {
4630 tc = newTemp(Ity_I64); /* RCX */
4631 assign( tc, getIReg64(R_RCX) );
4632 cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
4633 }
sewardjd0a12df2005-02-10 02:07:43 +00004634
sewardjc6f970f2012-04-02 21:54:49 +00004635 stmt( IRStmt_Exit( cmp, Ijk_Boring,
4636 IRConst_U64(rip_next), OFFB_RIP ) );
sewardjd0a12df2005-02-10 02:07:43 +00004637
tom5cdf4e32011-08-12 15:42:56 +00004638 if (haveASO(pfx))
4639 putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
4640 else
4641 putIReg64(R_RCX, binop(Iop_Sub64, mkexpr(tc), mkU64(1)) );
sewardjd0a12df2005-02-10 02:07:43 +00004642
4643 dis_string_op_increment(sz, t_inc);
tom5cdf4e32011-08-12 15:42:56 +00004644 dis_OP (sz, t_inc, pfx);
sewardjd0a12df2005-02-10 02:07:43 +00004645
4646 if (cond == AMD64CondAlways) {
sewardjc6f970f2012-04-02 21:54:49 +00004647 jmp_lit(dres, Ijk_Boring, rip);
4648 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004649 } else {
4650 stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
4651 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +00004652 IRConst_U64(rip),
4653 OFFB_RIP ) );
4654 jmp_lit(dres, Ijk_Boring, rip_next);
4655 vassert(dres->whatNext == Dis_StopHere);
sewardjd0a12df2005-02-10 02:07:43 +00004656 }
4657 DIP("%s%c\n", name, nameISize(sz));
4658}
sewardj32b2bbe2005-01-28 00:50:10 +00004659
4660
4661/*------------------------------------------------------------*/
4662/*--- Arithmetic, etc. ---*/
4663/*------------------------------------------------------------*/
4664
4665/* IMUL E, G. Supplied eip points to the modR/M byte. */
4666static
sewardj2e28ac42008-12-04 00:05:12 +00004667ULong dis_mul_E_G ( VexAbiInfo* vbi,
4668 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004669 Int size,
sewardj270def42005-07-03 01:03:01 +00004670 Long delta0 )
sewardj32b2bbe2005-01-28 00:50:10 +00004671{
4672 Int alen;
4673 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004674 UChar rm = getUChar(delta0);
sewardj32b2bbe2005-01-28 00:50:10 +00004675 IRType ty = szToITy(size);
4676 IRTemp te = newTemp(ty);
4677 IRTemp tg = newTemp(ty);
4678 IRTemp resLo = newTemp(ty);
4679
sewardj5b470602005-02-27 13:10:48 +00004680 assign( tg, getIRegG(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004681 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004682 assign( te, getIRegE(size, pfx, rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004683 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004684 IRTemp addr = disAMode( &alen, vbi, pfx, delta0, dis_buf, 0 );
sewardj32b2bbe2005-01-28 00:50:10 +00004685 assign( te, loadLE(ty,mkexpr(addr)) );
4686 }
4687
4688 setFlags_MUL ( ty, te, tg, AMD64G_CC_OP_SMULB );
4689
4690 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tg) ) );
4691
sewardj5b470602005-02-27 13:10:48 +00004692 putIRegG(size, pfx, rm, mkexpr(resLo) );
sewardj32b2bbe2005-01-28 00:50:10 +00004693
4694 if (epartIsReg(rm)) {
4695 DIP("imul%c %s, %s\n", nameISize(size),
sewardj901ed122005-02-27 13:25:31 +00004696 nameIRegE(size,pfx,rm),
4697 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004698 return 1+delta0;
4699 } else {
4700 DIP("imul%c %s, %s\n", nameISize(size),
4701 dis_buf,
sewardj901ed122005-02-27 13:25:31 +00004702 nameIRegG(size,pfx,rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004703 return alen+delta0;
4704 }
4705}
4706
4707
4708/* IMUL I * E -> G. Supplied rip points to the modR/M byte. */
4709static
sewardj2e28ac42008-12-04 00:05:12 +00004710ULong dis_imul_I_E_G ( VexAbiInfo* vbi,
4711 Prefix pfx,
sewardj32b2bbe2005-01-28 00:50:10 +00004712 Int size,
sewardj270def42005-07-03 01:03:01 +00004713 Long delta,
sewardj32b2bbe2005-01-28 00:50:10 +00004714 Int litsize )
4715{
4716 Long d64;
4717 Int alen;
4718 HChar dis_buf[50];
sewardj8c332e22005-01-28 01:36:56 +00004719 UChar rm = getUChar(delta);
sewardj32b2bbe2005-01-28 00:50:10 +00004720 IRType ty = szToITy(size);
4721 IRTemp te = newTemp(ty);
4722 IRTemp tl = newTemp(ty);
4723 IRTemp resLo = newTemp(ty);
4724
sewardj85520e42005-02-19 15:22:38 +00004725 vassert(/*size == 1 ||*/ size == 2 || size == 4 || size == 8);
sewardj32b2bbe2005-01-28 00:50:10 +00004726
4727 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00004728 assign(te, getIRegE(size, pfx, rm));
sewardj32b2bbe2005-01-28 00:50:10 +00004729 delta++;
4730 } else {
sewardj2e28ac42008-12-04 00:05:12 +00004731 IRTemp addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
sewardj7de0d3c2005-02-13 02:26:41 +00004732 imin(4,litsize) );
sewardj32b2bbe2005-01-28 00:50:10 +00004733 assign(te, loadLE(ty, mkexpr(addr)));
4734 delta += alen;
4735 }
4736 d64 = getSDisp(imin(4,litsize),delta);
4737 delta += imin(4,litsize);
4738
sewardj1389d4d2005-01-28 13:46:29 +00004739 d64 &= mkSizeMask(size);
sewardj32b2bbe2005-01-28 00:50:10 +00004740 assign(tl, mkU(ty,d64));
4741
4742 assign( resLo, binop( mkSizedOp(ty, Iop_Mul8), mkexpr(te), mkexpr(tl) ));
4743
4744 setFlags_MUL ( ty, te, tl, AMD64G_CC_OP_SMULB );
4745
sewardj5b470602005-02-27 13:10:48 +00004746 putIRegG(size, pfx, rm, mkexpr(resLo));
sewardj32b2bbe2005-01-28 00:50:10 +00004747
4748 DIP("imul%c $%lld, %s, %s\n",
4749 nameISize(size), d64,
sewardj5b470602005-02-27 13:10:48 +00004750 ( epartIsReg(rm) ? nameIRegE(size,pfx,rm) : dis_buf ),
4751 nameIRegG(size,pfx,rm) );
sewardj32b2bbe2005-01-28 00:50:10 +00004752 return delta;
4753}
4754
4755
sewardjfd181282010-06-14 21:29:35 +00004756/* Generate an IR sequence to do a popcount operation on the supplied
4757 IRTemp, and return a new IRTemp holding the result. 'ty' may be
4758 Ity_I16, Ity_I32 or Ity_I64 only. */
4759static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src )
4760{
4761 Int i;
4762 if (ty == Ity_I16) {
4763 IRTemp old = IRTemp_INVALID;
4764 IRTemp nyu = IRTemp_INVALID;
4765 IRTemp mask[4], shift[4];
4766 for (i = 0; i < 4; i++) {
4767 mask[i] = newTemp(ty);
4768 shift[i] = 1 << i;
4769 }
4770 assign(mask[0], mkU16(0x5555));
4771 assign(mask[1], mkU16(0x3333));
4772 assign(mask[2], mkU16(0x0F0F));
4773 assign(mask[3], mkU16(0x00FF));
4774 old = src;
4775 for (i = 0; i < 4; i++) {
4776 nyu = newTemp(ty);
4777 assign(nyu,
4778 binop(Iop_Add16,
4779 binop(Iop_And16,
4780 mkexpr(old),
4781 mkexpr(mask[i])),
4782 binop(Iop_And16,
4783 binop(Iop_Shr16, mkexpr(old), mkU8(shift[i])),
4784 mkexpr(mask[i]))));
4785 old = nyu;
4786 }
4787 return nyu;
4788 }
4789 if (ty == Ity_I32) {
4790 IRTemp old = IRTemp_INVALID;
4791 IRTemp nyu = IRTemp_INVALID;
4792 IRTemp mask[5], shift[5];
4793 for (i = 0; i < 5; i++) {
4794 mask[i] = newTemp(ty);
4795 shift[i] = 1 << i;
4796 }
4797 assign(mask[0], mkU32(0x55555555));
4798 assign(mask[1], mkU32(0x33333333));
4799 assign(mask[2], mkU32(0x0F0F0F0F));
4800 assign(mask[3], mkU32(0x00FF00FF));
4801 assign(mask[4], mkU32(0x0000FFFF));
4802 old = src;
4803 for (i = 0; i < 5; i++) {
4804 nyu = newTemp(ty);
4805 assign(nyu,
4806 binop(Iop_Add32,
4807 binop(Iop_And32,
4808 mkexpr(old),
4809 mkexpr(mask[i])),
4810 binop(Iop_And32,
4811 binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
4812 mkexpr(mask[i]))));
4813 old = nyu;
4814 }
4815 return nyu;
4816 }
4817 if (ty == Ity_I64) {
4818 IRTemp old = IRTemp_INVALID;
4819 IRTemp nyu = IRTemp_INVALID;
4820 IRTemp mask[6], shift[6];
4821 for (i = 0; i < 6; i++) {
4822 mask[i] = newTemp(ty);
4823 shift[i] = 1 << i;
4824 }
4825 assign(mask[0], mkU64(0x5555555555555555ULL));
4826 assign(mask[1], mkU64(0x3333333333333333ULL));
4827 assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
4828 assign(mask[3], mkU64(0x00FF00FF00FF00FFULL));
4829 assign(mask[4], mkU64(0x0000FFFF0000FFFFULL));
4830 assign(mask[5], mkU64(0x00000000FFFFFFFFULL));
4831 old = src;
4832 for (i = 0; i < 6; i++) {
4833 nyu = newTemp(ty);
4834 assign(nyu,
4835 binop(Iop_Add64,
4836 binop(Iop_And64,
4837 mkexpr(old),
4838 mkexpr(mask[i])),
4839 binop(Iop_And64,
4840 binop(Iop_Shr64, mkexpr(old), mkU8(shift[i])),
4841 mkexpr(mask[i]))));
4842 old = nyu;
4843 }
4844 return nyu;
4845 }
4846 /*NOTREACHED*/
4847 vassert(0);
4848}
4849
4850
sewardj9a660ea2010-07-29 11:34:38 +00004851/* Generate an IR sequence to do a count-leading-zeroes operation on
4852 the supplied IRTemp, and return a new IRTemp holding the result.
4853 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4854 the argument is zero, return the number of bits in the word (the
4855 natural semantics). */
4856static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
4857{
4858 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4859
4860 IRTemp src64 = newTemp(Ity_I64);
4861 assign(src64, widenUto64( mkexpr(src) ));
4862
4863 IRTemp src64x = newTemp(Ity_I64);
4864 assign(src64x,
4865 binop(Iop_Shl64, mkexpr(src64),
4866 mkU8(64 - 8 * sizeofIRType(ty))));
4867
4868 // Clz64 has undefined semantics when its input is zero, so
4869 // special-case around that.
4870 IRTemp res64 = newTemp(Ity_I64);
4871 assign(res64,
florian99dd03e2013-01-29 03:56:06 +00004872 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00004873 binop(Iop_CmpEQ64, mkexpr(src64x), mkU64(0)),
florian99dd03e2013-01-29 03:56:06 +00004874 mkU64(8 * sizeofIRType(ty)),
4875 unop(Iop_Clz64, mkexpr(src64x))
sewardj9a660ea2010-07-29 11:34:38 +00004876 ));
4877
4878 IRTemp res = newTemp(ty);
4879 assign(res, narrowTo(ty, mkexpr(res64)));
4880 return res;
4881}
4882
4883
sewardjcc3d2192013-03-27 11:37:33 +00004884/* Generate an IR sequence to do a count-trailing-zeroes operation on
4885 the supplied IRTemp, and return a new IRTemp holding the result.
4886 'ty' may be Ity_I16, Ity_I32 or Ity_I64 only. In the case where
4887 the argument is zero, return the number of bits in the word (the
4888 natural semantics). */
4889static IRTemp gen_TZCNT ( IRType ty, IRTemp src )
4890{
4891 vassert(ty == Ity_I64 || ty == Ity_I32 || ty == Ity_I16);
4892
4893 IRTemp src64 = newTemp(Ity_I64);
4894 assign(src64, widenUto64( mkexpr(src) ));
4895
4896 // Ctz64 has undefined semantics when its input is zero, so
4897 // special-case around that.
4898 IRTemp res64 = newTemp(Ity_I64);
4899 assign(res64,
4900 IRExpr_ITE(
4901 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0)),
4902 mkU64(8 * sizeofIRType(ty)),
4903 unop(Iop_Ctz64, mkexpr(src64))
4904 ));
4905
4906 IRTemp res = newTemp(ty);
4907 assign(res, narrowTo(ty, mkexpr(res64)));
4908 return res;
4909}
4910
4911
sewardjbcbb9de2005-03-27 02:22:32 +00004912/*------------------------------------------------------------*/
4913/*--- ---*/
4914/*--- x87 FLOATING POINT INSTRUCTIONS ---*/
4915/*--- ---*/
4916/*------------------------------------------------------------*/
4917
4918/* --- Helper functions for dealing with the register stack. --- */
4919
4920/* --- Set the emulation-warning pseudo-register. --- */
4921
4922static void put_emwarn ( IRExpr* e /* :: Ity_I32 */ )
4923{
sewardjdd40fdf2006-12-24 02:20:24 +00004924 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
florian6ef84be2012-08-26 03:20:07 +00004925 stmt( IRStmt_Put( OFFB_EMNOTE, e ) );
sewardjbcbb9de2005-03-27 02:22:32 +00004926}
sewardj8d965312005-02-25 02:48:47 +00004927
4928/* --- Produce an IRExpr* denoting a 64-bit QNaN. --- */
4929
4930static IRExpr* mkQNaN64 ( void )
4931{
4932 /* QNaN is 0 2047 1 0(51times)
4933 == 0b 11111111111b 1 0(51times)
4934 == 0x7FF8 0000 0000 0000
4935 */
4936 return IRExpr_Const(IRConst_F64i(0x7FF8000000000000ULL));
4937}
4938
4939/* --------- Get/put the top-of-stack pointer :: Ity_I32 --------- */
4940
4941static IRExpr* get_ftop ( void )
4942{
4943 return IRExpr_Get( OFFB_FTOP, Ity_I32 );
4944}
4945
4946static void put_ftop ( IRExpr* e )
4947{
sewardjdd40fdf2006-12-24 02:20:24 +00004948 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj8d965312005-02-25 02:48:47 +00004949 stmt( IRStmt_Put( OFFB_FTOP, e ) );
4950}
4951
sewardj25a85812005-05-08 23:03:48 +00004952/* --------- Get/put the C3210 bits. --------- */
4953
4954static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
4955{
4956 return IRExpr_Get( OFFB_FC3210, Ity_I64 );
4957}
4958
4959static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
4960{
sewardjdd40fdf2006-12-24 02:20:24 +00004961 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
sewardj25a85812005-05-08 23:03:48 +00004962 stmt( IRStmt_Put( OFFB_FC3210, e ) );
4963}
sewardjc49ce232005-02-25 13:03:03 +00004964
4965/* --------- Get/put the FPU rounding mode. --------- */
4966static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
4967{
4968 return unop(Iop_64to32, IRExpr_Get( OFFB_FPROUND, Ity_I64 ));
4969}
4970
sewardj5e205372005-05-09 02:57:08 +00004971static void put_fpround ( IRExpr* /* :: Ity_I32 */ e )
4972{
sewardjdd40fdf2006-12-24 02:20:24 +00004973 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
sewardj5e205372005-05-09 02:57:08 +00004974 stmt( IRStmt_Put( OFFB_FPROUND, unop(Iop_32Uto64,e) ) );
4975}
sewardjc49ce232005-02-25 13:03:03 +00004976
4977
4978/* --------- Synthesise a 2-bit FPU rounding mode. --------- */
4979/* Produces a value in 0 .. 3, which is encoded as per the type
4980 IRRoundingMode. Since the guest_FPROUND value is also encoded as
4981 per IRRoundingMode, we merely need to get it and mask it for
4982 safety.
4983*/
4984static IRExpr* /* :: Ity_I32 */ get_roundingmode ( void )
4985{
4986 return binop( Iop_And32, get_fpround(), mkU32(3) );
4987}
sewardj8d965312005-02-25 02:48:47 +00004988
sewardj4796d662006-02-05 16:06:26 +00004989static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
4990{
4991 return mkU32(Irrm_NEAREST);
4992}
4993
sewardj8d965312005-02-25 02:48:47 +00004994
4995/* --------- Get/set FP register tag bytes. --------- */
4996
4997/* Given i, and some expression e, generate 'ST_TAG(i) = e'. */
4998
4999static void put_ST_TAG ( Int i, IRExpr* value )
5000{
sewardjdd40fdf2006-12-24 02:20:24 +00005001 IRRegArray* descr;
5002 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_I8);
5003 descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
floriand6f38b32012-05-31 15:46:18 +00005004 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00005005}
5006
5007/* Given i, generate an expression yielding 'ST_TAG(i)'. This will be
5008 zero to indicate "Empty" and nonzero to indicate "NonEmpty". */
5009
5010static IRExpr* get_ST_TAG ( Int i )
5011{
sewardjdd40fdf2006-12-24 02:20:24 +00005012 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
sewardj8d965312005-02-25 02:48:47 +00005013 return IRExpr_GetI( descr, get_ftop(), i );
5014}
5015
5016
5017/* --------- Get/set FP registers. --------- */
5018
5019/* Given i, and some expression e, emit 'ST(i) = e' and set the
5020 register's tag to indicate the register is full. The previous
5021 state of the register is not checked. */
5022
5023static void put_ST_UNCHECKED ( Int i, IRExpr* value )
5024{
sewardjdd40fdf2006-12-24 02:20:24 +00005025 IRRegArray* descr;
5026 vassert(typeOfIRExpr(irsb->tyenv, value) == Ity_F64);
5027 descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
floriand6f38b32012-05-31 15:46:18 +00005028 stmt( IRStmt_PutI( mkIRPutI(descr, get_ftop(), i, value) ) );
sewardj8d965312005-02-25 02:48:47 +00005029 /* Mark the register as in-use. */
5030 put_ST_TAG(i, mkU8(1));
5031}
5032
5033/* Given i, and some expression e, emit
5034 ST(i) = is_full(i) ? NaN : e
5035 and set the tag accordingly.
5036*/
5037
5038static void put_ST ( Int i, IRExpr* value )
5039{
sewardj009230b2013-01-26 11:47:55 +00005040 put_ST_UNCHECKED(
5041 i,
florian99dd03e2013-01-29 03:56:06 +00005042 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5043 /* non-0 means full */
5044 mkQNaN64(),
5045 /* 0 means empty */
5046 value
sewardj009230b2013-01-26 11:47:55 +00005047 )
sewardj8d965312005-02-25 02:48:47 +00005048 );
5049}
5050
5051
5052/* Given i, generate an expression yielding 'ST(i)'. */
5053
5054static IRExpr* get_ST_UNCHECKED ( Int i )
5055{
sewardjdd40fdf2006-12-24 02:20:24 +00005056 IRRegArray* descr = mkIRRegArray( OFFB_FPREGS, Ity_F64, 8 );
sewardj8d965312005-02-25 02:48:47 +00005057 return IRExpr_GetI( descr, get_ftop(), i );
5058}
5059
5060
5061/* Given i, generate an expression yielding
5062 is_full(i) ? ST(i) : NaN
5063*/
5064
5065static IRExpr* get_ST ( Int i )
5066{
5067 return
florian99dd03e2013-01-29 03:56:06 +00005068 IRExpr_ITE( binop(Iop_CmpNE8, get_ST_TAG(i), mkU8(0)),
5069 /* non-0 means full */
5070 get_ST_UNCHECKED(i),
5071 /* 0 means empty */
5072 mkQNaN64());
sewardj8d965312005-02-25 02:48:47 +00005073}
5074
5075
sewardje9c51c92014-04-30 22:50:34 +00005076/* Given i, and some expression e, and a condition cond, generate IR
5077 which has the same effect as put_ST(i,e) when cond is true and has
5078 no effect when cond is false. Given the lack of proper
5079 if-then-else in the IR, this is pretty tricky.
5080*/
5081
5082static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value )
5083{
5084 // new_tag = if cond then FULL else old_tag
5085 // new_val = if cond then (if old_tag==FULL then NaN else val)
5086 // else old_val
5087
5088 IRTemp old_tag = newTemp(Ity_I8);
5089 assign(old_tag, get_ST_TAG(i));
5090 IRTemp new_tag = newTemp(Ity_I8);
5091 assign(new_tag,
5092 IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag)));
5093
5094 IRTemp old_val = newTemp(Ity_F64);
5095 assign(old_val, get_ST_UNCHECKED(i));
5096 IRTemp new_val = newTemp(Ity_F64);
5097 assign(new_val,
5098 IRExpr_ITE(mkexpr(cond),
5099 IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)),
5100 /* non-0 means full */
5101 mkQNaN64(),
5102 /* 0 means empty */
5103 value),
5104 mkexpr(old_val)));
5105
5106 put_ST_UNCHECKED(i, mkexpr(new_val));
5107 // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So
5108 // now set it to new_tag instead.
5109 put_ST_TAG(i, mkexpr(new_tag));
5110}
5111
sewardj8d965312005-02-25 02:48:47 +00005112/* Adjust FTOP downwards by one register. */
5113
5114static void fp_push ( void )
5115{
5116 put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) );
5117}
5118
sewardje9c51c92014-04-30 22:50:34 +00005119/* Adjust FTOP downwards by one register when COND is 1:I1. Else
5120 don't change it. */
5121
5122static void maybe_fp_push ( IRTemp cond )
5123{
5124 put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) );
5125}
5126
sewardj8d965312005-02-25 02:48:47 +00005127/* Adjust FTOP upwards by one register, and mark the vacated register
5128 as empty. */
5129
5130static void fp_pop ( void )
5131{
5132 put_ST_TAG(0, mkU8(0));
5133 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5134}
5135
sewardje9c51c92014-04-30 22:50:34 +00005136/* Set the C2 bit of the FPU status register to e[0]. Assumes that
5137 e[31:1] == 0.
5138*/
5139static void set_C2 ( IRExpr* e )
sewardj25a85812005-05-08 23:03:48 +00005140{
sewardje9c51c92014-04-30 22:50:34 +00005141 IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2));
5142 put_C3210( binop(Iop_Or64,
5143 cleared,
5144 binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) );
5145}
5146
5147/* Generate code to check that abs(d64) < 2^63 and is finite. This is
5148 used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The
5149 test is simple, but the derivation of it is not so simple.
5150
5151 The exponent field for an IEEE754 double is 11 bits. That means it
5152 can take values 0 through 0x7FF. If the exponent has value 0x7FF,
5153 the number is either a NaN or an Infinity and so is not finite.
5154 Furthermore, a finite value of exactly 2^63 is the smallest value
5155 that has exponent value 0x43E. Hence, what we need to do is
5156 extract the exponent, ignoring the sign bit and mantissa, and check
5157 it is < 0x43E, or <= 0x43D.
5158
5159 To make this easily applicable to 32- and 64-bit targets, a
5160 roundabout approach is used. First the number is converted to I64,
5161 then the top 32 bits are taken. Shifting them right by 20 bits
5162 places the sign bit and exponent in the bottom 12 bits. Anding
5163 with 0x7FF gets rid of the sign bit, leaving just the exponent
5164 available for comparison.
5165*/
5166static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 )
5167{
5168 IRTemp i64 = newTemp(Ity_I64);
5169 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) );
5170 IRTemp exponent = newTemp(Ity_I32);
5171 assign(exponent,
5172 binop(Iop_And32,
5173 binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)),
5174 mkU32(0x7FF)));
5175 IRTemp in_range_and_finite = newTemp(Ity_I1);
5176 assign(in_range_and_finite,
5177 binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D)));
5178 return in_range_and_finite;
sewardj25a85812005-05-08 23:03:48 +00005179}
sewardj48a89d82005-05-06 11:50:13 +00005180
sewardj7c2d2822006-03-07 00:22:02 +00005181/* Invent a plausible-looking FPU status word value:
5182 ((ftop & 7) << 11) | (c3210 & 0x4700)
5183 */
5184static IRExpr* get_FPU_sw ( void )
5185{
5186 return
5187 unop(Iop_32to16,
5188 binop(Iop_Or32,
5189 binop(Iop_Shl32,
5190 binop(Iop_And32, get_ftop(), mkU32(7)),
5191 mkU8(11)),
5192 binop(Iop_And32, unop(Iop_64to32, get_C3210()),
5193 mkU32(0x4700))
5194 ));
5195}
5196
sewardj48a89d82005-05-06 11:50:13 +00005197
5198/* ------------------------------------------------------- */
5199/* Given all that stack-mangling junk, we can now go ahead
5200 and describe FP instructions.
5201*/
5202
5203/* ST(0) = ST(0) `op` mem64/32(addr)
5204 Need to check ST(0)'s tag on read, but not on write.
5205*/
5206static
florian55085f82012-11-21 00:36:55 +00005207void fp_do_op_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
sewardj48a89d82005-05-06 11:50:13 +00005208 IROp op, Bool dbl )
5209{
5210 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5211 if (dbl) {
5212 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005213 triop( op,
5214 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00005215 get_ST(0),
5216 loadLE(Ity_F64,mkexpr(addr))
5217 ));
5218 } else {
5219 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005220 triop( op,
5221 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj48a89d82005-05-06 11:50:13 +00005222 get_ST(0),
5223 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr)))
5224 ));
5225 }
5226}
sewardj7bc00082005-03-27 05:08:32 +00005227
5228
5229/* ST(0) = mem64/32(addr) `op` ST(0)
5230 Need to check ST(0)'s tag on read, but not on write.
5231*/
5232static
florian55085f82012-11-21 00:36:55 +00005233void fp_do_oprev_mem_ST_0 ( IRTemp addr, const HChar* op_txt, HChar* dis_buf,
sewardj7bc00082005-03-27 05:08:32 +00005234 IROp op, Bool dbl )
5235{
5236 DIP("f%s%c %s\n", op_txt, dbl?'l':'s', dis_buf);
5237 if (dbl) {
5238 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005239 triop( op,
5240 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00005241 loadLE(Ity_F64,mkexpr(addr)),
5242 get_ST(0)
5243 ));
5244 } else {
5245 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00005246 triop( op,
5247 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj7bc00082005-03-27 05:08:32 +00005248 unop(Iop_F32toF64, loadLE(Ity_F32,mkexpr(addr))),
5249 get_ST(0)
5250 ));
5251 }
5252}
sewardj37d52572005-02-25 14:22:12 +00005253
5254
5255/* ST(dst) = ST(dst) `op` ST(src).
5256 Check dst and src tags when reading but not on write.
5257*/
5258static
florian55085f82012-11-21 00:36:55 +00005259void fp_do_op_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
sewardj37d52572005-02-25 14:22:12 +00005260 Bool pop_after )
5261{
sewardj1027dc22005-02-26 01:55:02 +00005262 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
sewardj37d52572005-02-25 14:22:12 +00005263 put_ST_UNCHECKED(
5264 st_dst,
sewardj4796d662006-02-05 16:06:26 +00005265 triop( op,
5266 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5267 get_ST(st_dst),
5268 get_ST(st_src) )
sewardj37d52572005-02-25 14:22:12 +00005269 );
5270 if (pop_after)
5271 fp_pop();
5272}
5273
sewardj137015d2005-03-27 04:01:15 +00005274/* ST(dst) = ST(src) `op` ST(dst).
5275 Check dst and src tags when reading but not on write.
5276*/
5277static
florian55085f82012-11-21 00:36:55 +00005278void fp_do_oprev_ST_ST ( const HChar* op_txt, IROp op, UInt st_src, UInt st_dst,
sewardj137015d2005-03-27 04:01:15 +00005279 Bool pop_after )
5280{
5281 DIP("f%s%s st(%u), st(%u)\n", op_txt, pop_after?"p":"", st_src, st_dst );
5282 put_ST_UNCHECKED(
5283 st_dst,
sewardj4796d662006-02-05 16:06:26 +00005284 triop( op,
5285 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5286 get_ST(st_src),
5287 get_ST(st_dst) )
sewardj137015d2005-03-27 04:01:15 +00005288 );
5289 if (pop_after)
5290 fp_pop();
5291}
sewardjc49ce232005-02-25 13:03:03 +00005292
5293/* %rflags(Z,P,C) = UCOMI( st(0), st(i) ) */
5294static void fp_do_ucomi_ST0_STi ( UInt i, Bool pop_after )
5295{
sewardj1027dc22005-02-26 01:55:02 +00005296 DIP("fucomi%s %%st(0),%%st(%u)\n", pop_after ? "p" : "", i);
sewardjc49ce232005-02-25 13:03:03 +00005297 /* This is a bit of a hack (and isn't really right). It sets
5298 Z,P,C,O correctly, but forces A and S to zero, whereas the Intel
5299 documentation implies A and S are unchanged.
5300 */
5301 /* It's also fishy in that it is used both for COMIP and
5302 UCOMIP, and they aren't the same (although similar). */
5303 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
5304 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
5305 stmt( IRStmt_Put(
5306 OFFB_CC_DEP1,
5307 binop( Iop_And64,
5308 unop( Iop_32Uto64,
5309 binop(Iop_CmpF64, get_ST(0), get_ST(i))),
5310 mkU64(0x45)
5311 )));
5312 if (pop_after)
5313 fp_pop();
5314}
sewardj8d965312005-02-25 02:48:47 +00005315
5316
sewardjb707d102007-07-11 22:49:26 +00005317/* returns
5318 32to16( if e32 <s -32768 || e32 >s 32767 then -32768 else e32 )
5319*/
5320static IRExpr* x87ishly_qnarrow_32_to_16 ( IRExpr* e32 )
5321{
5322 IRTemp t32 = newTemp(Ity_I32);
5323 assign( t32, e32 );
5324 return
florian99dd03e2013-01-29 03:56:06 +00005325 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00005326 binop(Iop_CmpLT64U,
5327 unop(Iop_32Uto64,
5328 binop(Iop_Add32, mkexpr(t32), mkU32(32768))),
5329 mkU64(65536)),
florian99dd03e2013-01-29 03:56:06 +00005330 unop(Iop_32to16, mkexpr(t32)),
5331 mkU16( 0x8000 ) );
sewardjb707d102007-07-11 22:49:26 +00005332}
5333
5334
sewardj8d965312005-02-25 02:48:47 +00005335static
sewardjb4fd2e72005-03-23 13:34:11 +00005336ULong dis_FPU ( /*OUT*/Bool* decode_ok,
sewardj2e28ac42008-12-04 00:05:12 +00005337 VexAbiInfo* vbi, Prefix pfx, Long delta )
sewardj8d965312005-02-25 02:48:47 +00005338{
5339 Int len;
5340 UInt r_src, r_dst;
5341 HChar dis_buf[50];
5342 IRTemp t1, t2;
5343
5344 /* On entry, delta points at the second byte of the insn (the modrm
5345 byte).*/
5346 UChar first_opcode = getUChar(delta-1);
5347 UChar modrm = getUChar(delta+0);
5348
sewardj37d52572005-02-25 14:22:12 +00005349 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD8 opcodes +-+-+-+-+-+-+-+ */
5350
5351 if (first_opcode == 0xD8) {
5352 if (modrm < 0xC0) {
5353
5354 /* bits 5,4,3 are an opcode extension, and the modRM also
5355 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005356 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj7bc00082005-03-27 05:08:32 +00005357 delta += len;
sewardj37d52572005-02-25 14:22:12 +00005358
sewardj901ed122005-02-27 13:25:31 +00005359 switch (gregLO3ofRM(modrm)) {
sewardj37d52572005-02-25 14:22:12 +00005360
sewardj48a89d82005-05-06 11:50:13 +00005361 case 0: /* FADD single-real */
5362 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, False );
5363 break;
5364
sewardje6939f02005-05-07 01:01:24 +00005365 case 1: /* FMUL single-real */
5366 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, False );
5367 break;
5368
sewardjdb855502012-05-21 21:51:36 +00005369 case 2: /* FCOM single-real */
5370 DIP("fcoms %s\n", dis_buf);
5371 /* This forces C1 to zero, which isn't right. */
5372 /* The AMD documentation suggests that forcing C1 to
5373 zero is correct (Eliot Moss) */
5374 put_C3210(
5375 unop( Iop_32Uto64,
5376 binop( Iop_And32,
5377 binop(Iop_Shl32,
5378 binop(Iop_CmpF64,
5379 get_ST(0),
5380 unop(Iop_F32toF64,
5381 loadLE(Ity_F32,mkexpr(addr)))),
5382 mkU8(8)),
5383 mkU32(0x4500)
5384 )));
5385 break;
5386
5387 case 3: /* FCOMP single-real */
5388 /* The AMD documentation suggests that forcing C1 to
5389 zero is correct (Eliot Moss) */
5390 DIP("fcomps %s\n", dis_buf);
5391 /* This forces C1 to zero, which isn't right. */
5392 put_C3210(
5393 unop( Iop_32Uto64,
5394 binop( Iop_And32,
5395 binop(Iop_Shl32,
5396 binop(Iop_CmpF64,
5397 get_ST(0),
5398 unop(Iop_F32toF64,
5399 loadLE(Ity_F32,mkexpr(addr)))),
5400 mkU8(8)),
5401 mkU32(0x4500)
5402 )));
5403 fp_pop();
5404 break;
sewardje6939f02005-05-07 01:01:24 +00005405
5406 case 4: /* FSUB single-real */
5407 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, False );
5408 break;
sewardj7bc00082005-03-27 05:08:32 +00005409
5410 case 5: /* FSUBR single-real */
5411 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, False );
5412 break;
5413
sewardje6939f02005-05-07 01:01:24 +00005414 case 6: /* FDIV single-real */
5415 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, False );
5416 break;
5417
5418 case 7: /* FDIVR single-real */
5419 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, False );
5420 break;
sewardj37d52572005-02-25 14:22:12 +00005421
5422 default:
sewardj901ed122005-02-27 13:25:31 +00005423 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj37d52572005-02-25 14:22:12 +00005424 vex_printf("first_opcode == 0xD8\n");
5425 goto decode_fail;
5426 }
5427 } else {
5428 delta++;
5429 switch (modrm) {
5430
5431 case 0xC0 ... 0xC7: /* FADD %st(?),%st(0) */
5432 fp_do_op_ST_ST ( "add", Iop_AddF64, modrm - 0xC0, 0, False );
5433 break;
5434
sewardj137015d2005-03-27 04:01:15 +00005435 case 0xC8 ... 0xCF: /* FMUL %st(?),%st(0) */
5436 fp_do_op_ST_ST ( "mul", Iop_MulF64, modrm - 0xC8, 0, False );
5437 break;
5438
sewardj1859ecd2007-02-23 08:48:22 +00005439 /* Dunno if this is right */
5440 case 0xD0 ... 0xD7: /* FCOM %st(?),%st(0) */
5441 r_dst = (UInt)modrm - 0xD0;
5442 DIP("fcom %%st(0),%%st(%d)\n", r_dst);
5443 /* This forces C1 to zero, which isn't right. */
5444 put_C3210(
5445 unop(Iop_32Uto64,
5446 binop( Iop_And32,
5447 binop(Iop_Shl32,
5448 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5449 mkU8(8)),
5450 mkU32(0x4500)
5451 )));
5452 break;
5453
sewardj90e2e4b2007-05-04 09:41:24 +00005454 /* Dunno if this is right */
5455 case 0xD8 ... 0xDF: /* FCOMP %st(?),%st(0) */
5456 r_dst = (UInt)modrm - 0xD8;
5457 DIP("fcomp %%st(0),%%st(%d)\n", r_dst);
5458 /* This forces C1 to zero, which isn't right. */
5459 put_C3210(
5460 unop(Iop_32Uto64,
5461 binop( Iop_And32,
5462 binop(Iop_Shl32,
5463 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
5464 mkU8(8)),
5465 mkU32(0x4500)
5466 )));
5467 fp_pop();
5468 break;
5469
sewardj137015d2005-03-27 04:01:15 +00005470 case 0xE0 ... 0xE7: /* FSUB %st(?),%st(0) */
5471 fp_do_op_ST_ST ( "sub", Iop_SubF64, modrm - 0xE0, 0, False );
5472 break;
5473
sewardje6939f02005-05-07 01:01:24 +00005474 case 0xE8 ... 0xEF: /* FSUBR %st(?),%st(0) */
5475 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, modrm - 0xE8, 0, False );
5476 break;
sewardj137015d2005-03-27 04:01:15 +00005477
5478 case 0xF0 ... 0xF7: /* FDIV %st(?),%st(0) */
5479 fp_do_op_ST_ST ( "div", Iop_DivF64, modrm - 0xF0, 0, False );
5480 break;
5481
sewardj48a89d82005-05-06 11:50:13 +00005482 case 0xF8 ... 0xFF: /* FDIVR %st(?),%st(0) */
5483 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, modrm - 0xF8, 0, False );
5484 break;
sewardj37d52572005-02-25 14:22:12 +00005485
5486 default:
5487 goto decode_fail;
5488 }
5489 }
5490 }
sewardj8d965312005-02-25 02:48:47 +00005491
5492 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xD9 opcodes +-+-+-+-+-+-+-+ */
sewardj37d52572005-02-25 14:22:12 +00005493 else
sewardj8d965312005-02-25 02:48:47 +00005494 if (first_opcode == 0xD9) {
5495 if (modrm < 0xC0) {
5496
5497 /* bits 5,4,3 are an opcode extension, and the modRM also
5498 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00005499 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00005500 delta += len;
5501
sewardj901ed122005-02-27 13:25:31 +00005502 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00005503
sewardjc49ce232005-02-25 13:03:03 +00005504 case 0: /* FLD single-real */
5505 DIP("flds %s\n", dis_buf);
5506 fp_push();
5507 put_ST(0, unop(Iop_F32toF64,
5508 loadLE(Ity_F32, mkexpr(addr))));
5509 break;
5510
5511 case 2: /* FST single-real */
5512 DIP("fsts %s\n", dis_buf);
5513 storeLE(mkexpr(addr),
5514 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5515 break;
5516
5517 case 3: /* FSTP single-real */
5518 DIP("fstps %s\n", dis_buf);
5519 storeLE(mkexpr(addr),
5520 binop(Iop_F64toF32, get_roundingmode(), get_ST(0)));
5521 fp_pop();
5522 break;
5523
sewardj4017a3b2005-06-13 12:17:27 +00005524 case 4: { /* FLDENV m28 */
5525 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00005526 VexEmNote amd64g_do_FLDENV ( VexGuestX86State*, HWord ) */
sewardj4017a3b2005-06-13 12:17:27 +00005527 IRTemp ew = newTemp(Ity_I32);
5528 IRTemp w64 = newTemp(Ity_I64);
5529 IRDirty* d = unsafeIRDirty_0_N (
5530 0/*regparms*/,
5531 "amd64g_dirtyhelper_FLDENV",
5532 &amd64g_dirtyhelper_FLDENV,
florian90419562013-08-15 20:54:52 +00005533 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj4017a3b2005-06-13 12:17:27 +00005534 );
sewardj74142b82013-08-08 10:28:59 +00005535 d->tmp = w64;
sewardj4017a3b2005-06-13 12:17:27 +00005536 /* declare we're reading memory */
5537 d->mFx = Ifx_Read;
5538 d->mAddr = mkexpr(addr);
5539 d->mSize = 28;
5540
5541 /* declare we're writing guest state */
5542 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005543 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005544
5545 d->fxState[0].fx = Ifx_Write;
5546 d->fxState[0].offset = OFFB_FTOP;
5547 d->fxState[0].size = sizeof(UInt);
5548
5549 d->fxState[1].fx = Ifx_Write;
5550 d->fxState[1].offset = OFFB_FPTAGS;
5551 d->fxState[1].size = 8 * sizeof(UChar);
5552
5553 d->fxState[2].fx = Ifx_Write;
5554 d->fxState[2].offset = OFFB_FPROUND;
5555 d->fxState[2].size = sizeof(ULong);
5556
5557 d->fxState[3].fx = Ifx_Write;
5558 d->fxState[3].offset = OFFB_FC3210;
5559 d->fxState[3].size = sizeof(ULong);
5560
5561 stmt( IRStmt_Dirty(d) );
5562
5563 /* ew contains any emulation warning we may need to
5564 issue. If needed, side-exit to the next insn,
5565 reporting the warning, so that Valgrind's dispatcher
5566 sees the warning. */
sewardjcc3d2192013-03-27 11:37:33 +00005567 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
sewardj4017a3b2005-06-13 12:17:27 +00005568 put_emwarn( mkexpr(ew) );
5569 stmt(
5570 IRStmt_Exit(
5571 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5572 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005573 IRConst_U64( guest_RIP_bbstart+delta ),
5574 OFFB_RIP
sewardj4017a3b2005-06-13 12:17:27 +00005575 )
5576 );
5577
5578 DIP("fldenv %s\n", dis_buf);
5579 break;
5580 }
sewardj5e205372005-05-09 02:57:08 +00005581
5582 case 5: {/* FLDCW */
5583 /* The only thing we observe in the control word is the
5584 rounding mode. Therefore, pass the 16-bit value
5585 (x87 native-format control word) to a clean helper,
5586 getting back a 64-bit value, the lower half of which
5587 is the FPROUND value to store, and the upper half of
5588 which is the emulation-warning token which may be
5589 generated.
5590 */
5591 /* ULong amd64h_check_fldcw ( ULong ); */
5592 IRTemp t64 = newTemp(Ity_I64);
5593 IRTemp ew = newTemp(Ity_I32);
5594 DIP("fldcw %s\n", dis_buf);
5595 assign( t64, mkIRExprCCall(
5596 Ity_I64, 0/*regparms*/,
5597 "amd64g_check_fldcw",
5598 &amd64g_check_fldcw,
5599 mkIRExprVec_1(
5600 unop( Iop_16Uto64,
5601 loadLE(Ity_I16, mkexpr(addr)))
5602 )
5603 )
5604 );
5605
5606 put_fpround( unop(Iop_64to32, mkexpr(t64)) );
5607 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
5608 put_emwarn( mkexpr(ew) );
5609 /* Finally, if an emulation warning was reported,
5610 side-exit to the next insn, reporting the warning,
5611 so that Valgrind's dispatcher sees the warning. */
5612 stmt(
5613 IRStmt_Exit(
5614 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
5615 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00005616 IRConst_U64( guest_RIP_bbstart+delta ),
5617 OFFB_RIP
sewardj5e205372005-05-09 02:57:08 +00005618 )
5619 );
5620 break;
5621 }
5622
sewardj4017a3b2005-06-13 12:17:27 +00005623 case 6: { /* FNSTENV m28 */
5624 /* Uses dirty helper:
5625 void amd64g_do_FSTENV ( VexGuestAMD64State*, HWord ) */
5626 IRDirty* d = unsafeIRDirty_0_N (
5627 0/*regparms*/,
5628 "amd64g_dirtyhelper_FSTENV",
5629 &amd64g_dirtyhelper_FSTENV,
florian90419562013-08-15 20:54:52 +00005630 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj4017a3b2005-06-13 12:17:27 +00005631 );
sewardj4017a3b2005-06-13 12:17:27 +00005632 /* declare we're writing memory */
5633 d->mFx = Ifx_Write;
5634 d->mAddr = mkexpr(addr);
5635 d->mSize = 28;
5636
5637 /* declare we're reading guest state */
5638 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +00005639 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj4017a3b2005-06-13 12:17:27 +00005640
5641 d->fxState[0].fx = Ifx_Read;
5642 d->fxState[0].offset = OFFB_FTOP;
5643 d->fxState[0].size = sizeof(UInt);
5644
5645 d->fxState[1].fx = Ifx_Read;
5646 d->fxState[1].offset = OFFB_FPTAGS;
5647 d->fxState[1].size = 8 * sizeof(UChar);
5648
5649 d->fxState[2].fx = Ifx_Read;
5650 d->fxState[2].offset = OFFB_FPROUND;
5651 d->fxState[2].size = sizeof(ULong);
5652
5653 d->fxState[3].fx = Ifx_Read;
5654 d->fxState[3].offset = OFFB_FC3210;
5655 d->fxState[3].size = sizeof(ULong);
5656
5657 stmt( IRStmt_Dirty(d) );
5658
5659 DIP("fnstenv %s\n", dis_buf);
5660 break;
5661 }
sewardj5e205372005-05-09 02:57:08 +00005662
5663 case 7: /* FNSTCW */
5664 /* Fake up a native x87 FPU control word. The only
5665 thing it depends on is FPROUND[1:0], so call a clean
5666 helper to cook it up. */
sewardj4017a3b2005-06-13 12:17:27 +00005667 /* ULong amd64g_create_fpucw ( ULong fpround ) */
sewardj5e205372005-05-09 02:57:08 +00005668 DIP("fnstcw %s\n", dis_buf);
5669 storeLE(
5670 mkexpr(addr),
5671 unop( Iop_64to16,
5672 mkIRExprCCall(
5673 Ity_I64, 0/*regp*/,
5674 "amd64g_create_fpucw", &amd64g_create_fpucw,
5675 mkIRExprVec_1( unop(Iop_32Uto64, get_fpround()) )
5676 )
5677 )
5678 );
5679 break;
sewardj8d965312005-02-25 02:48:47 +00005680
5681 default:
sewardj901ed122005-02-27 13:25:31 +00005682 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00005683 vex_printf("first_opcode == 0xD9\n");
5684 goto decode_fail;
5685 }
5686
5687 } else {
5688 delta++;
5689 switch (modrm) {
5690
sewardjc49ce232005-02-25 13:03:03 +00005691 case 0xC0 ... 0xC7: /* FLD %st(?) */
5692 r_src = (UInt)modrm - 0xC0;
sewardj1027dc22005-02-26 01:55:02 +00005693 DIP("fld %%st(%u)\n", r_src);
sewardjc49ce232005-02-25 13:03:03 +00005694 t1 = newTemp(Ity_F64);
5695 assign(t1, get_ST(r_src));
5696 fp_push();
5697 put_ST(0, mkexpr(t1));
5698 break;
sewardj8d965312005-02-25 02:48:47 +00005699
5700 case 0xC8 ... 0xCF: /* FXCH %st(?) */
5701 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00005702 DIP("fxch %%st(%u)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00005703 t1 = newTemp(Ity_F64);
5704 t2 = newTemp(Ity_F64);
5705 assign(t1, get_ST(0));
5706 assign(t2, get_ST(r_src));
5707 put_ST_UNCHECKED(0, mkexpr(t2));
5708 put_ST_UNCHECKED(r_src, mkexpr(t1));
5709 break;
5710
5711 case 0xE0: /* FCHS */
5712 DIP("fchs\n");
5713 put_ST_UNCHECKED(0, unop(Iop_NegF64, get_ST(0)));
5714 break;
5715
sewardj137015d2005-03-27 04:01:15 +00005716 case 0xE1: /* FABS */
5717 DIP("fabs\n");
5718 put_ST_UNCHECKED(0, unop(Iop_AbsF64, get_ST(0)));
5719 break;
5720
sewardj4f9847d2005-07-25 11:58:34 +00005721 case 0xE5: { /* FXAM */
5722 /* This is an interesting one. It examines %st(0),
5723 regardless of whether the tag says it's empty or not.
5724 Here, just pass both the tag (in our format) and the
5725 value (as a double, actually a ULong) to a helper
5726 function. */
5727 IRExpr** args
5728 = mkIRExprVec_2( unop(Iop_8Uto64, get_ST_TAG(0)),
5729 unop(Iop_ReinterpF64asI64,
5730 get_ST_UNCHECKED(0)) );
5731 put_C3210(mkIRExprCCall(
5732 Ity_I64,
5733 0/*regparm*/,
5734 "amd64g_calculate_FXAM", &amd64g_calculate_FXAM,
5735 args
5736 ));
5737 DIP("fxam\n");
5738 break;
5739 }
sewardjc49ce232005-02-25 13:03:03 +00005740
5741 case 0xE8: /* FLD1 */
5742 DIP("fld1\n");
5743 fp_push();
5744 /* put_ST(0, IRExpr_Const(IRConst_F64(1.0))); */
5745 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff0000000000000ULL)));
5746 break;
5747
sewardj6847d8c2005-05-12 19:21:55 +00005748 case 0xE9: /* FLDL2T */
5749 DIP("fldl2t\n");
5750 fp_push();
5751 /* put_ST(0, IRExpr_Const(IRConst_F64(3.32192809488736234781))); */
5752 put_ST(0, IRExpr_Const(IRConst_F64i(0x400a934f0979a371ULL)));
5753 break;
5754
5755 case 0xEA: /* FLDL2E */
5756 DIP("fldl2e\n");
5757 fp_push();
5758 /* put_ST(0, IRExpr_Const(IRConst_F64(1.44269504088896340739))); */
5759 put_ST(0, IRExpr_Const(IRConst_F64i(0x3ff71547652b82feULL)));
5760 break;
5761
5762 case 0xEB: /* FLDPI */
5763 DIP("fldpi\n");
5764 fp_push();
5765 /* put_ST(0, IRExpr_Const(IRConst_F64(3.14159265358979323851))); */
5766 put_ST(0, IRExpr_Const(IRConst_F64i(0x400921fb54442d18ULL)));
5767 break;
5768
5769 case 0xEC: /* FLDLG2 */
5770 DIP("fldlg2\n");
5771 fp_push();
5772 /* put_ST(0, IRExpr_Const(IRConst_F64(0.301029995663981143))); */
5773 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fd34413509f79ffULL)));
5774 break;
5775
5776 case 0xED: /* FLDLN2 */
5777 DIP("fldln2\n");
5778 fp_push();
5779 /* put_ST(0, IRExpr_Const(IRConst_F64(0.69314718055994530942))); */
5780 put_ST(0, IRExpr_Const(IRConst_F64i(0x3fe62e42fefa39efULL)));
5781 break;
sewardjc49ce232005-02-25 13:03:03 +00005782
5783 case 0xEE: /* FLDZ */
5784 DIP("fldz\n");
5785 fp_push();
5786 /* put_ST(0, IRExpr_Const(IRConst_F64(0.0))); */
5787 put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
5788 break;
5789
sewardj25a85812005-05-08 23:03:48 +00005790 case 0xF0: /* F2XM1 */
5791 DIP("f2xm1\n");
sewardj4796d662006-02-05 16:06:26 +00005792 put_ST_UNCHECKED(0,
5793 binop(Iop_2xm1F64,
5794 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5795 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005796 break;
5797
5798 case 0xF1: /* FYL2X */
5799 DIP("fyl2x\n");
sewardj4796d662006-02-05 16:06:26 +00005800 put_ST_UNCHECKED(1,
5801 triop(Iop_Yl2xF64,
5802 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5803 get_ST(1),
5804 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005805 fp_pop();
5806 break;
5807
sewardje9c51c92014-04-30 22:50:34 +00005808 case 0xF2: { /* FPTAN */
5809 DIP("fptan\n");
5810 IRTemp argD = newTemp(Ity_F64);
5811 assign(argD, get_ST(0));
5812 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5813 IRTemp resD = newTemp(Ity_F64);
5814 assign(resD,
5815 IRExpr_ITE(
5816 mkexpr(argOK),
5817 binop(Iop_TanF64,
5818 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5819 mkexpr(argD)),
5820 mkexpr(argD))
5821 );
5822 put_ST_UNCHECKED(0, mkexpr(resD));
5823 /* Conditionally push 1.0 on the stack, if the arg is
5824 in range */
5825 maybe_fp_push(argOK);
5826 maybe_put_ST(argOK, 0,
5827 IRExpr_Const(IRConst_F64(1.0)));
5828 set_C2( binop(Iop_Xor64,
5829 unop(Iop_1Uto64, mkexpr(argOK)),
5830 mkU64(1)) );
sewardj5e205372005-05-09 02:57:08 +00005831 break;
sewardje9c51c92014-04-30 22:50:34 +00005832 }
sewardj25a85812005-05-08 23:03:48 +00005833
5834 case 0xF3: /* FPATAN */
5835 DIP("fpatan\n");
sewardj4796d662006-02-05 16:06:26 +00005836 put_ST_UNCHECKED(1,
5837 triop(Iop_AtanF64,
5838 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5839 get_ST(1),
5840 get_ST(0)));
sewardj25a85812005-05-08 23:03:48 +00005841 fp_pop();
5842 break;
5843
sewardj879cee02006-03-07 01:15:50 +00005844 case 0xF4: { /* FXTRACT */
5845 IRTemp argF = newTemp(Ity_F64);
5846 IRTemp sigF = newTemp(Ity_F64);
5847 IRTemp expF = newTemp(Ity_F64);
5848 IRTemp argI = newTemp(Ity_I64);
5849 IRTemp sigI = newTemp(Ity_I64);
5850 IRTemp expI = newTemp(Ity_I64);
5851 DIP("fxtract\n");
5852 assign( argF, get_ST(0) );
5853 assign( argI, unop(Iop_ReinterpF64asI64, mkexpr(argF)));
5854 assign( sigI,
5855 mkIRExprCCall(
5856 Ity_I64, 0/*regparms*/,
5857 "x86amd64g_calculate_FXTRACT",
5858 &x86amd64g_calculate_FXTRACT,
5859 mkIRExprVec_2( mkexpr(argI),
5860 mkIRExpr_HWord(0)/*sig*/ ))
5861 );
5862 assign( expI,
5863 mkIRExprCCall(
5864 Ity_I64, 0/*regparms*/,
5865 "x86amd64g_calculate_FXTRACT",
5866 &x86amd64g_calculate_FXTRACT,
5867 mkIRExprVec_2( mkexpr(argI),
5868 mkIRExpr_HWord(1)/*exp*/ ))
5869 );
5870 assign( sigF, unop(Iop_ReinterpI64asF64, mkexpr(sigI)) );
5871 assign( expF, unop(Iop_ReinterpI64asF64, mkexpr(expI)) );
5872 /* exponent */
5873 put_ST_UNCHECKED(0, mkexpr(expF) );
5874 fp_push();
5875 /* significand */
5876 put_ST(0, mkexpr(sigF) );
5877 break;
5878 }
5879
sewardj4970e4e2008-10-11 10:07:55 +00005880 case 0xF5: { /* FPREM1 -- IEEE compliant */
5881 IRTemp a1 = newTemp(Ity_F64);
5882 IRTemp a2 = newTemp(Ity_F64);
5883 DIP("fprem1\n");
5884 /* Do FPREM1 twice, once to get the remainder, and once
5885 to get the C3210 flag values. */
5886 assign( a1, get_ST(0) );
5887 assign( a2, get_ST(1) );
5888 put_ST_UNCHECKED(0,
5889 triop(Iop_PRem1F64,
5890 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5891 mkexpr(a1),
5892 mkexpr(a2)));
5893 put_C3210(
5894 unop(Iop_32Uto64,
5895 triop(Iop_PRem1C3210F64,
5896 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5897 mkexpr(a1),
5898 mkexpr(a2)) ));
5899 break;
5900 }
sewardj6847d8c2005-05-12 19:21:55 +00005901
5902 case 0xF7: /* FINCSTP */
5903 DIP("fincstp\n");
5904 put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
5905 break;
5906
sewardjf4c803b2006-09-11 11:07:34 +00005907 case 0xF8: { /* FPREM -- not IEEE compliant */
5908 IRTemp a1 = newTemp(Ity_F64);
5909 IRTemp a2 = newTemp(Ity_F64);
5910 DIP("fprem\n");
5911 /* Do FPREM twice, once to get the remainder, and once
5912 to get the C3210 flag values. */
5913 assign( a1, get_ST(0) );
5914 assign( a2, get_ST(1) );
5915 put_ST_UNCHECKED(0,
5916 triop(Iop_PRemF64,
5917 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5918 mkexpr(a1),
5919 mkexpr(a2)));
5920 put_C3210(
5921 unop(Iop_32Uto64,
5922 triop(Iop_PRemC3210F64,
5923 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5924 mkexpr(a1),
5925 mkexpr(a2)) ));
5926 break;
5927 }
5928
sewardj5e205372005-05-09 02:57:08 +00005929 case 0xF9: /* FYL2XP1 */
5930 DIP("fyl2xp1\n");
sewardj4796d662006-02-05 16:06:26 +00005931 put_ST_UNCHECKED(1,
5932 triop(Iop_Yl2xp1F64,
5933 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5934 get_ST(1),
5935 get_ST(0)));
sewardj5e205372005-05-09 02:57:08 +00005936 fp_pop();
5937 break;
sewardje6939f02005-05-07 01:01:24 +00005938
5939 case 0xFA: /* FSQRT */
5940 DIP("fsqrt\n");
sewardj4796d662006-02-05 16:06:26 +00005941 put_ST_UNCHECKED(0,
5942 binop(Iop_SqrtF64,
5943 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5944 get_ST(0)));
sewardje6939f02005-05-07 01:01:24 +00005945 break;
5946
sewardj25a85812005-05-08 23:03:48 +00005947 case 0xFB: { /* FSINCOS */
sewardj25a85812005-05-08 23:03:48 +00005948 DIP("fsincos\n");
sewardje9c51c92014-04-30 22:50:34 +00005949 IRTemp argD = newTemp(Ity_F64);
5950 assign(argD, get_ST(0));
5951 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5952 IRTemp resD = newTemp(Ity_F64);
5953 assign(resD,
5954 IRExpr_ITE(
5955 mkexpr(argOK),
5956 binop(Iop_SinF64,
5957 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5958 mkexpr(argD)),
5959 mkexpr(argD))
5960 );
5961 put_ST_UNCHECKED(0, mkexpr(resD));
5962 /* Conditionally push the cos value on the stack, if
5963 the arg is in range */
5964 maybe_fp_push(argOK);
5965 maybe_put_ST(argOK, 0,
5966 binop(Iop_CosF64,
sewardj4796d662006-02-05 16:06:26 +00005967 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardje9c51c92014-04-30 22:50:34 +00005968 mkexpr(argD)));
5969 set_C2( binop(Iop_Xor64,
5970 unop(Iop_1Uto64, mkexpr(argOK)),
5971 mkU64(1)) );
sewardj25a85812005-05-08 23:03:48 +00005972 break;
5973 }
5974
5975 case 0xFC: /* FRNDINT */
5976 DIP("frndint\n");
5977 put_ST_UNCHECKED(0,
sewardjb183b852006-02-03 16:08:03 +00005978 binop(Iop_RoundF64toInt, get_roundingmode(), get_ST(0)) );
sewardj25a85812005-05-08 23:03:48 +00005979 break;
5980
5981 case 0xFD: /* FSCALE */
5982 DIP("fscale\n");
sewardj4796d662006-02-05 16:06:26 +00005983 put_ST_UNCHECKED(0,
5984 triop(Iop_ScaleF64,
5985 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
5986 get_ST(0),
5987 get_ST(1)));
sewardj25a85812005-05-08 23:03:48 +00005988 break;
5989
sewardje9c51c92014-04-30 22:50:34 +00005990 case 0xFE: /* FSIN */
5991 case 0xFF: { /* FCOS */
5992 Bool isSIN = modrm == 0xFE;
5993 DIP("%s\n", isSIN ? "fsin" : "fcos");
5994 IRTemp argD = newTemp(Ity_F64);
5995 assign(argD, get_ST(0));
5996 IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD);
5997 IRTemp resD = newTemp(Ity_F64);
5998 assign(resD,
5999 IRExpr_ITE(
6000 mkexpr(argOK),
6001 binop(isSIN ? Iop_SinF64 : Iop_CosF64,
6002 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
6003 mkexpr(argD)),
6004 mkexpr(argD))
6005 );
6006 put_ST_UNCHECKED(0, mkexpr(resD));
6007 set_C2( binop(Iop_Xor64,
6008 unop(Iop_1Uto64, mkexpr(argOK)),
6009 mkU64(1)) );
sewardj25a85812005-05-08 23:03:48 +00006010 break;
sewardje9c51c92014-04-30 22:50:34 +00006011 }
sewardj8d965312005-02-25 02:48:47 +00006012
6013 default:
6014 goto decode_fail;
6015 }
6016 }
6017 }
6018
6019 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDA opcodes +-+-+-+-+-+-+-+ */
6020 else
6021 if (first_opcode == 0xDA) {
6022
6023 if (modrm < 0xC0) {
6024
6025 /* bits 5,4,3 are an opcode extension, and the modRM also
6026 specifies an address. */
sewardj6847d8c2005-05-12 19:21:55 +00006027 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00006028 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00006029 delta += len;
sewardj901ed122005-02-27 13:25:31 +00006030 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00006031
sewardj6847d8c2005-05-12 19:21:55 +00006032 case 0: /* FIADD m32int */ /* ST(0) += m32int */
6033 DIP("fiaddl %s\n", dis_buf);
6034 fop = Iop_AddF64;
6035 goto do_fop_m32;
6036
6037 case 1: /* FIMUL m32int */ /* ST(0) *= m32int */
6038 DIP("fimull %s\n", dis_buf);
6039 fop = Iop_MulF64;
6040 goto do_fop_m32;
6041
6042 case 4: /* FISUB m32int */ /* ST(0) -= m32int */
6043 DIP("fisubl %s\n", dis_buf);
6044 fop = Iop_SubF64;
6045 goto do_fop_m32;
6046
6047 case 5: /* FISUBR m32int */ /* ST(0) = m32int - ST(0) */
6048 DIP("fisubrl %s\n", dis_buf);
6049 fop = Iop_SubF64;
6050 goto do_foprev_m32;
6051
6052 case 6: /* FIDIV m32int */ /* ST(0) /= m32int */
6053 DIP("fisubl %s\n", dis_buf);
6054 fop = Iop_DivF64;
6055 goto do_fop_m32;
6056
6057 case 7: /* FIDIVR m32int */ /* ST(0) = m32int / ST(0) */
6058 DIP("fidivrl %s\n", dis_buf);
6059 fop = Iop_DivF64;
6060 goto do_foprev_m32;
6061
6062 do_fop_m32:
6063 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006064 triop(fop,
6065 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00006066 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00006067 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006068 loadLE(Ity_I32, mkexpr(addr)))));
6069 break;
6070
6071 do_foprev_m32:
6072 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006073 triop(fop,
6074 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00006075 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006076 loadLE(Ity_I32, mkexpr(addr))),
6077 get_ST(0)));
6078 break;
sewardj8d965312005-02-25 02:48:47 +00006079
6080 default:
sewardj901ed122005-02-27 13:25:31 +00006081 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00006082 vex_printf("first_opcode == 0xDA\n");
6083 goto decode_fail;
6084 }
6085
6086 } else {
6087
6088 delta++;
6089 switch (modrm) {
6090
sewardj48a89d82005-05-06 11:50:13 +00006091 case 0xC0 ... 0xC7: /* FCMOVB ST(i), ST(0) */
6092 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00006093 DIP("fcmovb %%st(%u), %%st(0)\n", r_src);
sewardj48a89d82005-05-06 11:50:13 +00006094 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006095 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006096 mk_amd64g_calculate_condition(AMD64CondB),
florian99dd03e2013-01-29 03:56:06 +00006097 get_ST(r_src), get_ST(0)) );
sewardj48a89d82005-05-06 11:50:13 +00006098 break;
sewardj8d965312005-02-25 02:48:47 +00006099
6100 case 0xC8 ... 0xCF: /* FCMOVE(Z) ST(i), ST(0) */
6101 r_src = (UInt)modrm - 0xC8;
sewardj1027dc22005-02-26 01:55:02 +00006102 DIP("fcmovz %%st(%u), %%st(0)\n", r_src);
sewardj8d965312005-02-25 02:48:47 +00006103 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006104 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006105 mk_amd64g_calculate_condition(AMD64CondZ),
florian99dd03e2013-01-29 03:56:06 +00006106 get_ST(r_src), get_ST(0)) );
sewardj8d965312005-02-25 02:48:47 +00006107 break;
6108
sewardj37d52572005-02-25 14:22:12 +00006109 case 0xD0 ... 0xD7: /* FCMOVBE ST(i), ST(0) */
6110 r_src = (UInt)modrm - 0xD0;
sewardj1027dc22005-02-26 01:55:02 +00006111 DIP("fcmovbe %%st(%u), %%st(0)\n", r_src);
sewardj37d52572005-02-25 14:22:12 +00006112 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006113 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006114 mk_amd64g_calculate_condition(AMD64CondBE),
florian99dd03e2013-01-29 03:56:06 +00006115 get_ST(r_src), get_ST(0)) );
sewardj37d52572005-02-25 14:22:12 +00006116 break;
6117
sewardj25a85812005-05-08 23:03:48 +00006118 case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
6119 r_src = (UInt)modrm - 0xD8;
6120 DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
6121 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006122 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006123 mk_amd64g_calculate_condition(AMD64CondP),
florian99dd03e2013-01-29 03:56:06 +00006124 get_ST(r_src), get_ST(0)) );
sewardj25a85812005-05-08 23:03:48 +00006125 break;
6126
sewardje7f277a2008-04-28 21:05:33 +00006127 case 0xE9: /* FUCOMPP %st(0),%st(1) */
6128 DIP("fucompp %%st(0),%%st(1)\n");
6129 /* This forces C1 to zero, which isn't right. */
6130 put_C3210(
6131 unop(Iop_32Uto64,
6132 binop( Iop_And32,
6133 binop(Iop_Shl32,
6134 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6135 mkU8(8)),
6136 mkU32(0x4500)
6137 )));
6138 fp_pop();
6139 fp_pop();
6140 break;
sewardj8d965312005-02-25 02:48:47 +00006141
6142 default:
6143 goto decode_fail;
6144 }
6145
6146 }
6147 }
6148
sewardjc49ce232005-02-25 13:03:03 +00006149 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDB opcodes +-+-+-+-+-+-+-+ */
6150 else
6151 if (first_opcode == 0xDB) {
6152 if (modrm < 0xC0) {
6153
6154 /* bits 5,4,3 are an opcode extension, and the modRM also
6155 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006156 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00006157 delta += len;
6158
sewardj901ed122005-02-27 13:25:31 +00006159 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00006160
sewardj5cc00ff2005-03-27 04:48:32 +00006161 case 0: /* FILD m32int */
6162 DIP("fildl %s\n", dis_buf);
6163 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006164 put_ST(0, unop(Iop_I32StoF64,
sewardj5cc00ff2005-03-27 04:48:32 +00006165 loadLE(Ity_I32, mkexpr(addr))));
6166 break;
6167
sewardjfcf21f32006-08-04 14:51:19 +00006168 case 1: /* FISTTPL m32 (SSE3) */
6169 DIP("fisttpl %s\n", dis_buf);
6170 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006171 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00006172 fp_pop();
6173 break;
6174
sewardj6847d8c2005-05-12 19:21:55 +00006175 case 2: /* FIST m32 */
6176 DIP("fistl %s\n", dis_buf);
6177 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006178 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00006179 break;
sewardj37d52572005-02-25 14:22:12 +00006180
6181 case 3: /* FISTP m32 */
6182 DIP("fistpl %s\n", dis_buf);
6183 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006184 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) );
sewardj37d52572005-02-25 14:22:12 +00006185 fp_pop();
6186 break;
6187
sewardj924215b2005-03-26 21:50:31 +00006188 case 5: { /* FLD extended-real */
6189 /* Uses dirty helper:
6190 ULong amd64g_loadF80le ( ULong )
6191 addr holds the address. First, do a dirty call to
6192 get hold of the data. */
6193 IRTemp val = newTemp(Ity_I64);
6194 IRExpr** args = mkIRExprVec_1 ( mkexpr(addr) );
6195
6196 IRDirty* d = unsafeIRDirty_1_N (
6197 val,
6198 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00006199 "amd64g_dirtyhelper_loadF80le",
6200 &amd64g_dirtyhelper_loadF80le,
sewardj924215b2005-03-26 21:50:31 +00006201 args
6202 );
6203 /* declare that we're reading memory */
6204 d->mFx = Ifx_Read;
6205 d->mAddr = mkexpr(addr);
6206 d->mSize = 10;
6207
6208 /* execute the dirty call, dumping the result in val. */
6209 stmt( IRStmt_Dirty(d) );
6210 fp_push();
6211 put_ST(0, unop(Iop_ReinterpI64asF64, mkexpr(val)));
6212
6213 DIP("fldt %s\n", dis_buf);
6214 break;
6215 }
6216
6217 case 7: { /* FSTP extended-real */
6218 /* Uses dirty helper:
6219 void amd64g_storeF80le ( ULong addr, ULong data )
6220 */
6221 IRExpr** args
6222 = mkIRExprVec_2( mkexpr(addr),
6223 unop(Iop_ReinterpF64asI64, get_ST(0)) );
6224
6225 IRDirty* d = unsafeIRDirty_0_N (
6226 0/*regparms*/,
sewardj8707fef2005-08-23 23:26:37 +00006227 "amd64g_dirtyhelper_storeF80le",
6228 &amd64g_dirtyhelper_storeF80le,
sewardj924215b2005-03-26 21:50:31 +00006229 args
6230 );
6231 /* declare we're writing memory */
6232 d->mFx = Ifx_Write;
6233 d->mAddr = mkexpr(addr);
6234 d->mSize = 10;
6235
6236 /* execute the dirty call. */
6237 stmt( IRStmt_Dirty(d) );
6238 fp_pop();
6239
6240 DIP("fstpt\n %s", dis_buf);
6241 break;
6242 }
sewardjc49ce232005-02-25 13:03:03 +00006243
6244 default:
sewardj901ed122005-02-27 13:25:31 +00006245 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00006246 vex_printf("first_opcode == 0xDB\n");
6247 goto decode_fail;
6248 }
6249
6250 } else {
6251
6252 delta++;
6253 switch (modrm) {
6254
sewardj48a89d82005-05-06 11:50:13 +00006255 case 0xC0 ... 0xC7: /* FCMOVNB ST(i), ST(0) */
6256 r_src = (UInt)modrm - 0xC0;
sewardjca673ab2005-05-11 10:03:08 +00006257 DIP("fcmovnb %%st(%u), %%st(0)\n", r_src);
sewardj48a89d82005-05-06 11:50:13 +00006258 put_ST_UNCHECKED(0,
florian99dd03e2013-01-29 03:56:06 +00006259 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006260 mk_amd64g_calculate_condition(AMD64CondNB),
florian99dd03e2013-01-29 03:56:06 +00006261 get_ST(r_src), get_ST(0)) );
sewardj48a89d82005-05-06 11:50:13 +00006262 break;
sewardj924215b2005-03-26 21:50:31 +00006263
6264 case 0xC8 ... 0xCF: /* FCMOVNE(NZ) ST(i), ST(0) */
6265 r_src = (UInt)modrm - 0xC8;
sewardj40e144d2005-03-28 00:46:27 +00006266 DIP("fcmovnz %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00006267 put_ST_UNCHECKED(
6268 0,
florian99dd03e2013-01-29 03:56:06 +00006269 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006270 mk_amd64g_calculate_condition(AMD64CondNZ),
florian99dd03e2013-01-29 03:56:06 +00006271 get_ST(r_src),
6272 get_ST(0)
sewardj137015d2005-03-27 04:01:15 +00006273 )
6274 );
sewardj924215b2005-03-26 21:50:31 +00006275 break;
6276
sewardj137015d2005-03-27 04:01:15 +00006277 case 0xD0 ... 0xD7: /* FCMOVNBE ST(i), ST(0) */
6278 r_src = (UInt)modrm - 0xD0;
sewardj40e144d2005-03-28 00:46:27 +00006279 DIP("fcmovnbe %%st(%u), %%st(0)\n", r_src);
sewardj137015d2005-03-27 04:01:15 +00006280 put_ST_UNCHECKED(
6281 0,
florian99dd03e2013-01-29 03:56:06 +00006282 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006283 mk_amd64g_calculate_condition(AMD64CondNBE),
florian99dd03e2013-01-29 03:56:06 +00006284 get_ST(r_src),
6285 get_ST(0)
sewardj137015d2005-03-27 04:01:15 +00006286 )
6287 );
6288 break;
6289
sewardj3368e102006-03-06 19:05:07 +00006290 case 0xD8 ... 0xDF: /* FCMOVNU ST(i), ST(0) */
6291 r_src = (UInt)modrm - 0xD8;
6292 DIP("fcmovnu %%st(%u), %%st(0)\n", r_src);
6293 put_ST_UNCHECKED(
6294 0,
florian99dd03e2013-01-29 03:56:06 +00006295 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00006296 mk_amd64g_calculate_condition(AMD64CondNP),
florian99dd03e2013-01-29 03:56:06 +00006297 get_ST(r_src),
6298 get_ST(0)
sewardj3368e102006-03-06 19:05:07 +00006299 )
6300 );
6301 break;
6302
sewardj4e1a1e92005-05-25 00:44:13 +00006303 case 0xE2:
6304 DIP("fnclex\n");
6305 break;
6306
sewardj0585a032005-11-05 02:55:06 +00006307 case 0xE3: {
6308 /* Uses dirty helper:
6309 void amd64g_do_FINIT ( VexGuestAMD64State* ) */
6310 IRDirty* d = unsafeIRDirty_0_N (
6311 0/*regparms*/,
6312 "amd64g_dirtyhelper_FINIT",
6313 &amd64g_dirtyhelper_FINIT,
florian90419562013-08-15 20:54:52 +00006314 mkIRExprVec_1( IRExpr_BBPTR() )
sewardj0585a032005-11-05 02:55:06 +00006315 );
sewardj0585a032005-11-05 02:55:06 +00006316
6317 /* declare we're writing guest state */
6318 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006319 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj0585a032005-11-05 02:55:06 +00006320
6321 d->fxState[0].fx = Ifx_Write;
6322 d->fxState[0].offset = OFFB_FTOP;
6323 d->fxState[0].size = sizeof(UInt);
6324
6325 d->fxState[1].fx = Ifx_Write;
6326 d->fxState[1].offset = OFFB_FPREGS;
6327 d->fxState[1].size = 8 * sizeof(ULong);
6328
6329 d->fxState[2].fx = Ifx_Write;
6330 d->fxState[2].offset = OFFB_FPTAGS;
6331 d->fxState[2].size = 8 * sizeof(UChar);
6332
6333 d->fxState[3].fx = Ifx_Write;
6334 d->fxState[3].offset = OFFB_FPROUND;
6335 d->fxState[3].size = sizeof(ULong);
6336
6337 d->fxState[4].fx = Ifx_Write;
6338 d->fxState[4].offset = OFFB_FC3210;
6339 d->fxState[4].size = sizeof(ULong);
6340
6341 stmt( IRStmt_Dirty(d) );
6342
6343 DIP("fninit\n");
6344 break;
6345 }
sewardjc49ce232005-02-25 13:03:03 +00006346
6347 case 0xE8 ... 0xEF: /* FUCOMI %st(0),%st(?) */
6348 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, False );
6349 break;
6350
sewardj48a89d82005-05-06 11:50:13 +00006351 case 0xF0 ... 0xF7: /* FCOMI %st(0),%st(?) */
6352 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, False );
6353 break;
sewardjc49ce232005-02-25 13:03:03 +00006354
6355 default:
6356 goto decode_fail;
6357 }
6358 }
6359 }
6360
sewardj137015d2005-03-27 04:01:15 +00006361 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDC opcodes +-+-+-+-+-+-+-+ */
6362 else
6363 if (first_opcode == 0xDC) {
6364 if (modrm < 0xC0) {
6365
sewardj434e0692005-03-27 17:36:08 +00006366 /* bits 5,4,3 are an opcode extension, and the modRM also
6367 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006368 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj434e0692005-03-27 17:36:08 +00006369 delta += len;
6370
6371 switch (gregLO3ofRM(modrm)) {
6372
sewardje6939f02005-05-07 01:01:24 +00006373 case 0: /* FADD double-real */
6374 fp_do_op_mem_ST_0 ( addr, "add", dis_buf, Iop_AddF64, True );
6375 break;
6376
6377 case 1: /* FMUL double-real */
6378 fp_do_op_mem_ST_0 ( addr, "mul", dis_buf, Iop_MulF64, True );
6379 break;
6380
sewardjd20c8852005-01-20 20:04:07 +00006381//.. case 2: /* FCOM double-real */
6382//.. DIP("fcoml %s\n", dis_buf);
6383//.. /* This forces C1 to zero, which isn't right. */
6384//.. put_C3210(
6385//.. binop( Iop_And32,
6386//.. binop(Iop_Shl32,
6387//.. binop(Iop_CmpF64,
6388//.. get_ST(0),
6389//.. loadLE(Ity_F64,mkexpr(addr))),
6390//.. mkU8(8)),
6391//.. mkU32(0x4500)
6392//.. ));
6393//.. break;
sewardj566d2c72005-08-10 11:43:42 +00006394
6395 case 3: /* FCOMP double-real */
6396 DIP("fcompl %s\n", dis_buf);
6397 /* This forces C1 to zero, which isn't right. */
6398 put_C3210(
6399 unop(Iop_32Uto64,
6400 binop( Iop_And32,
6401 binop(Iop_Shl32,
6402 binop(Iop_CmpF64,
6403 get_ST(0),
6404 loadLE(Ity_F64,mkexpr(addr))),
6405 mkU8(8)),
6406 mkU32(0x4500)
6407 )));
6408 fp_pop();
6409 break;
sewardje6939f02005-05-07 01:01:24 +00006410
6411 case 4: /* FSUB double-real */
6412 fp_do_op_mem_ST_0 ( addr, "sub", dis_buf, Iop_SubF64, True );
6413 break;
sewardj434e0692005-03-27 17:36:08 +00006414
6415 case 5: /* FSUBR double-real */
6416 fp_do_oprev_mem_ST_0 ( addr, "subr", dis_buf, Iop_SubF64, True );
6417 break;
6418
sewardje6939f02005-05-07 01:01:24 +00006419 case 6: /* FDIV double-real */
6420 fp_do_op_mem_ST_0 ( addr, "div", dis_buf, Iop_DivF64, True );
6421 break;
6422
6423 case 7: /* FDIVR double-real */
6424 fp_do_oprev_mem_ST_0 ( addr, "divr", dis_buf, Iop_DivF64, True );
6425 break;
sewardj434e0692005-03-27 17:36:08 +00006426
6427 default:
6428 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6429 vex_printf("first_opcode == 0xDC\n");
6430 goto decode_fail;
6431 }
sewardj137015d2005-03-27 04:01:15 +00006432
6433 } else {
6434
6435 delta++;
6436 switch (modrm) {
6437
6438 case 0xC0 ... 0xC7: /* FADD %st(0),%st(?) */
6439 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, False );
6440 break;
6441
sewardj7bc00082005-03-27 05:08:32 +00006442 case 0xC8 ... 0xCF: /* FMUL %st(0),%st(?) */
6443 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, False );
6444 break;
6445
sewardj434e0692005-03-27 17:36:08 +00006446 case 0xE0 ... 0xE7: /* FSUBR %st(0),%st(?) */
6447 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, False );
6448 break;
6449
sewardje6939f02005-05-07 01:01:24 +00006450 case 0xE8 ... 0xEF: /* FSUB %st(0),%st(?) */
6451 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, False );
6452 break;
6453
6454 case 0xF0 ... 0xF7: /* FDIVR %st(0),%st(?) */
6455 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, False );
6456 break;
sewardj137015d2005-03-27 04:01:15 +00006457
6458 case 0xF8 ... 0xFF: /* FDIV %st(0),%st(?) */
6459 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, False );
6460 break;
6461
6462 default:
6463 goto decode_fail;
6464 }
6465
6466 }
6467 }
sewardj8d965312005-02-25 02:48:47 +00006468
6469 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDD opcodes +-+-+-+-+-+-+-+ */
6470 else
6471 if (first_opcode == 0xDD) {
6472
6473 if (modrm < 0xC0) {
6474
6475 /* bits 5,4,3 are an opcode extension, and the modRM also
6476 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006477 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00006478 delta += len;
6479
sewardj901ed122005-02-27 13:25:31 +00006480 switch (gregLO3ofRM(modrm)) {
sewardj8d965312005-02-25 02:48:47 +00006481
6482 case 0: /* FLD double-real */
6483 DIP("fldl %s\n", dis_buf);
6484 fp_push();
sewardjaf1ceca2005-06-30 23:31:27 +00006485 put_ST(0, loadLE(Ity_F64, mkexpr(addr)));
sewardj8d965312005-02-25 02:48:47 +00006486 break;
6487
sewardjfcf21f32006-08-04 14:51:19 +00006488 case 1: /* FISTTPQ m64 (SSE3) */
6489 DIP("fistppll %s\n", dis_buf);
6490 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006491 binop(Iop_F64toI64S, mkU32(Irrm_ZERO), get_ST(0)) );
sewardjfcf21f32006-08-04 14:51:19 +00006492 fp_pop();
6493 break;
6494
sewardjc49ce232005-02-25 13:03:03 +00006495 case 2: /* FST double-real */
6496 DIP("fstl %s\n", dis_buf);
6497 storeLE(mkexpr(addr), get_ST(0));
6498 break;
sewardj8d965312005-02-25 02:48:47 +00006499
6500 case 3: /* FSTP double-real */
6501 DIP("fstpl %s\n", dis_buf);
6502 storeLE(mkexpr(addr), get_ST(0));
6503 fp_pop();
6504 break;
6505
sewardj9ae42a72012-02-16 14:18:56 +00006506 case 4: { /* FRSTOR m94/m108 */
6507 IRTemp ew = newTemp(Ity_I32);
6508 IRTemp w64 = newTemp(Ity_I64);
6509 IRDirty* d;
6510 if ( have66(pfx) ) {
6511 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00006512 VexEmNote amd64g_dirtyhelper_FRSTORS
sewardj9ae42a72012-02-16 14:18:56 +00006513 ( VexGuestAMD64State*, HWord ) */
6514 d = unsafeIRDirty_0_N (
6515 0/*regparms*/,
6516 "amd64g_dirtyhelper_FRSTORS",
6517 &amd64g_dirtyhelper_FRSTORS,
6518 mkIRExprVec_1( mkexpr(addr) )
6519 );
6520 d->mSize = 94;
6521 } else {
6522 /* Uses dirty helper:
florian6ef84be2012-08-26 03:20:07 +00006523 VexEmNote amd64g_dirtyhelper_FRSTOR
sewardj9ae42a72012-02-16 14:18:56 +00006524 ( VexGuestAMD64State*, HWord ) */
6525 d = unsafeIRDirty_0_N (
6526 0/*regparms*/,
6527 "amd64g_dirtyhelper_FRSTOR",
6528 &amd64g_dirtyhelper_FRSTOR,
florian90419562013-08-15 20:54:52 +00006529 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj9ae42a72012-02-16 14:18:56 +00006530 );
6531 d->mSize = 108;
6532 }
6533
sewardj74142b82013-08-08 10:28:59 +00006534 d->tmp = w64;
sewardj9ae42a72012-02-16 14:18:56 +00006535 /* declare we're reading memory */
6536 d->mFx = Ifx_Read;
6537 d->mAddr = mkexpr(addr);
6538 /* d->mSize set above */
6539
6540 /* declare we're writing guest state */
6541 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006542 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006543
6544 d->fxState[0].fx = Ifx_Write;
6545 d->fxState[0].offset = OFFB_FTOP;
6546 d->fxState[0].size = sizeof(UInt);
6547
6548 d->fxState[1].fx = Ifx_Write;
6549 d->fxState[1].offset = OFFB_FPREGS;
6550 d->fxState[1].size = 8 * sizeof(ULong);
6551
6552 d->fxState[2].fx = Ifx_Write;
6553 d->fxState[2].offset = OFFB_FPTAGS;
6554 d->fxState[2].size = 8 * sizeof(UChar);
6555
6556 d->fxState[3].fx = Ifx_Write;
6557 d->fxState[3].offset = OFFB_FPROUND;
6558 d->fxState[3].size = sizeof(ULong);
6559
6560 d->fxState[4].fx = Ifx_Write;
6561 d->fxState[4].offset = OFFB_FC3210;
6562 d->fxState[4].size = sizeof(ULong);
6563
6564 stmt( IRStmt_Dirty(d) );
6565
6566 /* ew contains any emulation warning we may need to
6567 issue. If needed, side-exit to the next insn,
6568 reporting the warning, so that Valgrind's dispatcher
6569 sees the warning. */
6570 assign(ew, unop(Iop_64to32,mkexpr(w64)) );
6571 put_emwarn( mkexpr(ew) );
6572 stmt(
6573 IRStmt_Exit(
6574 binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
6575 Ijk_EmWarn,
sewardjc6f970f2012-04-02 21:54:49 +00006576 IRConst_U64( guest_RIP_bbstart+delta ),
6577 OFFB_RIP
sewardj9ae42a72012-02-16 14:18:56 +00006578 )
6579 );
6580
6581 if ( have66(pfx) ) {
6582 DIP("frstors %s\n", dis_buf);
6583 } else {
6584 DIP("frstor %s\n", dis_buf);
6585 }
6586 break;
6587 }
6588
6589 case 6: { /* FNSAVE m94/m108 */
6590 IRDirty *d;
6591 if ( have66(pfx) ) {
6592 /* Uses dirty helper:
sewardj74142b82013-08-08 10:28:59 +00006593 void amd64g_dirtyhelper_FNSAVES ( VexGuestAMD64State*,
6594 HWord ) */
sewardj9ae42a72012-02-16 14:18:56 +00006595 d = unsafeIRDirty_0_N (
6596 0/*regparms*/,
6597 "amd64g_dirtyhelper_FNSAVES",
6598 &amd64g_dirtyhelper_FNSAVES,
6599 mkIRExprVec_1( mkexpr(addr) )
6600 );
6601 d->mSize = 94;
6602 } else {
6603 /* Uses dirty helper:
sewardj74142b82013-08-08 10:28:59 +00006604 void amd64g_dirtyhelper_FNSAVE ( VexGuestAMD64State*,
6605 HWord ) */
sewardj9ae42a72012-02-16 14:18:56 +00006606 d = unsafeIRDirty_0_N (
6607 0/*regparms*/,
6608 "amd64g_dirtyhelper_FNSAVE",
6609 &amd64g_dirtyhelper_FNSAVE,
florian90419562013-08-15 20:54:52 +00006610 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj74142b82013-08-08 10:28:59 +00006611 );
sewardj9ae42a72012-02-16 14:18:56 +00006612 d->mSize = 108;
6613 }
sewardj74142b82013-08-08 10:28:59 +00006614
sewardj9ae42a72012-02-16 14:18:56 +00006615 /* declare we're writing memory */
6616 d->mFx = Ifx_Write;
6617 d->mAddr = mkexpr(addr);
6618 /* d->mSize set above */
6619
6620 /* declare we're reading guest state */
6621 d->nFxState = 5;
sewardjc9069f22012-06-01 16:09:50 +00006622 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj9ae42a72012-02-16 14:18:56 +00006623
6624 d->fxState[0].fx = Ifx_Read;
6625 d->fxState[0].offset = OFFB_FTOP;
6626 d->fxState[0].size = sizeof(UInt);
6627
6628 d->fxState[1].fx = Ifx_Read;
6629 d->fxState[1].offset = OFFB_FPREGS;
6630 d->fxState[1].size = 8 * sizeof(ULong);
6631
6632 d->fxState[2].fx = Ifx_Read;
6633 d->fxState[2].offset = OFFB_FPTAGS;
6634 d->fxState[2].size = 8 * sizeof(UChar);
6635
6636 d->fxState[3].fx = Ifx_Read;
6637 d->fxState[3].offset = OFFB_FPROUND;
6638 d->fxState[3].size = sizeof(ULong);
6639
6640 d->fxState[4].fx = Ifx_Read;
6641 d->fxState[4].offset = OFFB_FC3210;
6642 d->fxState[4].size = sizeof(ULong);
6643
6644 stmt( IRStmt_Dirty(d) );
6645
6646 if ( have66(pfx) ) {
6647 DIP("fnsaves %s\n", dis_buf);
6648 } else {
6649 DIP("fnsave %s\n", dis_buf);
6650 }
6651 break;
6652 }
sewardj8d965312005-02-25 02:48:47 +00006653
sewardj7c2d2822006-03-07 00:22:02 +00006654 case 7: { /* FNSTSW m16 */
6655 IRExpr* sw = get_FPU_sw();
sewardjdd40fdf2006-12-24 02:20:24 +00006656 vassert(typeOfIRExpr(irsb->tyenv, sw) == Ity_I16);
sewardj7c2d2822006-03-07 00:22:02 +00006657 storeLE( mkexpr(addr), sw );
6658 DIP("fnstsw %s\n", dis_buf);
6659 break;
6660 }
6661
sewardj8d965312005-02-25 02:48:47 +00006662 default:
sewardj901ed122005-02-27 13:25:31 +00006663 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardj8d965312005-02-25 02:48:47 +00006664 vex_printf("first_opcode == 0xDD\n");
6665 goto decode_fail;
6666 }
6667 } else {
6668 delta++;
6669 switch (modrm) {
6670
sewardj6847d8c2005-05-12 19:21:55 +00006671 case 0xC0 ... 0xC7: /* FFREE %st(?) */
6672 r_dst = (UInt)modrm - 0xC0;
6673 DIP("ffree %%st(%u)\n", r_dst);
6674 put_ST_TAG ( r_dst, mkU8(0) );
6675 break;
6676
sewardjbfabcc42005-08-08 09:58:05 +00006677 case 0xD0 ... 0xD7: /* FST %st(0),%st(?) */
6678 r_dst = (UInt)modrm - 0xD0;
sewardjc7cd2142005-09-09 22:31:49 +00006679 DIP("fst %%st(0),%%st(%u)\n", r_dst);
sewardjbfabcc42005-08-08 09:58:05 +00006680 /* P4 manual says: "If the destination operand is a
6681 non-empty register, the invalid-operation exception
6682 is not generated. Hence put_ST_UNCHECKED. */
6683 put_ST_UNCHECKED(r_dst, get_ST(0));
6684 break;
sewardj8d965312005-02-25 02:48:47 +00006685
6686 case 0xD8 ... 0xDF: /* FSTP %st(0),%st(?) */
6687 r_dst = (UInt)modrm - 0xD8;
sewardj1027dc22005-02-26 01:55:02 +00006688 DIP("fstp %%st(0),%%st(%u)\n", r_dst);
sewardj8d965312005-02-25 02:48:47 +00006689 /* P4 manual says: "If the destination operand is a
6690 non-empty register, the invalid-operation exception
6691 is not generated. Hence put_ST_UNCHECKED. */
6692 put_ST_UNCHECKED(r_dst, get_ST(0));
6693 fp_pop();
6694 break;
6695
sewardjfb6c1792005-10-05 17:58:32 +00006696 case 0xE0 ... 0xE7: /* FUCOM %st(0),%st(?) */
6697 r_dst = (UInt)modrm - 0xE0;
sewardj62d05432005-10-29 22:30:47 +00006698 DIP("fucom %%st(0),%%st(%u)\n", r_dst);
sewardjfb6c1792005-10-05 17:58:32 +00006699 /* This forces C1 to zero, which isn't right. */
6700 put_C3210(
6701 unop(Iop_32Uto64,
6702 binop( Iop_And32,
6703 binop(Iop_Shl32,
6704 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6705 mkU8(8)),
6706 mkU32(0x4500)
6707 )));
6708 break;
6709
sewardj9fb2f472005-11-05 01:12:18 +00006710 case 0xE8 ... 0xEF: /* FUCOMP %st(0),%st(?) */
6711 r_dst = (UInt)modrm - 0xE8;
sewardj43f45732005-11-05 13:04:34 +00006712 DIP("fucomp %%st(0),%%st(%u)\n", r_dst);
sewardj9fb2f472005-11-05 01:12:18 +00006713 /* This forces C1 to zero, which isn't right. */
6714 put_C3210(
6715 unop(Iop_32Uto64,
6716 binop( Iop_And32,
6717 binop(Iop_Shl32,
6718 binop(Iop_CmpF64, get_ST(0), get_ST(r_dst)),
6719 mkU8(8)),
6720 mkU32(0x4500)
6721 )));
6722 fp_pop();
6723 break;
sewardj8d965312005-02-25 02:48:47 +00006724
6725 default:
6726 goto decode_fail;
6727 }
6728 }
6729 }
6730
sewardj137015d2005-03-27 04:01:15 +00006731 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDE opcodes +-+-+-+-+-+-+-+ */
6732 else
6733 if (first_opcode == 0xDE) {
6734
6735 if (modrm < 0xC0) {
6736
sewardj6847d8c2005-05-12 19:21:55 +00006737 /* bits 5,4,3 are an opcode extension, and the modRM also
6738 specifies an address. */
6739 IROp fop;
sewardj2e28ac42008-12-04 00:05:12 +00006740 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj6847d8c2005-05-12 19:21:55 +00006741 delta += len;
6742
6743 switch (gregLO3ofRM(modrm)) {
6744
6745 case 0: /* FIADD m16int */ /* ST(0) += m16int */
6746 DIP("fiaddw %s\n", dis_buf);
6747 fop = Iop_AddF64;
6748 goto do_fop_m16;
6749
6750 case 1: /* FIMUL m16int */ /* ST(0) *= m16int */
6751 DIP("fimulw %s\n", dis_buf);
6752 fop = Iop_MulF64;
6753 goto do_fop_m16;
6754
6755 case 4: /* FISUB m16int */ /* ST(0) -= m16int */
6756 DIP("fisubw %s\n", dis_buf);
6757 fop = Iop_SubF64;
6758 goto do_fop_m16;
6759
6760 case 5: /* FISUBR m16int */ /* ST(0) = m16int - ST(0) */
6761 DIP("fisubrw %s\n", dis_buf);
6762 fop = Iop_SubF64;
6763 goto do_foprev_m16;
6764
6765 case 6: /* FIDIV m16int */ /* ST(0) /= m16int */
6766 DIP("fisubw %s\n", dis_buf);
6767 fop = Iop_DivF64;
6768 goto do_fop_m16;
6769
6770 case 7: /* FIDIVR m16int */ /* ST(0) = m16int / ST(0) */
6771 DIP("fidivrw %s\n", dis_buf);
6772 fop = Iop_DivF64;
6773 goto do_foprev_m16;
6774
6775 do_fop_m16:
6776 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006777 triop(fop,
6778 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6847d8c2005-05-12 19:21:55 +00006779 get_ST(0),
sewardj6c299f32009-12-31 18:00:12 +00006780 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006781 unop(Iop_16Sto32,
6782 loadLE(Ity_I16, mkexpr(addr))))));
6783 break;
6784
6785 do_foprev_m16:
6786 put_ST_UNCHECKED(0,
sewardj4796d662006-02-05 16:06:26 +00006787 triop(fop,
6788 get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
sewardj6c299f32009-12-31 18:00:12 +00006789 unop(Iop_I32StoF64,
sewardj6847d8c2005-05-12 19:21:55 +00006790 unop(Iop_16Sto32,
6791 loadLE(Ity_I16, mkexpr(addr)))),
6792 get_ST(0)));
6793 break;
6794
6795 default:
6796 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
6797 vex_printf("first_opcode == 0xDE\n");
6798 goto decode_fail;
6799 }
sewardj137015d2005-03-27 04:01:15 +00006800
6801 } else {
6802
6803 delta++;
6804 switch (modrm) {
6805
6806 case 0xC0 ... 0xC7: /* FADDP %st(0),%st(?) */
6807 fp_do_op_ST_ST ( "add", Iop_AddF64, 0, modrm - 0xC0, True );
6808 break;
6809
6810 case 0xC8 ... 0xCF: /* FMULP %st(0),%st(?) */
6811 fp_do_op_ST_ST ( "mul", Iop_MulF64, 0, modrm - 0xC8, True );
6812 break;
6813
sewardj1859ecd2007-02-23 08:48:22 +00006814 case 0xD9: /* FCOMPP %st(0),%st(1) */
6815 DIP("fcompp %%st(0),%%st(1)\n");
6816 /* This forces C1 to zero, which isn't right. */
6817 put_C3210(
6818 unop(Iop_32Uto64,
6819 binop( Iop_And32,
6820 binop(Iop_Shl32,
6821 binop(Iop_CmpF64, get_ST(0), get_ST(1)),
6822 mkU8(8)),
6823 mkU32(0x4500)
6824 )));
6825 fp_pop();
6826 fp_pop();
6827 break;
sewardj137015d2005-03-27 04:01:15 +00006828
6829 case 0xE0 ... 0xE7: /* FSUBRP %st(0),%st(?) */
6830 fp_do_oprev_ST_ST ( "subr", Iop_SubF64, 0, modrm - 0xE0, True );
6831 break;
6832
6833 case 0xE8 ... 0xEF: /* FSUBP %st(0),%st(?) */
6834 fp_do_op_ST_ST ( "sub", Iop_SubF64, 0, modrm - 0xE8, True );
6835 break;
6836
6837 case 0xF0 ... 0xF7: /* FDIVRP %st(0),%st(?) */
6838 fp_do_oprev_ST_ST ( "divr", Iop_DivF64, 0, modrm - 0xF0, True );
6839 break;
6840
6841 case 0xF8 ... 0xFF: /* FDIVP %st(0),%st(?) */
6842 fp_do_op_ST_ST ( "div", Iop_DivF64, 0, modrm - 0xF8, True );
6843 break;
6844
6845 default:
6846 goto decode_fail;
6847 }
6848
6849 }
6850 }
sewardjc49ce232005-02-25 13:03:03 +00006851
6852 /* -+-+-+-+-+-+-+-+-+-+-+-+ 0xDF opcodes +-+-+-+-+-+-+-+ */
6853 else
6854 if (first_opcode == 0xDF) {
6855
6856 if (modrm < 0xC0) {
6857
6858 /* bits 5,4,3 are an opcode extension, and the modRM also
6859 specifies an address. */
sewardj2e28ac42008-12-04 00:05:12 +00006860 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjc49ce232005-02-25 13:03:03 +00006861 delta += len;
6862
sewardj901ed122005-02-27 13:25:31 +00006863 switch (gregLO3ofRM(modrm)) {
sewardjc49ce232005-02-25 13:03:03 +00006864
sewardj434e0692005-03-27 17:36:08 +00006865 case 0: /* FILD m16int */
6866 DIP("fildw %s\n", dis_buf);
6867 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006868 put_ST(0, unop(Iop_I32StoF64,
sewardj434e0692005-03-27 17:36:08 +00006869 unop(Iop_16Sto32,
6870 loadLE(Ity_I16, mkexpr(addr)))));
6871 break;
6872
sewardjfcf21f32006-08-04 14:51:19 +00006873 case 1: /* FISTTPS m16 (SSE3) */
6874 DIP("fisttps %s\n", dis_buf);
6875 storeLE( mkexpr(addr),
sewardjb707d102007-07-11 22:49:26 +00006876 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006877 binop(Iop_F64toI32S, mkU32(Irrm_ZERO), get_ST(0)) ));
sewardjfcf21f32006-08-04 14:51:19 +00006878 fp_pop();
6879 break;
6880
sewardj9f5c8fd2010-05-10 20:08:12 +00006881 case 2: /* FIST m16 */
6882 DIP("fists %s\n", dis_buf);
6883 storeLE( mkexpr(addr),
6884 x87ishly_qnarrow_32_to_16(
6885 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
6886 break;
sewardj6847d8c2005-05-12 19:21:55 +00006887
sewardjb707d102007-07-11 22:49:26 +00006888 case 3: /* FISTP m16 */
6889 DIP("fistps %s\n", dis_buf);
6890 storeLE( mkexpr(addr),
6891 x87ishly_qnarrow_32_to_16(
sewardj6c299f32009-12-31 18:00:12 +00006892 binop(Iop_F64toI32S, get_roundingmode(), get_ST(0)) ));
sewardjb707d102007-07-11 22:49:26 +00006893 fp_pop();
6894 break;
sewardj37d52572005-02-25 14:22:12 +00006895
6896 case 5: /* FILD m64 */
6897 DIP("fildll %s\n", dis_buf);
6898 fp_push();
sewardj6c299f32009-12-31 18:00:12 +00006899 put_ST(0, binop(Iop_I64StoF64,
sewardj37d52572005-02-25 14:22:12 +00006900 get_roundingmode(),
6901 loadLE(Ity_I64, mkexpr(addr))));
6902 break;
6903
sewardj6847d8c2005-05-12 19:21:55 +00006904 case 7: /* FISTP m64 */
6905 DIP("fistpll %s\n", dis_buf);
6906 storeLE( mkexpr(addr),
sewardj6c299f32009-12-31 18:00:12 +00006907 binop(Iop_F64toI64S, get_roundingmode(), get_ST(0)) );
sewardj6847d8c2005-05-12 19:21:55 +00006908 fp_pop();
6909 break;
sewardjc49ce232005-02-25 13:03:03 +00006910
6911 default:
sewardj901ed122005-02-27 13:25:31 +00006912 vex_printf("unhandled opc_aux = 0x%2x\n", gregLO3ofRM(modrm));
sewardjc49ce232005-02-25 13:03:03 +00006913 vex_printf("first_opcode == 0xDF\n");
6914 goto decode_fail;
6915 }
6916
6917 } else {
6918
6919 delta++;
6920 switch (modrm) {
6921
6922 case 0xC0: /* FFREEP %st(0) */
6923 DIP("ffreep %%st(%d)\n", 0);
6924 put_ST_TAG ( 0, mkU8(0) );
6925 fp_pop();
6926 break;
6927
sewardj4f9847d2005-07-25 11:58:34 +00006928 case 0xE0: /* FNSTSW %ax */
6929 DIP("fnstsw %%ax\n");
6930 /* Invent a plausible-looking FPU status word value and
6931 dump it in %AX:
6932 ((ftop & 7) << 11) | (c3210 & 0x4700)
6933 */
6934 putIRegRAX(
6935 2,
6936 unop(Iop_32to16,
6937 binop(Iop_Or32,
6938 binop(Iop_Shl32,
6939 binop(Iop_And32, get_ftop(), mkU32(7)),
6940 mkU8(11)),
6941 binop(Iop_And32,
6942 unop(Iop_64to32, get_C3210()),
6943 mkU32(0x4700))
6944 )));
6945 break;
sewardj924215b2005-03-26 21:50:31 +00006946
6947 case 0xE8 ... 0xEF: /* FUCOMIP %st(0),%st(?) */
6948 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xE8, True );
6949 break;
6950
sewardj48a89d82005-05-06 11:50:13 +00006951 case 0xF0 ... 0xF7: /* FCOMIP %st(0),%st(?) */
6952 /* not really right since COMIP != UCOMIP */
6953 fp_do_ucomi_ST0_STi( (UInt)modrm - 0xF0, True );
6954 break;
sewardjc49ce232005-02-25 13:03:03 +00006955
6956 default:
6957 goto decode_fail;
6958 }
6959 }
6960
6961 }
sewardj8d965312005-02-25 02:48:47 +00006962
6963 else
sewardj137015d2005-03-27 04:01:15 +00006964 goto decode_fail;
sewardj8d965312005-02-25 02:48:47 +00006965
6966 *decode_ok = True;
6967 return delta;
6968
6969 decode_fail:
6970 *decode_ok = False;
6971 return delta;
6972}
6973
6974
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- MMX INSTRUCTIONS                                     ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
6980
/* Effect of MMX insns on x87 FPU state (table 11-2 of
   IA32 arch manual, volume 3):

   Read from, or write to MMX register (viz, any insn except EMMS):
   * All tags set to Valid (non-empty) -- FPTAGS[i] := nonzero
   * FP stack pointer set to zero

   EMMS:
   * All tags set to Invalid (empty) -- FPTAGS[i] := zero
   * FP stack pointer set to zero
*/
6992
6993static void do_MMX_preamble ( void )
6994{
sewardjdd40fdf2006-12-24 02:20:24 +00006995 Int i;
6996 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
6997 IRExpr* zero = mkU32(0);
6998 IRExpr* tag1 = mkU8(1);
sewardj8711f662005-05-09 17:52:56 +00006999 put_ftop(zero);
7000 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00007001 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag1) ) );
sewardj8711f662005-05-09 17:52:56 +00007002}
7003
7004static void do_EMMS_preamble ( void )
7005{
sewardjdd40fdf2006-12-24 02:20:24 +00007006 Int i;
7007 IRRegArray* descr = mkIRRegArray( OFFB_FPTAGS, Ity_I8, 8 );
7008 IRExpr* zero = mkU32(0);
7009 IRExpr* tag0 = mkU8(0);
sewardj8711f662005-05-09 17:52:56 +00007010 put_ftop(zero);
7011 for (i = 0; i < 8; i++)
floriand6f38b32012-05-31 15:46:18 +00007012 stmt( IRStmt_PutI( mkIRPutI(descr, zero, i, tag0) ) );
sewardj8711f662005-05-09 17:52:56 +00007013}
7014
7015
7016static IRExpr* getMMXReg ( UInt archreg )
7017{
7018 vassert(archreg < 8);
7019 return IRExpr_Get( OFFB_FPREGS + 8 * archreg, Ity_I64 );
7020}
7021
7022
7023static void putMMXReg ( UInt archreg, IRExpr* e )
7024{
7025 vassert(archreg < 8);
sewardjdd40fdf2006-12-24 02:20:24 +00007026 vassert(typeOfIRExpr(irsb->tyenv,e) == Ity_I64);
sewardj8711f662005-05-09 17:52:56 +00007027 stmt( IRStmt_Put( OFFB_FPREGS + 8 * archreg, e ) );
7028}
7029
7030
/* Helper for non-shift MMX insns.  Note this is incomplete in the
   sense that it does not first call do_MMX_preamble() -- that is the
   responsibility of its caller. */
7034
7035static
sewardj2e28ac42008-12-04 00:05:12 +00007036ULong dis_MMXop_regmem_to_reg ( VexAbiInfo* vbi,
7037 Prefix pfx,
7038 Long delta,
7039 UChar opc,
florian55085f82012-11-21 00:36:55 +00007040 const HChar* name,
sewardj2e28ac42008-12-04 00:05:12 +00007041 Bool show_granularity )
sewardj8711f662005-05-09 17:52:56 +00007042{
7043 HChar dis_buf[50];
7044 UChar modrm = getUChar(delta);
7045 Bool isReg = epartIsReg(modrm);
7046 IRExpr* argL = NULL;
7047 IRExpr* argR = NULL;
7048 IRExpr* argG = NULL;
7049 IRExpr* argE = NULL;
7050 IRTemp res = newTemp(Ity_I64);
7051
7052 Bool invG = False;
7053 IROp op = Iop_INVALID;
7054 void* hAddr = NULL;
florian55085f82012-11-21 00:36:55 +00007055 const HChar* hName = NULL;
sewardj8711f662005-05-09 17:52:56 +00007056 Bool eLeft = False;
7057
7058# define XXX(_name) do { hAddr = &_name; hName = #_name; } while (0)
7059
7060 switch (opc) {
7061 /* Original MMX ones */
7062 case 0xFC: op = Iop_Add8x8; break;
7063 case 0xFD: op = Iop_Add16x4; break;
7064 case 0xFE: op = Iop_Add32x2; break;
7065
7066 case 0xEC: op = Iop_QAdd8Sx8; break;
7067 case 0xED: op = Iop_QAdd16Sx4; break;
7068
7069 case 0xDC: op = Iop_QAdd8Ux8; break;
7070 case 0xDD: op = Iop_QAdd16Ux4; break;
7071
7072 case 0xF8: op = Iop_Sub8x8; break;
7073 case 0xF9: op = Iop_Sub16x4; break;
7074 case 0xFA: op = Iop_Sub32x2; break;
7075
7076 case 0xE8: op = Iop_QSub8Sx8; break;
7077 case 0xE9: op = Iop_QSub16Sx4; break;
7078
7079 case 0xD8: op = Iop_QSub8Ux8; break;
7080 case 0xD9: op = Iop_QSub16Ux4; break;
7081
7082 case 0xE5: op = Iop_MulHi16Sx4; break;
7083 case 0xD5: op = Iop_Mul16x4; break;
7084 case 0xF5: XXX(amd64g_calculate_mmx_pmaddwd); break;
7085
7086 case 0x74: op = Iop_CmpEQ8x8; break;
7087 case 0x75: op = Iop_CmpEQ16x4; break;
7088 case 0x76: op = Iop_CmpEQ32x2; break;
7089
7090 case 0x64: op = Iop_CmpGT8Sx8; break;
7091 case 0x65: op = Iop_CmpGT16Sx4; break;
7092 case 0x66: op = Iop_CmpGT32Sx2; break;
7093
sewardj5f438dd2011-06-16 11:36:23 +00007094 case 0x6B: op = Iop_QNarrowBin32Sto16Sx4; eLeft = True; break;
7095 case 0x63: op = Iop_QNarrowBin16Sto8Sx8; eLeft = True; break;
7096 case 0x67: op = Iop_QNarrowBin16Sto8Ux8; eLeft = True; break;
sewardj8711f662005-05-09 17:52:56 +00007097
7098 case 0x68: op = Iop_InterleaveHI8x8; eLeft = True; break;
7099 case 0x69: op = Iop_InterleaveHI16x4; eLeft = True; break;
7100 case 0x6A: op = Iop_InterleaveHI32x2; eLeft = True; break;
7101
7102 case 0x60: op = Iop_InterleaveLO8x8; eLeft = True; break;
7103 case 0x61: op = Iop_InterleaveLO16x4; eLeft = True; break;
7104 case 0x62: op = Iop_InterleaveLO32x2; eLeft = True; break;
7105
7106 case 0xDB: op = Iop_And64; break;
7107 case 0xDF: op = Iop_And64; invG = True; break;
7108 case 0xEB: op = Iop_Or64; break;
7109 case 0xEF: /* Possibly do better here if argL and argR are the
7110 same reg */
7111 op = Iop_Xor64; break;
7112
7113 /* Introduced in SSE1 */
7114 case 0xE0: op = Iop_Avg8Ux8; break;
7115 case 0xE3: op = Iop_Avg16Ux4; break;
7116 case 0xEE: op = Iop_Max16Sx4; break;
7117 case 0xDE: op = Iop_Max8Ux8; break;
7118 case 0xEA: op = Iop_Min16Sx4; break;
7119 case 0xDA: op = Iop_Min8Ux8; break;
7120 case 0xE4: op = Iop_MulHi16Ux4; break;
sewardja7ba8c42005-05-10 20:08:34 +00007121 case 0xF6: XXX(amd64g_calculate_mmx_psadbw); break;
sewardj8711f662005-05-09 17:52:56 +00007122
7123 /* Introduced in SSE2 */
7124 case 0xD4: op = Iop_Add64; break;
7125 case 0xFB: op = Iop_Sub64; break;
7126
7127 default:
7128 vex_printf("\n0x%x\n", (Int)opc);
7129 vpanic("dis_MMXop_regmem_to_reg");
7130 }
7131
7132# undef XXX
7133
7134 argG = getMMXReg(gregLO3ofRM(modrm));
7135 if (invG)
7136 argG = unop(Iop_Not64, argG);
7137
7138 if (isReg) {
7139 delta++;
7140 argE = getMMXReg(eregLO3ofRM(modrm));
7141 } else {
7142 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00007143 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007144 delta += len;
7145 argE = loadLE(Ity_I64, mkexpr(addr));
7146 }
7147
7148 if (eLeft) {
7149 argL = argE;
7150 argR = argG;
7151 } else {
7152 argL = argG;
7153 argR = argE;
7154 }
7155
7156 if (op != Iop_INVALID) {
7157 vassert(hName == NULL);
7158 vassert(hAddr == NULL);
7159 assign(res, binop(op, argL, argR));
7160 } else {
7161 vassert(hName != NULL);
7162 vassert(hAddr != NULL);
7163 assign( res,
7164 mkIRExprCCall(
7165 Ity_I64,
7166 0/*regparms*/, hName, hAddr,
7167 mkIRExprVec_2( argL, argR )
7168 )
7169 );
7170 }
7171
7172 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
7173
7174 DIP("%s%s %s, %s\n",
7175 name, show_granularity ? nameMMXGran(opc & 3) : "",
7176 ( isReg ? nameMMXReg(eregLO3ofRM(modrm)) : dis_buf ),
7177 nameMMXReg(gregLO3ofRM(modrm)) );
7178
7179 return delta;
7180}
7181
7182
7183/* Vector by scalar shift of G by the amount specified at the bottom
7184 of E. This is a straight copy of dis_SSE_shiftG_byE. */
7185
sewardj2e28ac42008-12-04 00:05:12 +00007186static ULong dis_MMX_shiftG_byE ( VexAbiInfo* vbi,
7187 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00007188 const HChar* opname, IROp op )
sewardj8711f662005-05-09 17:52:56 +00007189{
7190 HChar dis_buf[50];
7191 Int alen, size;
7192 IRTemp addr;
7193 Bool shl, shr, sar;
7194 UChar rm = getUChar(delta);
7195 IRTemp g0 = newTemp(Ity_I64);
7196 IRTemp g1 = newTemp(Ity_I64);
7197 IRTemp amt = newTemp(Ity_I64);
7198 IRTemp amt8 = newTemp(Ity_I8);
7199
7200 if (epartIsReg(rm)) {
7201 assign( amt, getMMXReg(eregLO3ofRM(rm)) );
7202 DIP("%s %s,%s\n", opname,
7203 nameMMXReg(eregLO3ofRM(rm)),
7204 nameMMXReg(gregLO3ofRM(rm)) );
7205 delta++;
7206 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007207 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007208 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
7209 DIP("%s %s,%s\n", opname,
7210 dis_buf,
7211 nameMMXReg(gregLO3ofRM(rm)) );
7212 delta += alen;
7213 }
7214 assign( g0, getMMXReg(gregLO3ofRM(rm)) );
7215 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
7216
7217 shl = shr = sar = False;
7218 size = 0;
7219 switch (op) {
7220 case Iop_ShlN16x4: shl = True; size = 32; break;
7221 case Iop_ShlN32x2: shl = True; size = 32; break;
7222 case Iop_Shl64: shl = True; size = 64; break;
7223 case Iop_ShrN16x4: shr = True; size = 16; break;
7224 case Iop_ShrN32x2: shr = True; size = 32; break;
7225 case Iop_Shr64: shr = True; size = 64; break;
7226 case Iop_SarN16x4: sar = True; size = 16; break;
7227 case Iop_SarN32x2: sar = True; size = 32; break;
7228 default: vassert(0);
7229 }
7230
7231 if (shl || shr) {
7232 assign(
7233 g1,
florian99dd03e2013-01-29 03:56:06 +00007234 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007235 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00007236 binop(op, mkexpr(g0), mkexpr(amt8)),
7237 mkU64(0)
sewardj8711f662005-05-09 17:52:56 +00007238 )
7239 );
7240 } else
7241 if (sar) {
7242 assign(
7243 g1,
florian99dd03e2013-01-29 03:56:06 +00007244 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007245 binop(Iop_CmpLT64U,mkexpr(amt),mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00007246 binop(op, mkexpr(g0), mkexpr(amt8)),
7247 binop(op, mkexpr(g0), mkU8(size-1))
sewardj8711f662005-05-09 17:52:56 +00007248 )
7249 );
7250 } else {
7251 vassert(0);
7252 }
7253
7254 putMMXReg( gregLO3ofRM(rm), mkexpr(g1) );
7255 return delta;
7256}
7257
7258
sewardj3d8107c2005-05-09 22:23:38 +00007259/* Vector by scalar shift of E by an immediate byte. This is a
7260 straight copy of dis_SSE_shiftE_imm. */
7261
7262static
florian55085f82012-11-21 00:36:55 +00007263ULong dis_MMX_shiftE_imm ( Long delta, const HChar* opname, IROp op )
sewardj3d8107c2005-05-09 22:23:38 +00007264{
7265 Bool shl, shr, sar;
7266 UChar rm = getUChar(delta);
7267 IRTemp e0 = newTemp(Ity_I64);
7268 IRTemp e1 = newTemp(Ity_I64);
7269 UChar amt, size;
7270 vassert(epartIsReg(rm));
7271 vassert(gregLO3ofRM(rm) == 2
7272 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00007273 amt = getUChar(delta+1);
sewardj3d8107c2005-05-09 22:23:38 +00007274 delta += 2;
7275 DIP("%s $%d,%s\n", opname,
7276 (Int)amt,
7277 nameMMXReg(eregLO3ofRM(rm)) );
7278
7279 assign( e0, getMMXReg(eregLO3ofRM(rm)) );
7280
7281 shl = shr = sar = False;
7282 size = 0;
7283 switch (op) {
7284 case Iop_ShlN16x4: shl = True; size = 16; break;
7285 case Iop_ShlN32x2: shl = True; size = 32; break;
7286 case Iop_Shl64: shl = True; size = 64; break;
7287 case Iop_SarN16x4: sar = True; size = 16; break;
7288 case Iop_SarN32x2: sar = True; size = 32; break;
7289 case Iop_ShrN16x4: shr = True; size = 16; break;
7290 case Iop_ShrN32x2: shr = True; size = 32; break;
7291 case Iop_Shr64: shr = True; size = 64; break;
7292 default: vassert(0);
7293 }
7294
7295 if (shl || shr) {
7296 assign( e1, amt >= size
7297 ? mkU64(0)
7298 : binop(op, mkexpr(e0), mkU8(amt))
7299 );
7300 } else
7301 if (sar) {
7302 assign( e1, amt >= size
7303 ? binop(op, mkexpr(e0), mkU8(size-1))
7304 : binop(op, mkexpr(e0), mkU8(amt))
7305 );
7306 } else {
7307 vassert(0);
7308 }
7309
7310 putMMXReg( eregLO3ofRM(rm), mkexpr(e1) );
7311 return delta;
7312}
sewardj8711f662005-05-09 17:52:56 +00007313
7314
7315/* Completely handle all MMX instructions except emms. */
7316
7317static
sewardj2e28ac42008-12-04 00:05:12 +00007318ULong dis_MMX ( Bool* decode_ok,
7319 VexAbiInfo* vbi, Prefix pfx, Int sz, Long delta )
sewardj8711f662005-05-09 17:52:56 +00007320{
7321 Int len;
7322 UChar modrm;
7323 HChar dis_buf[50];
7324 UChar opc = getUChar(delta);
7325 delta++;
7326
7327 /* dis_MMX handles all insns except emms. */
7328 do_MMX_preamble();
7329
7330 switch (opc) {
7331
sewardj3d8107c2005-05-09 22:23:38 +00007332 case 0x6E:
7333 if (sz == 4) {
7334 /* MOVD (src)ireg32-or-mem32 (E), (dst)mmxreg (G)*/
7335 modrm = getUChar(delta);
7336 if (epartIsReg(modrm)) {
7337 delta++;
7338 putMMXReg(
7339 gregLO3ofRM(modrm),
7340 binop( Iop_32HLto64,
7341 mkU32(0),
7342 getIReg32(eregOfRexRM(pfx,modrm)) ) );
7343 DIP("movd %s, %s\n",
7344 nameIReg32(eregOfRexRM(pfx,modrm)),
7345 nameMMXReg(gregLO3ofRM(modrm)));
7346 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007347 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007348 delta += len;
7349 putMMXReg(
7350 gregLO3ofRM(modrm),
7351 binop( Iop_32HLto64,
7352 mkU32(0),
7353 loadLE(Ity_I32, mkexpr(addr)) ) );
7354 DIP("movd %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7355 }
7356 }
7357 else
7358 if (sz == 8) {
7359 /* MOVD (src)ireg64-or-mem64 (E), (dst)mmxreg (G)*/
7360 modrm = getUChar(delta);
7361 if (epartIsReg(modrm)) {
7362 delta++;
7363 putMMXReg( gregLO3ofRM(modrm),
7364 getIReg64(eregOfRexRM(pfx,modrm)) );
7365 DIP("movd %s, %s\n",
7366 nameIReg64(eregOfRexRM(pfx,modrm)),
7367 nameMMXReg(gregLO3ofRM(modrm)));
7368 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007369 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007370 delta += len;
7371 putMMXReg( gregLO3ofRM(modrm),
7372 loadLE(Ity_I64, mkexpr(addr)) );
7373 DIP("movd{64} %s, %s\n", dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7374 }
7375 }
7376 else {
7377 goto mmx_decode_failure;
7378 }
7379 break;
7380
7381 case 0x7E:
7382 if (sz == 4) {
7383 /* MOVD (src)mmxreg (G), (dst)ireg32-or-mem32 (E) */
7384 modrm = getUChar(delta);
7385 if (epartIsReg(modrm)) {
7386 delta++;
7387 putIReg32( eregOfRexRM(pfx,modrm),
7388 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7389 DIP("movd %s, %s\n",
7390 nameMMXReg(gregLO3ofRM(modrm)),
7391 nameIReg32(eregOfRexRM(pfx,modrm)));
7392 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007393 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007394 delta += len;
7395 storeLE( mkexpr(addr),
7396 unop(Iop_64to32, getMMXReg(gregLO3ofRM(modrm)) ) );
7397 DIP("movd %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7398 }
7399 }
7400 else
7401 if (sz == 8) {
7402 /* MOVD (src)mmxreg (G), (dst)ireg64-or-mem64 (E) */
7403 modrm = getUChar(delta);
7404 if (epartIsReg(modrm)) {
7405 delta++;
7406 putIReg64( eregOfRexRM(pfx,modrm),
7407 getMMXReg(gregLO3ofRM(modrm)) );
7408 DIP("movd %s, %s\n",
7409 nameMMXReg(gregLO3ofRM(modrm)),
7410 nameIReg64(eregOfRexRM(pfx,modrm)));
7411 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007412 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj3d8107c2005-05-09 22:23:38 +00007413 delta += len;
7414 storeLE( mkexpr(addr),
7415 getMMXReg(gregLO3ofRM(modrm)) );
7416 DIP("movd{64} %s, %s\n", nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7417 }
7418 } else {
7419 goto mmx_decode_failure;
7420 }
7421 break;
sewardj8711f662005-05-09 17:52:56 +00007422
7423 case 0x6F:
7424 /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007425 if (sz != 4
7426 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007427 goto mmx_decode_failure;
7428 modrm = getUChar(delta);
7429 if (epartIsReg(modrm)) {
7430 delta++;
7431 putMMXReg( gregLO3ofRM(modrm), getMMXReg(eregLO3ofRM(modrm)) );
7432 DIP("movq %s, %s\n",
7433 nameMMXReg(eregLO3ofRM(modrm)),
7434 nameMMXReg(gregLO3ofRM(modrm)));
7435 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007436 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007437 delta += len;
7438 putMMXReg( gregLO3ofRM(modrm), loadLE(Ity_I64, mkexpr(addr)) );
7439 DIP("movq %s, %s\n",
7440 dis_buf, nameMMXReg(gregLO3ofRM(modrm)));
7441 }
7442 break;
7443
7444 case 0x7F:
7445 /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
sewardj7bdd1bc2008-12-13 16:49:46 +00007446 if (sz != 4
7447 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007448 goto mmx_decode_failure;
7449 modrm = getUChar(delta);
7450 if (epartIsReg(modrm)) {
sewardjf2d61c42012-08-23 19:00:06 +00007451 delta++;
7452 putMMXReg( eregLO3ofRM(modrm), getMMXReg(gregLO3ofRM(modrm)) );
7453 DIP("movq %s, %s\n",
7454 nameMMXReg(gregLO3ofRM(modrm)),
7455 nameMMXReg(eregLO3ofRM(modrm)));
sewardj8711f662005-05-09 17:52:56 +00007456 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007457 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj8711f662005-05-09 17:52:56 +00007458 delta += len;
7459 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
7460 DIP("mov(nt)q %s, %s\n",
7461 nameMMXReg(gregLO3ofRM(modrm)), dis_buf);
7462 }
7463 break;
7464
7465 case 0xFC:
7466 case 0xFD:
7467 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
7468 if (sz != 4)
7469 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007470 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padd", True );
sewardj8711f662005-05-09 17:52:56 +00007471 break;
7472
7473 case 0xEC:
7474 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007475 if (sz != 4
7476 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007477 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007478 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "padds", True );
sewardj8711f662005-05-09 17:52:56 +00007479 break;
7480
7481 case 0xDC:
7482 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7483 if (sz != 4)
7484 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007485 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "paddus", True );
sewardj8711f662005-05-09 17:52:56 +00007486 break;
7487
7488 case 0xF8:
7489 case 0xF9:
7490 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
7491 if (sz != 4)
7492 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007493 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psub", True );
sewardj8711f662005-05-09 17:52:56 +00007494 break;
7495
7496 case 0xE8:
7497 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
7498 if (sz != 4)
7499 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007500 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubs", True );
sewardj8711f662005-05-09 17:52:56 +00007501 break;
7502
7503 case 0xD8:
7504 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
7505 if (sz != 4)
7506 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007507 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "psubus", True );
sewardj8711f662005-05-09 17:52:56 +00007508 break;
7509
7510 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
7511 if (sz != 4)
7512 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007513 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmulhw", False );
sewardj8711f662005-05-09 17:52:56 +00007514 break;
7515
7516 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
7517 if (sz != 4)
7518 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007519 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmullw", False );
sewardj8711f662005-05-09 17:52:56 +00007520 break;
7521
7522 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
7523 vassert(sz == 4);
sewardj2e28ac42008-12-04 00:05:12 +00007524 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pmaddwd", False );
sewardj8711f662005-05-09 17:52:56 +00007525 break;
7526
7527 case 0x74:
7528 case 0x75:
7529 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
7530 if (sz != 4)
7531 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007532 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpeq", True );
sewardj8711f662005-05-09 17:52:56 +00007533 break;
7534
7535 case 0x64:
7536 case 0x65:
7537 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
7538 if (sz != 4)
7539 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007540 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pcmpgt", True );
sewardj8711f662005-05-09 17:52:56 +00007541 break;
7542
7543 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
7544 if (sz != 4)
7545 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007546 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packssdw", False );
sewardj8711f662005-05-09 17:52:56 +00007547 break;
7548
7549 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
7550 if (sz != 4)
7551 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007552 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packsswb", False );
sewardj8711f662005-05-09 17:52:56 +00007553 break;
7554
7555 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
7556 if (sz != 4)
7557 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007558 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "packuswb", False );
sewardj8711f662005-05-09 17:52:56 +00007559 break;
7560
7561 case 0x68:
7562 case 0x69:
7563 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj23063322009-01-24 10:34:19 +00007564 if (sz != 4
7565 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007566 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007567 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckh", True );
sewardj8711f662005-05-09 17:52:56 +00007568 break;
7569
7570 case 0x60:
7571 case 0x61:
7572 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
sewardj7bdd1bc2008-12-13 16:49:46 +00007573 if (sz != 4
7574 && /*ignore redundant REX.W*/!(sz==8 && haveNo66noF2noF3(pfx)))
sewardj8711f662005-05-09 17:52:56 +00007575 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007576 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "punpckl", True );
sewardj8711f662005-05-09 17:52:56 +00007577 break;
7578
7579 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
7580 if (sz != 4)
7581 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007582 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pand", False );
sewardj8711f662005-05-09 17:52:56 +00007583 break;
7584
7585 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
7586 if (sz != 4)
7587 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007588 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pandn", False );
sewardj8711f662005-05-09 17:52:56 +00007589 break;
7590
7591 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
7592 if (sz != 4)
7593 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007594 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "por", False );
sewardj8711f662005-05-09 17:52:56 +00007595 break;
7596
7597 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
7598 if (sz != 4)
7599 goto mmx_decode_failure;
sewardj2e28ac42008-12-04 00:05:12 +00007600 delta = dis_MMXop_regmem_to_reg ( vbi, pfx, delta, opc, "pxor", False );
sewardj8711f662005-05-09 17:52:56 +00007601 break;
7602
sewardj2e28ac42008-12-04 00:05:12 +00007603# define SHIFT_BY_REG(_name,_op) \
7604 delta = dis_MMX_shiftG_byE(vbi, pfx, delta, _name, _op); \
sewardj8711f662005-05-09 17:52:56 +00007605 break;
7606
7607 /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
7608 case 0xF1: SHIFT_BY_REG("psllw", Iop_ShlN16x4);
7609 case 0xF2: SHIFT_BY_REG("pslld", Iop_ShlN32x2);
7610 case 0xF3: SHIFT_BY_REG("psllq", Iop_Shl64);
7611
7612 /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
7613 case 0xD1: SHIFT_BY_REG("psrlw", Iop_ShrN16x4);
7614 case 0xD2: SHIFT_BY_REG("psrld", Iop_ShrN32x2);
7615 case 0xD3: SHIFT_BY_REG("psrlq", Iop_Shr64);
7616
7617 /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
7618 case 0xE1: SHIFT_BY_REG("psraw", Iop_SarN16x4);
7619 case 0xE2: SHIFT_BY_REG("psrad", Iop_SarN32x2);
7620
7621# undef SHIFT_BY_REG
sewardj3d8107c2005-05-09 22:23:38 +00007622
7623 case 0x71:
7624 case 0x72:
7625 case 0x73: {
7626 /* (sz==4): PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
sewardjca673ab2005-05-11 10:03:08 +00007627 UChar byte2, subopc;
sewardj3d8107c2005-05-09 22:23:38 +00007628 if (sz != 4)
7629 goto mmx_decode_failure;
sewardjca673ab2005-05-11 10:03:08 +00007630 byte2 = getUChar(delta); /* amode / sub-opcode */
7631 subopc = toUChar( (byte2 >> 3) & 7 );
sewardj3d8107c2005-05-09 22:23:38 +00007632
7633# define SHIFT_BY_IMM(_name,_op) \
7634 do { delta = dis_MMX_shiftE_imm(delta,_name,_op); \
7635 } while (0)
7636
7637 if (subopc == 2 /*SRL*/ && opc == 0x71)
7638 SHIFT_BY_IMM("psrlw", Iop_ShrN16x4);
7639 else if (subopc == 2 /*SRL*/ && opc == 0x72)
7640 SHIFT_BY_IMM("psrld", Iop_ShrN32x2);
7641 else if (subopc == 2 /*SRL*/ && opc == 0x73)
7642 SHIFT_BY_IMM("psrlq", Iop_Shr64);
7643
7644 else if (subopc == 4 /*SAR*/ && opc == 0x71)
7645 SHIFT_BY_IMM("psraw", Iop_SarN16x4);
7646 else if (subopc == 4 /*SAR*/ && opc == 0x72)
7647 SHIFT_BY_IMM("psrad", Iop_SarN32x2);
7648
7649 else if (subopc == 6 /*SHL*/ && opc == 0x71)
7650 SHIFT_BY_IMM("psllw", Iop_ShlN16x4);
7651 else if (subopc == 6 /*SHL*/ && opc == 0x72)
7652 SHIFT_BY_IMM("pslld", Iop_ShlN32x2);
7653 else if (subopc == 6 /*SHL*/ && opc == 0x73)
7654 SHIFT_BY_IMM("psllq", Iop_Shl64);
7655
7656 else goto mmx_decode_failure;
7657
7658# undef SHIFT_BY_IMM
7659 break;
7660 }
sewardj8711f662005-05-09 17:52:56 +00007661
sewardj02f79f12007-09-01 18:59:53 +00007662 case 0xF7: {
7663 IRTemp addr = newTemp(Ity_I64);
7664 IRTemp regD = newTemp(Ity_I64);
7665 IRTemp regM = newTemp(Ity_I64);
7666 IRTemp mask = newTemp(Ity_I64);
7667 IRTemp olddata = newTemp(Ity_I64);
7668 IRTemp newdata = newTemp(Ity_I64);
7669
7670 modrm = getUChar(delta);
7671 if (sz != 4 || (!epartIsReg(modrm)))
7672 goto mmx_decode_failure;
7673 delta++;
7674
sewardj2e28ac42008-12-04 00:05:12 +00007675 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
sewardj02f79f12007-09-01 18:59:53 +00007676 assign( regM, getMMXReg( eregLO3ofRM(modrm) ));
7677 assign( regD, getMMXReg( gregLO3ofRM(modrm) ));
7678 assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) );
7679 assign( olddata, loadLE( Ity_I64, mkexpr(addr) ));
7680 assign( newdata,
7681 binop(Iop_Or64,
7682 binop(Iop_And64,
7683 mkexpr(regD),
7684 mkexpr(mask) ),
7685 binop(Iop_And64,
7686 mkexpr(olddata),
7687 unop(Iop_Not64, mkexpr(mask)))) );
7688 storeLE( mkexpr(addr), mkexpr(newdata) );
7689 DIP("maskmovq %s,%s\n", nameMMXReg( eregLO3ofRM(modrm) ),
7690 nameMMXReg( gregLO3ofRM(modrm) ) );
7691 break;
7692 }
7693
sewardj8711f662005-05-09 17:52:56 +00007694 /* --- MMX decode failure --- */
7695 default:
7696 mmx_decode_failure:
7697 *decode_ok = False;
7698 return delta; /* ignored */
7699
7700 }
7701
7702 *decode_ok = True;
7703 return delta;
7704}
7705
7706
/*------------------------------------------------------------*/
/*--- More misc arithmetic and other obscure insns.        ---*/
/*------------------------------------------------------------*/
7710
7711/* Generate base << amt with vacated places filled with stuff
7712 from xtra. amt guaranteed in 0 .. 63. */
7713static
7714IRExpr* shiftL64_with_extras ( IRTemp base, IRTemp xtra, IRTemp amt )
7715{
7716 /* if amt == 0
7717 then base
7718 else (base << amt) | (xtra >>u (64-amt))
7719 */
7720 return
florian99dd03e2013-01-29 03:56:06 +00007721 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007722 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
sewardj33ef9c22005-11-04 20:05:57 +00007723 binop(Iop_Or64,
7724 binop(Iop_Shl64, mkexpr(base), mkexpr(amt)),
7725 binop(Iop_Shr64, mkexpr(xtra),
7726 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
florian99dd03e2013-01-29 03:56:06 +00007727 ),
7728 mkexpr(base)
sewardj33ef9c22005-11-04 20:05:57 +00007729 );
7730}
7731
7732/* Generate base >>u amt with vacated places filled with stuff
7733 from xtra. amt guaranteed in 0 .. 63. */
7734static
7735IRExpr* shiftR64_with_extras ( IRTemp xtra, IRTemp base, IRTemp amt )
7736{
7737 /* if amt == 0
7738 then base
7739 else (base >>u amt) | (xtra << (64-amt))
7740 */
7741 return
florian99dd03e2013-01-29 03:56:06 +00007742 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00007743 binop(Iop_CmpNE8, mkexpr(amt), mkU8(0)),
sewardj33ef9c22005-11-04 20:05:57 +00007744 binop(Iop_Or64,
7745 binop(Iop_Shr64, mkexpr(base), mkexpr(amt)),
7746 binop(Iop_Shl64, mkexpr(xtra),
7747 binop(Iop_Sub8, mkU8(64), mkexpr(amt)))
florian99dd03e2013-01-29 03:56:06 +00007748 ),
7749 mkexpr(base)
sewardj33ef9c22005-11-04 20:05:57 +00007750 );
7751}
7752
7753/* Double length left and right shifts. Apparently only required in
7754 v-size (no b- variant). */
7755static
sewardj2e28ac42008-12-04 00:05:12 +00007756ULong dis_SHLRD_Gv_Ev ( VexAbiInfo* vbi,
7757 Prefix pfx,
sewardj33ef9c22005-11-04 20:05:57 +00007758 Long delta, UChar modrm,
7759 Int sz,
7760 IRExpr* shift_amt,
7761 Bool amt_is_literal,
florian55085f82012-11-21 00:36:55 +00007762 const HChar* shift_amt_txt,
sewardj33ef9c22005-11-04 20:05:57 +00007763 Bool left_shift )
7764{
7765 /* shift_amt :: Ity_I8 is the amount to shift. shift_amt_txt is used
7766 for printing it. And eip on entry points at the modrm byte. */
7767 Int len;
7768 HChar dis_buf[50];
7769
7770 IRType ty = szToITy(sz);
7771 IRTemp gsrc = newTemp(ty);
7772 IRTemp esrc = newTemp(ty);
7773 IRTemp addr = IRTemp_INVALID;
7774 IRTemp tmpSH = newTemp(Ity_I8);
7775 IRTemp tmpSS = newTemp(Ity_I8);
7776 IRTemp tmp64 = IRTemp_INVALID;
7777 IRTemp res64 = IRTemp_INVALID;
7778 IRTemp rss64 = IRTemp_INVALID;
7779 IRTemp resTy = IRTemp_INVALID;
7780 IRTemp rssTy = IRTemp_INVALID;
7781 Int mask = sz==8 ? 63 : 31;
7782
7783 vassert(sz == 2 || sz == 4 || sz == 8);
7784
7785 /* The E-part is the destination; this is shifted. The G-part
7786 supplies bits to be shifted into the E-part, but is not
7787 changed.
7788
7789 If shifting left, form a double-length word with E at the top
7790 and G at the bottom, and shift this left. The result is then in
7791 the high part.
7792
7793 If shifting right, form a double-length word with G at the top
7794 and E at the bottom, and shift this right. The result is then
7795 at the bottom. */
7796
7797 /* Fetch the operands. */
7798
7799 assign( gsrc, getIRegG(sz, pfx, modrm) );
7800
7801 if (epartIsReg(modrm)) {
7802 delta++;
7803 assign( esrc, getIRegE(sz, pfx, modrm) );
7804 DIP("sh%cd%c %s, %s, %s\n",
7805 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7806 shift_amt_txt,
7807 nameIRegG(sz, pfx, modrm), nameIRegE(sz, pfx, modrm));
7808 } else {
sewardj2e28ac42008-12-04 00:05:12 +00007809 addr = disAMode ( &len, vbi, pfx, delta, dis_buf,
sewardj75ce3652005-11-04 20:49:36 +00007810 /* # bytes following amode */
7811 amt_is_literal ? 1 : 0 );
sewardj33ef9c22005-11-04 20:05:57 +00007812 delta += len;
7813 assign( esrc, loadLE(ty, mkexpr(addr)) );
7814 DIP("sh%cd%c %s, %s, %s\n",
7815 ( left_shift ? 'l' : 'r' ), nameISize(sz),
7816 shift_amt_txt,
7817 nameIRegG(sz, pfx, modrm), dis_buf);
7818 }
7819
7820 /* Calculate the masked shift amount (tmpSH), the masked subshift
7821 amount (tmpSS), the shifted value (res64) and the subshifted
7822 value (rss64). */
7823
7824 assign( tmpSH, binop(Iop_And8, shift_amt, mkU8(mask)) );
7825 assign( tmpSS, binop(Iop_And8,
7826 binop(Iop_Sub8, mkexpr(tmpSH), mkU8(1) ),
7827 mkU8(mask)));
7828
7829 tmp64 = newTemp(Ity_I64);
7830 res64 = newTemp(Ity_I64);
7831 rss64 = newTemp(Ity_I64);
7832
7833 if (sz == 2 || sz == 4) {
7834
7835 /* G is xtra; E is data */
7836 /* what a freaking nightmare: */
7837 if (sz == 4 && left_shift) {
7838 assign( tmp64, binop(Iop_32HLto64, mkexpr(esrc), mkexpr(gsrc)) );
7839 assign( res64,
7840 binop(Iop_Shr64,
7841 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7842 mkU8(32)) );
7843 assign( rss64,
7844 binop(Iop_Shr64,
7845 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSS)),
7846 mkU8(32)) );
7847 }
7848 else
7849 if (sz == 4 && !left_shift) {
7850 assign( tmp64, binop(Iop_32HLto64, mkexpr(gsrc), mkexpr(esrc)) );
7851 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7852 assign( rss64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSS)) );
7853 }
7854 else
7855 if (sz == 2 && left_shift) {
7856 assign( tmp64,
7857 binop(Iop_32HLto64,
7858 binop(Iop_16HLto32, mkexpr(esrc), mkexpr(gsrc)),
7859 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc))
7860 ));
sewardjcc3d2192013-03-27 11:37:33 +00007861 /* result formed by shifting [esrc'gsrc'gsrc'gsrc] */
sewardj33ef9c22005-11-04 20:05:57 +00007862 assign( res64,
7863 binop(Iop_Shr64,
7864 binop(Iop_Shl64, mkexpr(tmp64), mkexpr(tmpSH)),
7865 mkU8(48)) );
7866 /* subshift formed by shifting [esrc'0000'0000'0000] */
7867 assign( rss64,
7868 binop(Iop_Shr64,
7869 binop(Iop_Shl64,
7870 binop(Iop_Shl64, unop(Iop_16Uto64, mkexpr(esrc)),
7871 mkU8(48)),
7872 mkexpr(tmpSS)),
7873 mkU8(48)) );
7874 }
7875 else
7876 if (sz == 2 && !left_shift) {
7877 assign( tmp64,
7878 binop(Iop_32HLto64,
7879 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(gsrc)),
7880 binop(Iop_16HLto32, mkexpr(gsrc), mkexpr(esrc))
7881 ));
7882 /* result formed by shifting [gsrc'gsrc'gsrc'esrc] */
7883 assign( res64, binop(Iop_Shr64, mkexpr(tmp64), mkexpr(tmpSH)) );
7884 /* subshift formed by shifting [0000'0000'0000'esrc] */
7885 assign( rss64, binop(Iop_Shr64,
7886 unop(Iop_16Uto64, mkexpr(esrc)),
7887 mkexpr(tmpSS)) );
7888 }
7889
7890 } else {
7891
7892 vassert(sz == 8);
7893 if (left_shift) {
7894 assign( res64, shiftL64_with_extras( esrc, gsrc, tmpSH ));
7895 assign( rss64, shiftL64_with_extras( esrc, gsrc, tmpSS ));
7896 } else {
7897 assign( res64, shiftR64_with_extras( gsrc, esrc, tmpSH ));
7898 assign( rss64, shiftR64_with_extras( gsrc, esrc, tmpSS ));
7899 }
7900
7901 }
7902
7903 resTy = newTemp(ty);
7904 rssTy = newTemp(ty);
7905 assign( resTy, narrowTo(ty, mkexpr(res64)) );
7906 assign( rssTy, narrowTo(ty, mkexpr(rss64)) );
7907
7908 /* Put result back and write the flags thunk. */
7909 setFlags_DEP1_DEP2_shift ( left_shift ? Iop_Shl64 : Iop_Sar64,
7910 resTy, rssTy, ty, tmpSH );
7911
7912 if (epartIsReg(modrm)) {
7913 putIRegE(sz, pfx, modrm, mkexpr(resTy));
7914 } else {
7915 storeLE( mkexpr(addr), mkexpr(resTy) );
7916 }
7917
7918 if (amt_is_literal) delta++;
7919 return delta;
7920}
sewardj9ed16802005-08-24 10:46:19 +00007921
7922
/* Handle BT/BTS/BTR/BTC Gv, Ev.  Apparently b-size is not
   required. */
7925
7926typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp;
7927
florian55085f82012-11-21 00:36:55 +00007928static const HChar* nameBtOp ( BtOp op )
sewardj9ed16802005-08-24 10:46:19 +00007929{
7930 switch (op) {
7931 case BtOpNone: return "";
7932 case BtOpSet: return "s";
7933 case BtOpReset: return "r";
7934 case BtOpComp: return "c";
7935 default: vpanic("nameBtOp(amd64)");
7936 }
7937}
7938
7939
7940static
sewardj2e28ac42008-12-04 00:05:12 +00007941ULong dis_bt_G_E ( VexAbiInfo* vbi,
sewardj38b1d692013-10-15 17:21:42 +00007942 Prefix pfx, Int sz, Long delta, BtOp op,
7943 /*OUT*/Bool* decode_OK )
sewardj9ed16802005-08-24 10:46:19 +00007944{
7945 HChar dis_buf[50];
7946 UChar modrm;
7947 Int len;
7948 IRTemp t_fetched, t_bitno0, t_bitno1, t_bitno2, t_addr0,
sewardj38b1d692013-10-15 17:21:42 +00007949 t_addr1, t_rsp, t_mask, t_new;
sewardj9ed16802005-08-24 10:46:19 +00007950
7951 vassert(sz == 2 || sz == 4 || sz == 8);
7952
7953 t_fetched = t_bitno0 = t_bitno1 = t_bitno2
sewardje9d8a262009-07-01 08:06:34 +00007954 = t_addr0 = t_addr1 = t_rsp
7955 = t_mask = t_new = IRTemp_INVALID;
sewardj9ed16802005-08-24 10:46:19 +00007956
7957 t_fetched = newTemp(Ity_I8);
sewardje9d8a262009-07-01 08:06:34 +00007958 t_new = newTemp(Ity_I8);
sewardj9ed16802005-08-24 10:46:19 +00007959 t_bitno0 = newTemp(Ity_I64);
7960 t_bitno1 = newTemp(Ity_I64);
7961 t_bitno2 = newTemp(Ity_I8);
7962 t_addr1 = newTemp(Ity_I64);
7963 modrm = getUChar(delta);
7964
sewardj38b1d692013-10-15 17:21:42 +00007965 *decode_OK = True;
7966 if (epartIsReg(modrm)) {
7967 /* F2 and F3 are never acceptable. */
7968 if (haveF2orF3(pfx)) {
7969 *decode_OK = False;
7970 return delta;
7971 }
7972 } else {
7973 /* F2 or F3 (but not both) are allowed, provided LOCK is also
7974 present, and only for the BTC/BTS/BTR cases (not BT). */
7975 if (haveF2orF3(pfx)) {
7976 if (haveF2andF3(pfx) || !haveLOCK(pfx) || op == BtOpNone) {
7977 *decode_OK = False;
7978 return delta;
7979 }
7980 }
7981 }
7982
sewardj9ed16802005-08-24 10:46:19 +00007983 assign( t_bitno0, widenSto64(getIRegG(sz, pfx, modrm)) );
7984
7985 if (epartIsReg(modrm)) {
7986 delta++;
sewardj02834302010-07-29 18:10:51 +00007987 /* Get it onto the client's stack. Oh, this is a horrible
7988 kludge. See https://bugs.kde.org/show_bug.cgi?id=245925.
7989 Because of the ELF ABI stack redzone, there may be live data
7990 up to 128 bytes below %RSP. So we can't just push it on the
7991 stack, else we may wind up trashing live data, and causing
7992 impossible-to-find simulation errors. (Yes, this did
7993 happen.) So we need to drop RSP before at least 128 before
7994 pushing it. That unfortunately means hitting Memcheck's
7995 fast-case painting code. Ideally we should drop more than
7996 128, to reduce the chances of breaking buggy programs that
7997 have live data below -128(%RSP). Memcheck fast-cases moves
7998 of 288 bytes due to the need to handle ppc64-linux quickly,
7999 so let's use 288. Of course the real fix is to get rid of
8000 this kludge entirely. */
sewardj9ed16802005-08-24 10:46:19 +00008001 t_rsp = newTemp(Ity_I64);
8002 t_addr0 = newTemp(Ity_I64);
8003
sewardj02834302010-07-29 18:10:51 +00008004 vassert(vbi->guest_stack_redzone_size == 128);
8005 assign( t_rsp, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00008006 putIReg64(R_RSP, mkexpr(t_rsp));
8007
8008 storeLE( mkexpr(t_rsp), getIRegE(sz, pfx, modrm) );
8009
8010 /* Make t_addr0 point at it. */
8011 assign( t_addr0, mkexpr(t_rsp) );
8012
8013 /* Mask out upper bits of the shift amount, since we're doing a
8014 reg. */
8015 assign( t_bitno1, binop(Iop_And64,
8016 mkexpr(t_bitno0),
8017 mkU64(sz == 8 ? 63 : sz == 4 ? 31 : 15)) );
8018
8019 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008020 t_addr0 = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
sewardj9ed16802005-08-24 10:46:19 +00008021 delta += len;
8022 assign( t_bitno1, mkexpr(t_bitno0) );
8023 }
8024
8025 /* At this point: t_addr0 is the address being operated on. If it
8026 was a reg, we will have pushed it onto the client's stack.
8027 t_bitno1 is the bit number, suitably masked in the case of a
8028 reg. */
8029
8030 /* Now the main sequence. */
8031 assign( t_addr1,
8032 binop(Iop_Add64,
8033 mkexpr(t_addr0),
8034 binop(Iop_Sar64, mkexpr(t_bitno1), mkU8(3))) );
8035
8036 /* t_addr1 now holds effective address */
8037
8038 assign( t_bitno2,
8039 unop(Iop_64to8,
8040 binop(Iop_And64, mkexpr(t_bitno1), mkU64(7))) );
8041
8042 /* t_bitno2 contains offset of bit within byte */
8043
8044 if (op != BtOpNone) {
8045 t_mask = newTemp(Ity_I8);
8046 assign( t_mask, binop(Iop_Shl8, mkU8(1), mkexpr(t_bitno2)) );
8047 }
8048
8049 /* t_mask is now a suitable byte mask */
8050
8051 assign( t_fetched, loadLE(Ity_I8, mkexpr(t_addr1)) );
8052
8053 if (op != BtOpNone) {
8054 switch (op) {
sewardje9d8a262009-07-01 08:06:34 +00008055 case BtOpSet:
8056 assign( t_new,
8057 binop(Iop_Or8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00008058 break;
sewardje9d8a262009-07-01 08:06:34 +00008059 case BtOpComp:
8060 assign( t_new,
8061 binop(Iop_Xor8, mkexpr(t_fetched), mkexpr(t_mask)) );
sewardj9ed16802005-08-24 10:46:19 +00008062 break;
sewardje9d8a262009-07-01 08:06:34 +00008063 case BtOpReset:
8064 assign( t_new,
8065 binop(Iop_And8, mkexpr(t_fetched),
8066 unop(Iop_Not8, mkexpr(t_mask))) );
sewardj9ed16802005-08-24 10:46:19 +00008067 break;
8068 default:
8069 vpanic("dis_bt_G_E(amd64)");
8070 }
sewardj6491f862013-10-15 17:29:19 +00008071 if ((haveLOCK(pfx)) && !epartIsReg(modrm)) {
sewardje9d8a262009-07-01 08:06:34 +00008072 casLE( mkexpr(t_addr1), mkexpr(t_fetched)/*expd*/,
8073 mkexpr(t_new)/*new*/,
8074 guest_RIP_curr_instr );
8075 } else {
8076 storeLE( mkexpr(t_addr1), mkexpr(t_new) );
8077 }
sewardj9ed16802005-08-24 10:46:19 +00008078 }
sewardje9d8a262009-07-01 08:06:34 +00008079
sewardj9ed16802005-08-24 10:46:19 +00008080 /* Side effect done; now get selected bit into Carry flag */
8081 /* Flags: C=selected bit, O,S,Z,A,P undefined, so are set to zero. */
8082 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8083 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8084 stmt( IRStmt_Put(
8085 OFFB_CC_DEP1,
8086 binop(Iop_And64,
8087 binop(Iop_Shr64,
8088 unop(Iop_8Uto64, mkexpr(t_fetched)),
8089 mkexpr(t_bitno2)),
8090 mkU64(1)))
8091 );
8092 /* Set NDEP even though it isn't used. This makes redundant-PUT
8093 elimination of previous stores to this field work better. */
8094 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8095
8096 /* Move reg operand from stack back to reg */
8097 if (epartIsReg(modrm)) {
sewardje9d8a262009-07-01 08:06:34 +00008098 /* t_rsp still points at it. */
sewardj25d23862006-05-12 17:47:21 +00008099 /* only write the reg if actually modifying it; doing otherwise
8100 zeroes the top half erroneously when doing btl due to
8101 standard zero-extend rule */
sewardje9d8a262009-07-01 08:06:34 +00008102 if (op != BtOpNone)
sewardj25d23862006-05-12 17:47:21 +00008103 putIRegE(sz, pfx, modrm, loadLE(szToITy(sz), mkexpr(t_rsp)) );
sewardj02834302010-07-29 18:10:51 +00008104 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t_rsp), mkU64(288)) );
sewardj9ed16802005-08-24 10:46:19 +00008105 }
8106
8107 DIP("bt%s%c %s, %s\n",
8108 nameBtOp(op), nameISize(sz), nameIRegG(sz, pfx, modrm),
8109 ( epartIsReg(modrm) ? nameIRegE(sz, pfx, modrm) : dis_buf ) );
8110
8111 return delta;
8112}
sewardjf53b7352005-04-06 20:01:56 +00008113
8114
8115
8116/* Handle BSF/BSR. Only v-size seems necessary. */
8117static
sewardj2e28ac42008-12-04 00:05:12 +00008118ULong dis_bs_E_G ( VexAbiInfo* vbi,
8119 Prefix pfx, Int sz, Long delta, Bool fwds )
sewardjf53b7352005-04-06 20:01:56 +00008120{
8121 Bool isReg;
8122 UChar modrm;
8123 HChar dis_buf[50];
8124
8125 IRType ty = szToITy(sz);
8126 IRTemp src = newTemp(ty);
8127 IRTemp dst = newTemp(ty);
8128 IRTemp src64 = newTemp(Ity_I64);
8129 IRTemp dst64 = newTemp(Ity_I64);
sewardj009230b2013-01-26 11:47:55 +00008130 IRTemp srcB = newTemp(Ity_I1);
sewardjf53b7352005-04-06 20:01:56 +00008131
8132 vassert(sz == 8 || sz == 4 || sz == 2);
8133
8134 modrm = getUChar(delta);
8135 isReg = epartIsReg(modrm);
8136 if (isReg) {
8137 delta++;
8138 assign( src, getIRegE(sz, pfx, modrm) );
8139 } else {
8140 Int len;
sewardj2e28ac42008-12-04 00:05:12 +00008141 IRTemp addr = disAMode( &len, vbi, pfx, delta, dis_buf, 0 );
sewardjf53b7352005-04-06 20:01:56 +00008142 delta += len;
8143 assign( src, loadLE(ty, mkexpr(addr)) );
8144 }
8145
8146 DIP("bs%c%c %s, %s\n",
8147 fwds ? 'f' : 'r', nameISize(sz),
8148 ( isReg ? nameIRegE(sz, pfx, modrm) : dis_buf ),
8149 nameIRegG(sz, pfx, modrm));
8150
8151 /* First, widen src to 64 bits if it is not already. */
8152 assign( src64, widenUto64(mkexpr(src)) );
8153
sewardj009230b2013-01-26 11:47:55 +00008154 /* Generate a bool expression which is zero iff the original is
sewardje13074c2012-11-08 10:57:08 +00008155 zero, and nonzero otherwise. Ask for a CmpNE version which, if
8156 instrumented by Memcheck, is instrumented expensively, since
8157 this may be used on the output of a preceding movmskb insn,
8158 which has been known to be partially defined, and in need of
8159 careful handling. */
sewardj009230b2013-01-26 11:47:55 +00008160 assign( srcB, binop(Iop_ExpCmpNE64, mkexpr(src64), mkU64(0)) );
sewardjf53b7352005-04-06 20:01:56 +00008161
8162 /* Flags: Z is 1 iff source value is zero. All others
8163 are undefined -- we force them to zero. */
8164 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8165 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8166 stmt( IRStmt_Put(
8167 OFFB_CC_DEP1,
florian99dd03e2013-01-29 03:56:06 +00008168 IRExpr_ITE( mkexpr(srcB),
8169 /* src!=0 */
8170 mkU64(0),
8171 /* src==0 */
8172 mkU64(AMD64G_CC_MASK_Z)
sewardjf53b7352005-04-06 20:01:56 +00008173 )
8174 ));
8175 /* Set NDEP even though it isn't used. This makes redundant-PUT
8176 elimination of previous stores to this field work better. */
8177 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8178
8179 /* Result: iff source value is zero, we can't use
8180 Iop_Clz64/Iop_Ctz64 as they have no defined result in that case.
8181 But anyway, amd64 semantics say the result is undefined in
8182 such situations. Hence handle the zero case specially. */
8183
8184 /* Bleh. What we compute:
8185
8186 bsf64: if src == 0 then {dst is unchanged}
8187 else Ctz64(src)
8188
8189 bsr64: if src == 0 then {dst is unchanged}
8190 else 63 - Clz64(src)
8191
8192 bsf32: if src == 0 then {dst is unchanged}
8193 else Ctz64(32Uto64(src))
8194
8195 bsr32: if src == 0 then {dst is unchanged}
8196 else 63 - Clz64(32Uto64(src))
8197
8198 bsf16: if src == 0 then {dst is unchanged}
8199 else Ctz64(32Uto64(16Uto32(src)))
8200
8201 bsr16: if src == 0 then {dst is unchanged}
8202 else 63 - Clz64(32Uto64(16Uto32(src)))
8203 */
8204
8205 /* The main computation, guarding against zero. */
8206 assign( dst64,
florian99dd03e2013-01-29 03:56:06 +00008207 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00008208 mkexpr(srcB),
sewardjf53b7352005-04-06 20:01:56 +00008209 /* src != 0 */
8210 fwds ? unop(Iop_Ctz64, mkexpr(src64))
8211 : binop(Iop_Sub64,
8212 mkU64(63),
florian99dd03e2013-01-29 03:56:06 +00008213 unop(Iop_Clz64, mkexpr(src64))),
8214 /* src == 0 -- leave dst unchanged */
8215 widenUto64( getIRegG( sz, pfx, modrm ) )
sewardjf53b7352005-04-06 20:01:56 +00008216 )
8217 );
8218
8219 if (sz == 2)
sewardje58967e2005-04-27 11:50:56 +00008220 assign( dst, unop(Iop_64to16, mkexpr(dst64)) );
sewardjf53b7352005-04-06 20:01:56 +00008221 else
8222 if (sz == 4)
8223 assign( dst, unop(Iop_64to32, mkexpr(dst64)) );
8224 else
8225 assign( dst, mkexpr(dst64) );
8226
8227 /* dump result back */
8228 putIRegG( sz, pfx, modrm, mkexpr(dst) );
8229
8230 return delta;
8231}
sewardja6b93d12005-02-17 09:28:28 +00008232
8233
8234/* swap rAX with the reg specified by reg and REX.B */
8235static
sewardj5b470602005-02-27 13:10:48 +00008236void codegen_xchg_rAX_Reg ( Prefix pfx, Int sz, UInt regLo3 )
sewardja6b93d12005-02-17 09:28:28 +00008237{
8238 IRType ty = szToITy(sz);
8239 IRTemp t1 = newTemp(ty);
8240 IRTemp t2 = newTemp(ty);
tom0fb4cbd2011-08-10 12:58:03 +00008241 vassert(sz == 2 || sz == 4 || sz == 8);
sewardj5b470602005-02-27 13:10:48 +00008242 vassert(regLo3 < 8);
sewardj2d4fcd52005-05-18 11:47:47 +00008243 if (sz == 8) {
8244 assign( t1, getIReg64(R_RAX) );
8245 assign( t2, getIRegRexB(8, pfx, regLo3) );
8246 putIReg64( R_RAX, mkexpr(t2) );
8247 putIRegRexB(8, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00008248 } else if (sz == 4) {
sewardj2d4fcd52005-05-18 11:47:47 +00008249 assign( t1, getIReg32(R_RAX) );
8250 assign( t2, getIRegRexB(4, pfx, regLo3) );
8251 putIReg32( R_RAX, mkexpr(t2) );
8252 putIRegRexB(4, pfx, regLo3, mkexpr(t1) );
tom0fb4cbd2011-08-10 12:58:03 +00008253 } else {
8254 assign( t1, getIReg16(R_RAX) );
8255 assign( t2, getIRegRexB(2, pfx, regLo3) );
8256 putIReg16( R_RAX, mkexpr(t2) );
8257 putIRegRexB(2, pfx, regLo3, mkexpr(t1) );
sewardj2d4fcd52005-05-18 11:47:47 +00008258 }
sewardja6b93d12005-02-17 09:28:28 +00008259 DIP("xchg%c %s, %s\n",
sewardj5b470602005-02-27 13:10:48 +00008260 nameISize(sz), nameIRegRAX(sz),
sewardj2d4fcd52005-05-18 11:47:47 +00008261 nameIRegRexB(sz,pfx, regLo3));
sewardja6b93d12005-02-17 09:28:28 +00008262}
8263
8264
sewardj905edbd2007-04-07 12:25:37 +00008265static
8266void codegen_SAHF ( void )
8267{
8268 /* Set the flags to:
8269 (amd64g_calculate_flags_all() & AMD64G_CC_MASK_O)
8270 -- retain the old O flag
8271 | (%AH & (AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8272 |AMD64G_CC_MASK_P|AMD64G_CC_MASK_C)
8273 */
8274 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8275 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8276 IRTemp oldflags = newTemp(Ity_I64);
8277 assign( oldflags, mk_amd64g_calculate_rflags_all() );
8278 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
8279 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
8280 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
8281 stmt( IRStmt_Put( OFFB_CC_DEP1,
8282 binop(Iop_Or64,
8283 binop(Iop_And64, mkexpr(oldflags), mkU64(AMD64G_CC_MASK_O)),
8284 binop(Iop_And64,
8285 binop(Iop_Shr64, getIReg64(R_RAX), mkU8(8)),
8286 mkU64(mask_SZACP))
8287 )
8288 ));
8289}
8290
8291
8292static
8293void codegen_LAHF ( void )
8294{
8295 /* AH <- EFLAGS(SF:ZF:0:AF:0:PF:1:CF) */
8296 IRExpr* rax_with_hole;
8297 IRExpr* new_byte;
8298 IRExpr* new_rax;
8299 ULong mask_SZACP = AMD64G_CC_MASK_S|AMD64G_CC_MASK_Z|AMD64G_CC_MASK_A
8300 |AMD64G_CC_MASK_C|AMD64G_CC_MASK_P;
8301
8302 IRTemp flags = newTemp(Ity_I64);
8303 assign( flags, mk_amd64g_calculate_rflags_all() );
8304
8305 rax_with_hole
8306 = binop(Iop_And64, getIReg64(R_RAX), mkU64(~0xFF00ULL));
8307 new_byte
8308 = binop(Iop_Or64, binop(Iop_And64, mkexpr(flags), mkU64(mask_SZACP)),
8309 mkU64(1<<1));
8310 new_rax
8311 = binop(Iop_Or64, rax_with_hole,
8312 binop(Iop_Shl64, new_byte, mkU8(8)));
8313 putIReg64(R_RAX, new_rax);
8314}
8315
sewardja6b93d12005-02-17 09:28:28 +00008316
8317static
sewardjd0aa0a52006-08-17 01:20:01 +00008318ULong dis_cmpxchg_G_E ( /*OUT*/Bool* ok,
sewardj2e28ac42008-12-04 00:05:12 +00008319 VexAbiInfo* vbi,
sewardjd0aa0a52006-08-17 01:20:01 +00008320 Prefix pfx,
8321 Int size,
8322 Long delta0 )
sewardja6b93d12005-02-17 09:28:28 +00008323{
8324 HChar dis_buf[50];
8325 Int len;
8326
8327 IRType ty = szToITy(size);
8328 IRTemp acc = newTemp(ty);
8329 IRTemp src = newTemp(ty);
8330 IRTemp dest = newTemp(ty);
8331 IRTemp dest2 = newTemp(ty);
8332 IRTemp acc2 = newTemp(ty);
sewardj009230b2013-01-26 11:47:55 +00008333 IRTemp cond = newTemp(Ity_I1);
sewardja6b93d12005-02-17 09:28:28 +00008334 IRTemp addr = IRTemp_INVALID;
8335 UChar rm = getUChar(delta0);
8336
sewardje9d8a262009-07-01 08:06:34 +00008337 /* There are 3 cases to consider:
8338
8339 reg-reg: ignore any lock prefix, generate sequence based
florian99dd03e2013-01-29 03:56:06 +00008340 on ITE
sewardje9d8a262009-07-01 08:06:34 +00008341
8342 reg-mem, not locked: ignore any lock prefix, generate sequence
florian99dd03e2013-01-29 03:56:06 +00008343 based on ITE
sewardje9d8a262009-07-01 08:06:34 +00008344
8345 reg-mem, locked: use IRCAS
8346 */
8347
sewardj38b1d692013-10-15 17:21:42 +00008348 /* Decide whether F2 or F3 are acceptable. Never for register
8349 case, but for the memory case, one or the other is OK provided
8350 LOCK is also present. */
8351 if (epartIsReg(rm)) {
8352 if (haveF2orF3(pfx)) {
8353 *ok = False;
8354 return delta0;
8355 }
8356 } else {
8357 if (haveF2orF3(pfx)) {
8358 if (haveF2andF3(pfx) || !haveLOCK(pfx)) {
8359 *ok = False;
8360 return delta0;
8361 }
8362 }
8363 }
8364
sewardja6b93d12005-02-17 09:28:28 +00008365 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00008366 /* case 1 */
sewardj5b470602005-02-27 13:10:48 +00008367 assign( dest, getIRegE(size, pfx, rm) );
sewardja6b93d12005-02-17 09:28:28 +00008368 delta0++;
sewardje9d8a262009-07-01 08:06:34 +00008369 assign( src, getIRegG(size, pfx, rm) );
8370 assign( acc, getIRegRAX(size) );
8371 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008372 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008373 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8374 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008375 putIRegRAX(size, mkexpr(acc2));
8376 putIRegE(size, pfx, rm, mkexpr(dest2));
sewardja6b93d12005-02-17 09:28:28 +00008377 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00008378 nameIRegG(size,pfx,rm),
8379 nameIRegE(size,pfx,rm) );
sewardje9d8a262009-07-01 08:06:34 +00008380 }
sewardj6491f862013-10-15 17:29:19 +00008381 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008382 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008383 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardja6b93d12005-02-17 09:28:28 +00008384 assign( dest, loadLE(ty, mkexpr(addr)) );
8385 delta0 += len;
sewardje9d8a262009-07-01 08:06:34 +00008386 assign( src, getIRegG(size, pfx, rm) );
8387 assign( acc, getIRegRAX(size) );
8388 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008389 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008390 assign( dest2, IRExpr_ITE(mkexpr(cond), mkexpr(src), mkexpr(dest)) );
8391 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008392 putIRegRAX(size, mkexpr(acc2));
8393 storeLE( mkexpr(addr), mkexpr(dest2) );
sewardja6b93d12005-02-17 09:28:28 +00008394 DIP("cmpxchg%c %s,%s\n", nameISize(size),
sewardj5b470602005-02-27 13:10:48 +00008395 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008396 }
sewardj6491f862013-10-15 17:29:19 +00008397 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008398 /* case 3 */
8399 /* src is new value. acc is expected value. dest is old value.
8400 Compute success from the output of the IRCAS, and steer the
8401 new value for RAX accordingly: in case of success, RAX is
8402 unchanged. */
8403 addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8404 delta0 += len;
8405 assign( src, getIRegG(size, pfx, rm) );
8406 assign( acc, getIRegRAX(size) );
8407 stmt( IRStmt_CAS(
8408 mkIRCAS( IRTemp_INVALID, dest, Iend_LE, mkexpr(addr),
8409 NULL, mkexpr(acc), NULL, mkexpr(src) )
8410 ));
8411 setFlags_DEP1_DEP2(Iop_Sub8, acc, dest, ty);
sewardj009230b2013-01-26 11:47:55 +00008412 assign( cond, mk_amd64g_calculate_condition(AMD64CondZ) );
florian99dd03e2013-01-29 03:56:06 +00008413 assign( acc2, IRExpr_ITE(mkexpr(cond), mkexpr(acc), mkexpr(dest)) );
sewardje9d8a262009-07-01 08:06:34 +00008414 putIRegRAX(size, mkexpr(acc2));
sewardj40d1d212009-07-12 13:01:17 +00008415 DIP("cmpxchg%c %s,%s\n", nameISize(size),
8416 nameIRegG(size,pfx,rm), dis_buf);
sewardja6b93d12005-02-17 09:28:28 +00008417 }
sewardje9d8a262009-07-01 08:06:34 +00008418 else vassert(0);
sewardja6b93d12005-02-17 09:28:28 +00008419
sewardjd0aa0a52006-08-17 01:20:01 +00008420 *ok = True;
sewardja6b93d12005-02-17 09:28:28 +00008421 return delta0;
8422}
8423
sewardj3ca55a12005-01-27 16:06:23 +00008424
8425/* Handle conditional move instructions of the form
8426 cmovcc E(reg-or-mem), G(reg)
8427
8428 E(src) is reg-or-mem
8429 G(dst) is reg.
8430
8431 If E is reg, --> GET %E, tmps
8432 GET %G, tmpd
8433 CMOVcc tmps, tmpd
8434 PUT tmpd, %G
8435
8436 If E is mem --> (getAddr E) -> tmpa
8437 LD (tmpa), tmps
8438 GET %G, tmpd
8439 CMOVcc tmps, tmpd
8440 PUT tmpd, %G
8441*/
8442static
sewardj2e28ac42008-12-04 00:05:12 +00008443ULong dis_cmov_E_G ( VexAbiInfo* vbi,
8444 Prefix pfx,
sewardj3ca55a12005-01-27 16:06:23 +00008445 Int sz,
8446 AMD64Condcode cond,
sewardj270def42005-07-03 01:03:01 +00008447 Long delta0 )
sewardj3ca55a12005-01-27 16:06:23 +00008448{
sewardj8c332e22005-01-28 01:36:56 +00008449 UChar rm = getUChar(delta0);
sewardj3ca55a12005-01-27 16:06:23 +00008450 HChar dis_buf[50];
8451 Int len;
8452
8453 IRType ty = szToITy(sz);
8454 IRTemp tmps = newTemp(ty);
8455 IRTemp tmpd = newTemp(ty);
8456
8457 if (epartIsReg(rm)) {
sewardj5b470602005-02-27 13:10:48 +00008458 assign( tmps, getIRegE(sz, pfx, rm) );
8459 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008460
sewardj5b470602005-02-27 13:10:48 +00008461 putIRegG( sz, pfx, rm,
florian99dd03e2013-01-29 03:56:06 +00008462 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8463 mkexpr(tmps),
8464 mkexpr(tmpd) )
sewardj3ca55a12005-01-27 16:06:23 +00008465 );
sewardje941eea2005-01-30 19:52:28 +00008466 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
sewardj5b470602005-02-27 13:10:48 +00008467 nameIRegE(sz,pfx,rm),
8468 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008469 return 1+delta0;
8470 }
8471
8472 /* E refers to memory */
8473 {
sewardj2e28ac42008-12-04 00:05:12 +00008474 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardj3ca55a12005-01-27 16:06:23 +00008475 assign( tmps, loadLE(ty, mkexpr(addr)) );
sewardj5b470602005-02-27 13:10:48 +00008476 assign( tmpd, getIRegG(sz, pfx, rm) );
sewardj3ca55a12005-01-27 16:06:23 +00008477
sewardj5b470602005-02-27 13:10:48 +00008478 putIRegG( sz, pfx, rm,
florian99dd03e2013-01-29 03:56:06 +00008479 IRExpr_ITE( mk_amd64g_calculate_condition(cond),
8480 mkexpr(tmps),
8481 mkexpr(tmpd) )
sewardj3ca55a12005-01-27 16:06:23 +00008482 );
8483
sewardj7eaa7cf2005-01-31 18:55:22 +00008484 DIP("cmov%s %s,%s\n", name_AMD64Condcode(cond),
8485 dis_buf,
sewardj5b470602005-02-27 13:10:48 +00008486 nameIRegG(sz,pfx,rm));
sewardj3ca55a12005-01-27 16:06:23 +00008487 return len+delta0;
8488 }
8489}
8490
8491
sewardjb4fd2e72005-03-23 13:34:11 +00008492static
8493ULong dis_xadd_G_E ( /*OUT*/Bool* decode_ok,
sewardj2e28ac42008-12-04 00:05:12 +00008494 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008495 Prefix pfx, Int sz, Long delta0 )
sewardjb4fd2e72005-03-23 13:34:11 +00008496{
8497 Int len;
8498 UChar rm = getUChar(delta0);
8499 HChar dis_buf[50];
8500
8501 IRType ty = szToITy(sz);
8502 IRTemp tmpd = newTemp(ty);
8503 IRTemp tmpt0 = newTemp(ty);
8504 IRTemp tmpt1 = newTemp(ty);
sewardje9d8a262009-07-01 08:06:34 +00008505
8506 /* There are 3 cases to consider:
8507
sewardjc2433a82010-05-10 20:51:22 +00008508 reg-reg: ignore any lock prefix,
8509 generate 'naive' (non-atomic) sequence
sewardje9d8a262009-07-01 08:06:34 +00008510
8511 reg-mem, not locked: ignore any lock prefix, generate 'naive'
8512 (non-atomic) sequence
8513
8514 reg-mem, locked: use IRCAS
8515 */
sewardjb4fd2e72005-03-23 13:34:11 +00008516
8517 if (epartIsReg(rm)) {
sewardje9d8a262009-07-01 08:06:34 +00008518 /* case 1 */
sewardjc2433a82010-05-10 20:51:22 +00008519 assign( tmpd, getIRegE(sz, pfx, rm) );
8520 assign( tmpt0, getIRegG(sz, pfx, rm) );
8521 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8522 mkexpr(tmpd), mkexpr(tmpt0)) );
8523 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8524 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8525 putIRegE(sz, pfx, rm, mkexpr(tmpt1));
8526 DIP("xadd%c %s, %s\n",
sewardjcc3d2192013-03-27 11:37:33 +00008527 nameISize(sz), nameIRegG(sz,pfx,rm), nameIRegE(sz,pfx,rm));
sewardjc2433a82010-05-10 20:51:22 +00008528 *decode_ok = True;
8529 return 1+delta0;
sewardje9d8a262009-07-01 08:06:34 +00008530 }
sewardj6491f862013-10-15 17:29:19 +00008531 else if (!epartIsReg(rm) && !haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008532 /* case 2 */
sewardj2e28ac42008-12-04 00:05:12 +00008533 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
sewardjb4fd2e72005-03-23 13:34:11 +00008534 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8535 assign( tmpt0, getIRegG(sz, pfx, rm) );
sewardje9d8a262009-07-01 08:06:34 +00008536 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8537 mkexpr(tmpd), mkexpr(tmpt0)) );
sewardjb4fd2e72005-03-23 13:34:11 +00008538 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8539 storeLE( mkexpr(addr), mkexpr(tmpt1) );
8540 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8541 DIP("xadd%c %s, %s\n",
8542 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
sewardje9d8a262009-07-01 08:06:34 +00008543 *decode_ok = True;
sewardjb4fd2e72005-03-23 13:34:11 +00008544 return len+delta0;
8545 }
sewardj6491f862013-10-15 17:29:19 +00008546 else if (!epartIsReg(rm) && haveLOCK(pfx)) {
sewardje9d8a262009-07-01 08:06:34 +00008547 /* case 3 */
8548 IRTemp addr = disAMode ( &len, vbi, pfx, delta0, dis_buf, 0 );
8549 assign( tmpd, loadLE(ty, mkexpr(addr)) );
8550 assign( tmpt0, getIRegG(sz, pfx, rm) );
8551 assign( tmpt1, binop(mkSizedOp(ty,Iop_Add8),
8552 mkexpr(tmpd), mkexpr(tmpt0)) );
8553 casLE( mkexpr(addr), mkexpr(tmpd)/*expVal*/,
8554 mkexpr(tmpt1)/*newVal*/, guest_RIP_curr_instr );
8555 setFlags_DEP1_DEP2( Iop_Add8, tmpd, tmpt0, ty );
8556 putIRegG(sz, pfx, rm, mkexpr(tmpd));
8557 DIP("xadd%c %s, %s\n",
8558 nameISize(sz), nameIRegG(sz,pfx,rm), dis_buf);
8559 *decode_ok = True;
8560 return len+delta0;
8561 }
8562 /*UNREACHED*/
8563 vassert(0);
sewardjb4fd2e72005-03-23 13:34:11 +00008564}
8565
sewardjd20c8852005-01-20 20:04:07 +00008566//.. /* Move 16 bits from Ew (ireg or mem) to G (a segment register). */
8567//..
8568//.. static
sewardj270def42005-07-03 01:03:01 +00008569//.. UInt dis_mov_Ew_Sw ( UChar sorb, Long delta0 )
sewardjd20c8852005-01-20 20:04:07 +00008570//.. {
8571//.. Int len;
8572//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008573//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008574//.. HChar dis_buf[50];
8575//..
8576//.. if (epartIsReg(rm)) {
8577//.. putSReg( gregOfRM(rm), getIReg(2, eregOfRM(rm)) );
8578//.. DIP("movw %s,%s\n", nameIReg(2,eregOfRM(rm)), nameSReg(gregOfRM(rm)));
8579//.. return 1+delta0;
8580//.. } else {
8581//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8582//.. putSReg( gregOfRM(rm), loadLE(Ity_I16, mkexpr(addr)) );
8583//.. DIP("movw %s,%s\n", dis_buf, nameSReg(gregOfRM(rm)));
8584//.. return len+delta0;
8585//.. }
8586//.. }
8587//..
8588//.. /* Move 16 bits from G (a segment register) to Ew (ireg or mem). If
8589//.. dst is ireg and sz==4, zero out top half of it. */
8590//..
8591//.. static
8592//.. UInt dis_mov_Sw_Ew ( UChar sorb,
8593//.. Int sz,
8594//.. UInt delta0 )
8595//.. {
8596//.. Int len;
8597//.. IRTemp addr;
sewardj8c332e22005-01-28 01:36:56 +00008598//.. UChar rm = getUChar(delta0);
sewardjd20c8852005-01-20 20:04:07 +00008599//.. HChar dis_buf[50];
8600//..
8601//.. vassert(sz == 2 || sz == 4);
8602//..
8603//.. if (epartIsReg(rm)) {
8604//.. if (sz == 4)
8605//.. putIReg(4, eregOfRM(rm), unop(Iop_16Uto32, getSReg(gregOfRM(rm))));
8606//.. else
8607//.. putIReg(2, eregOfRM(rm), getSReg(gregOfRM(rm)));
8608//..
8609//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), nameIReg(sz,eregOfRM(rm)));
8610//.. return 1+delta0;
8611//.. } else {
8612//.. addr = disAMode ( &len, sorb, delta0, dis_buf );
8613//.. storeLE( mkexpr(addr), getSReg(gregOfRM(rm)) );
8614//.. DIP("mov %s,%s\n", nameSReg(gregOfRM(rm)), dis_buf);
8615//.. return len+delta0;
8616//.. }
8617//.. }
8618//..
8619//..
8620//.. static
8621//.. void dis_push_segreg ( UInt sreg, Int sz )
8622//.. {
8623//.. IRTemp t1 = newTemp(Ity_I16);
8624//.. IRTemp ta = newTemp(Ity_I32);
8625//.. vassert(sz == 2 || sz == 4);
8626//..
8627//.. assign( t1, getSReg(sreg) );
8628//.. assign( ta, binop(Iop_Sub32, getIReg(4, R_ESP), mkU32(sz)) );
8629//.. putIReg(4, R_ESP, mkexpr(ta));
8630//.. storeLE( mkexpr(ta), mkexpr(t1) );
8631//..
8632//.. DIP("pushw %s\n", nameSReg(sreg));
8633//.. }
8634//..
8635//.. static
8636//.. void dis_pop_segreg ( UInt sreg, Int sz )
8637//.. {
8638//.. IRTemp t1 = newTemp(Ity_I16);
8639//.. IRTemp ta = newTemp(Ity_I32);
8640//.. vassert(sz == 2 || sz == 4);
8641//..
8642//.. assign( ta, getIReg(4, R_ESP) );
8643//.. assign( t1, loadLE(Ity_I16, mkexpr(ta)) );
8644//..
8645//.. putIReg(4, R_ESP, binop(Iop_Add32, mkexpr(ta), mkU32(sz)) );
8646//.. putSReg( sreg, mkexpr(t1) );
8647//.. DIP("pop %s\n", nameSReg(sreg));
8648//.. }
sewardj2f959cc2005-01-26 01:19:35 +00008649
8650static
sewardjc6f970f2012-04-02 21:54:49 +00008651void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 )
sewardj2f959cc2005-01-26 01:19:35 +00008652{
8653 IRTemp t1 = newTemp(Ity_I64);
8654 IRTemp t2 = newTemp(Ity_I64);
sewardj5a9ffab2005-05-12 17:55:01 +00008655 IRTemp t3 = newTemp(Ity_I64);
sewardj2f959cc2005-01-26 01:19:35 +00008656 assign(t1, getIReg64(R_RSP));
8657 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
sewardj5a9ffab2005-05-12 17:55:01 +00008658 assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
8659 putIReg64(R_RSP, mkexpr(t3));
sewardj478646f2008-05-01 20:13:04 +00008660 make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
sewardjc6f970f2012-04-02 21:54:49 +00008661 jmp_treg(dres, Ijk_Ret, t2);
8662 vassert(dres->whatNext == Dis_StopHere);
sewardj2f959cc2005-01-26 01:19:35 +00008663}
8664
sewardj5b470602005-02-27 13:10:48 +00008665
sewardj1001dc42005-02-21 08:25:55 +00008666/*------------------------------------------------------------*/
8667/*--- SSE/SSE2/SSE3 helpers ---*/
8668/*------------------------------------------------------------*/
8669
sewardj9571dc02014-01-26 18:34:23 +00008670/* Indicates whether the op requires a rounding-mode argument. Note
8671 that this covers only vector floating point arithmetic ops, and
8672 omits the scalar ones that need rounding modes. Note also that
8673 inconsistencies here will get picked up later by the IR sanity
8674 checker, so this isn't correctness-critical. */
8675static Bool requiresRMode ( IROp op )
8676{
8677 switch (op) {
8678 /* 128 bit ops */
8679 case Iop_Add32Fx4: case Iop_Sub32Fx4:
8680 case Iop_Mul32Fx4: case Iop_Div32Fx4:
8681 case Iop_Add64Fx2: case Iop_Sub64Fx2:
8682 case Iop_Mul64Fx2: case Iop_Div64Fx2:
8683 /* 256 bit ops */
8684 case Iop_Add32Fx8: case Iop_Sub32Fx8:
8685 case Iop_Mul32Fx8: case Iop_Div32Fx8:
8686 case Iop_Add64Fx4: case Iop_Sub64Fx4:
8687 case Iop_Mul64Fx4: case Iop_Div64Fx4:
8688 return True;
8689 default:
8690 break;
8691 }
8692 return False;
8693}
8694
8695
sewardj1001dc42005-02-21 08:25:55 +00008696/* Worker function; do not call directly.
8697 Handles full width G = G `op` E and G = (not G) `op` E.
8698*/
8699
sewardj8d965312005-02-25 02:48:47 +00008700static ULong dis_SSE_E_to_G_all_wrk (
sewardj2e28ac42008-12-04 00:05:12 +00008701 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008702 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008703 const HChar* opname, IROp op,
sewardj8d965312005-02-25 02:48:47 +00008704 Bool invertG
8705 )
sewardj9da16972005-02-21 13:58:26 +00008706{
8707 HChar dis_buf[50];
8708 Int alen;
8709 IRTemp addr;
8710 UChar rm = getUChar(delta);
sewardj9571dc02014-01-26 18:34:23 +00008711 Bool needsRMode = requiresRMode(op);
sewardj9da16972005-02-21 13:58:26 +00008712 IRExpr* gpart
8713 = invertG ? unop(Iop_NotV128, getXMMReg(gregOfRexRM(pfx,rm)))
8714 : getXMMReg(gregOfRexRM(pfx,rm));
8715 if (epartIsReg(rm)) {
sewardj9571dc02014-01-26 18:34:23 +00008716 putXMMReg(
8717 gregOfRexRM(pfx,rm),
8718 needsRMode
8719 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8720 gpart,
8721 getXMMReg(eregOfRexRM(pfx,rm)))
8722 : binop(op, gpart,
8723 getXMMReg(eregOfRexRM(pfx,rm)))
8724 );
sewardj9da16972005-02-21 13:58:26 +00008725 DIP("%s %s,%s\n", opname,
8726 nameXMMReg(eregOfRexRM(pfx,rm)),
8727 nameXMMReg(gregOfRexRM(pfx,rm)) );
8728 return delta+1;
8729 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008730 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj9571dc02014-01-26 18:34:23 +00008731 putXMMReg(
8732 gregOfRexRM(pfx,rm),
8733 needsRMode
8734 ? triop(op, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */
8735 gpart,
8736 loadLE(Ity_V128, mkexpr(addr)))
8737 : binop(op, gpart,
8738 loadLE(Ity_V128, mkexpr(addr)))
8739 );
sewardj9da16972005-02-21 13:58:26 +00008740 DIP("%s %s,%s\n", opname,
8741 dis_buf,
8742 nameXMMReg(gregOfRexRM(pfx,rm)) );
8743 return delta+alen;
8744 }
8745}
8746
8747
8748/* All lanes SSE binary operation, G = G `op` E. */
8749
8750static
sewardj2e28ac42008-12-04 00:05:12 +00008751ULong dis_SSE_E_to_G_all ( VexAbiInfo* vbi,
8752 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008753 const HChar* opname, IROp op )
sewardj9da16972005-02-21 13:58:26 +00008754{
sewardj2e28ac42008-12-04 00:05:12 +00008755 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, False );
sewardj9da16972005-02-21 13:58:26 +00008756}
8757
sewardj8d965312005-02-25 02:48:47 +00008758/* All lanes SSE binary operation, G = (not G) `op` E. */
8759
8760static
sewardj2e28ac42008-12-04 00:05:12 +00008761ULong dis_SSE_E_to_G_all_invG ( VexAbiInfo* vbi,
8762 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008763 const HChar* opname, IROp op )
sewardj8d965312005-02-25 02:48:47 +00008764{
sewardj2e28ac42008-12-04 00:05:12 +00008765 return dis_SSE_E_to_G_all_wrk( vbi, pfx, delta, opname, op, True );
sewardj8d965312005-02-25 02:48:47 +00008766}
8767
8768
8769/* Lowest 32-bit lane only SSE binary operation, G = G `op` E. */
8770
sewardj2e28ac42008-12-04 00:05:12 +00008771static ULong dis_SSE_E_to_G_lo32 ( VexAbiInfo* vbi,
8772 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008773 const HChar* opname, IROp op )
sewardj8d965312005-02-25 02:48:47 +00008774{
8775 HChar dis_buf[50];
8776 Int alen;
8777 IRTemp addr;
8778 UChar rm = getUChar(delta);
sewardj9c9ee3d2005-02-26 01:17:42 +00008779 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
sewardj8d965312005-02-25 02:48:47 +00008780 if (epartIsReg(rm)) {
sewardj9c9ee3d2005-02-26 01:17:42 +00008781 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00008782 binop(op, gpart,
8783 getXMMReg(eregOfRexRM(pfx,rm))) );
8784 DIP("%s %s,%s\n", opname,
8785 nameXMMReg(eregOfRexRM(pfx,rm)),
8786 nameXMMReg(gregOfRexRM(pfx,rm)) );
8787 return delta+1;
8788 } else {
8789 /* We can only do a 32-bit memory read, so the upper 3/4 of the
8790 E operand needs to be made simply of zeroes. */
8791 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008792 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj8d965312005-02-25 02:48:47 +00008793 assign( epart, unop( Iop_32UtoV128,
8794 loadLE(Ity_I32, mkexpr(addr))) );
8795 putXMMReg( gregOfRexRM(pfx,rm),
8796 binop(op, gpart, mkexpr(epart)) );
8797 DIP("%s %s,%s\n", opname,
8798 dis_buf,
8799 nameXMMReg(gregOfRexRM(pfx,rm)) );
8800 return delta+alen;
8801 }
8802}
sewardj1001dc42005-02-21 08:25:55 +00008803
8804
8805/* Lower 64-bit lane only SSE binary operation, G = G `op` E. */
8806
sewardj2e28ac42008-12-04 00:05:12 +00008807static ULong dis_SSE_E_to_G_lo64 ( VexAbiInfo* vbi,
8808 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008809 const HChar* opname, IROp op )
sewardj1001dc42005-02-21 08:25:55 +00008810{
8811 HChar dis_buf[50];
8812 Int alen;
8813 IRTemp addr;
8814 UChar rm = getUChar(delta);
8815 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8816 if (epartIsReg(rm)) {
8817 putXMMReg( gregOfRexRM(pfx,rm),
8818 binop(op, gpart,
8819 getXMMReg(eregOfRexRM(pfx,rm))) );
8820 DIP("%s %s,%s\n", opname,
8821 nameXMMReg(eregOfRexRM(pfx,rm)),
8822 nameXMMReg(gregOfRexRM(pfx,rm)) );
8823 return delta+1;
8824 } else {
8825 /* We can only do a 64-bit memory read, so the upper half of the
8826 E operand needs to be made simply of zeroes. */
8827 IRTemp epart = newTemp(Ity_V128);
sewardj2e28ac42008-12-04 00:05:12 +00008828 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008829 assign( epart, unop( Iop_64UtoV128,
8830 loadLE(Ity_I64, mkexpr(addr))) );
8831 putXMMReg( gregOfRexRM(pfx,rm),
8832 binop(op, gpart, mkexpr(epart)) );
8833 DIP("%s %s,%s\n", opname,
8834 dis_buf,
8835 nameXMMReg(gregOfRexRM(pfx,rm)) );
8836 return delta+alen;
8837 }
8838}
8839
8840
sewardja7ba8c42005-05-10 20:08:34 +00008841/* All lanes unary SSE operation, G = op(E). */
8842
8843static ULong dis_SSE_E_to_G_unary_all (
sewardj2e28ac42008-12-04 00:05:12 +00008844 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008845 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008846 const HChar* opname, IROp op
sewardja7ba8c42005-05-10 20:08:34 +00008847 )
8848{
8849 HChar dis_buf[50];
8850 Int alen;
8851 IRTemp addr;
8852 UChar rm = getUChar(delta);
8853 if (epartIsReg(rm)) {
8854 putXMMReg( gregOfRexRM(pfx,rm),
8855 unop(op, getXMMReg(eregOfRexRM(pfx,rm))) );
8856 DIP("%s %s,%s\n", opname,
8857 nameXMMReg(eregOfRexRM(pfx,rm)),
8858 nameXMMReg(gregOfRexRM(pfx,rm)) );
8859 return delta+1;
8860 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008861 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008862 putXMMReg( gregOfRexRM(pfx,rm),
8863 unop(op, loadLE(Ity_V128, mkexpr(addr))) );
8864 DIP("%s %s,%s\n", opname,
8865 dis_buf,
8866 nameXMMReg(gregOfRexRM(pfx,rm)) );
8867 return delta+alen;
8868 }
8869}
8870
8871
8872/* Lowest 32-bit lane only unary SSE operation, G = op(E). */
8873
8874static ULong dis_SSE_E_to_G_unary_lo32 (
sewardj2e28ac42008-12-04 00:05:12 +00008875 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008876 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008877 const HChar* opname, IROp op
sewardja7ba8c42005-05-10 20:08:34 +00008878 )
8879{
8880 /* First we need to get the old G value and patch the low 32 bits
8881 of the E operand into it. Then apply op and write back to G. */
8882 HChar dis_buf[50];
8883 Int alen;
8884 IRTemp addr;
8885 UChar rm = getUChar(delta);
8886 IRTemp oldG0 = newTemp(Ity_V128);
8887 IRTemp oldG1 = newTemp(Ity_V128);
8888
8889 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8890
8891 if (epartIsReg(rm)) {
8892 assign( oldG1,
8893 binop( Iop_SetV128lo32,
8894 mkexpr(oldG0),
8895 getXMMRegLane32(eregOfRexRM(pfx,rm), 0)) );
8896 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8897 DIP("%s %s,%s\n", opname,
8898 nameXMMReg(eregOfRexRM(pfx,rm)),
8899 nameXMMReg(gregOfRexRM(pfx,rm)) );
8900 return delta+1;
8901 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008902 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardja7ba8c42005-05-10 20:08:34 +00008903 assign( oldG1,
8904 binop( Iop_SetV128lo32,
8905 mkexpr(oldG0),
8906 loadLE(Ity_I32, mkexpr(addr)) ));
8907 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8908 DIP("%s %s,%s\n", opname,
8909 dis_buf,
8910 nameXMMReg(gregOfRexRM(pfx,rm)) );
8911 return delta+alen;
8912 }
8913}
sewardj1001dc42005-02-21 08:25:55 +00008914
8915
8916/* Lowest 64-bit lane only unary SSE operation, G = op(E). */
8917
sewardj8d965312005-02-25 02:48:47 +00008918static ULong dis_SSE_E_to_G_unary_lo64 (
sewardj2e28ac42008-12-04 00:05:12 +00008919 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008920 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008921 const HChar* opname, IROp op
sewardj8d965312005-02-25 02:48:47 +00008922 )
sewardj1001dc42005-02-21 08:25:55 +00008923{
8924 /* First we need to get the old G value and patch the low 64 bits
8925 of the E operand into it. Then apply op and write back to G. */
8926 HChar dis_buf[50];
8927 Int alen;
8928 IRTemp addr;
8929 UChar rm = getUChar(delta);
8930 IRTemp oldG0 = newTemp(Ity_V128);
8931 IRTemp oldG1 = newTemp(Ity_V128);
8932
8933 assign( oldG0, getXMMReg(gregOfRexRM(pfx,rm)) );
8934
8935 if (epartIsReg(rm)) {
8936 assign( oldG1,
8937 binop( Iop_SetV128lo64,
8938 mkexpr(oldG0),
8939 getXMMRegLane64(eregOfRexRM(pfx,rm), 0)) );
8940 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8941 DIP("%s %s,%s\n", opname,
8942 nameXMMReg(eregOfRexRM(pfx,rm)),
8943 nameXMMReg(gregOfRexRM(pfx,rm)) );
8944 return delta+1;
8945 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008946 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj1001dc42005-02-21 08:25:55 +00008947 assign( oldG1,
8948 binop( Iop_SetV128lo64,
8949 mkexpr(oldG0),
8950 loadLE(Ity_I64, mkexpr(addr)) ));
8951 putXMMReg( gregOfRexRM(pfx,rm), unop(op, mkexpr(oldG1)) );
8952 DIP("%s %s,%s\n", opname,
8953 dis_buf,
8954 nameXMMReg(gregOfRexRM(pfx,rm)) );
8955 return delta+alen;
8956 }
8957}
8958
8959
sewardj09717342005-05-05 21:34:02 +00008960/* SSE integer binary operation:
8961 G = G `op` E (eLeft == False)
8962 G = E `op` G (eLeft == True)
8963*/
8964static ULong dis_SSEint_E_to_G(
sewardj2e28ac42008-12-04 00:05:12 +00008965 VexAbiInfo* vbi,
sewardj270def42005-07-03 01:03:01 +00008966 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00008967 const HChar* opname, IROp op,
sewardj09717342005-05-05 21:34:02 +00008968 Bool eLeft
8969 )
8970{
8971 HChar dis_buf[50];
8972 Int alen;
8973 IRTemp addr;
8974 UChar rm = getUChar(delta);
8975 IRExpr* gpart = getXMMReg(gregOfRexRM(pfx,rm));
8976 IRExpr* epart = NULL;
8977 if (epartIsReg(rm)) {
8978 epart = getXMMReg(eregOfRexRM(pfx,rm));
8979 DIP("%s %s,%s\n", opname,
8980 nameXMMReg(eregOfRexRM(pfx,rm)),
8981 nameXMMReg(gregOfRexRM(pfx,rm)) );
8982 delta += 1;
8983 } else {
sewardj2e28ac42008-12-04 00:05:12 +00008984 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj09717342005-05-05 21:34:02 +00008985 epart = loadLE(Ity_V128, mkexpr(addr));
8986 DIP("%s %s,%s\n", opname,
8987 dis_buf,
8988 nameXMMReg(gregOfRexRM(pfx,rm)) );
8989 delta += alen;
8990 }
8991 putXMMReg( gregOfRexRM(pfx,rm),
8992 eLeft ? binop(op, epart, gpart)
sewardjcc3d2192013-03-27 11:37:33 +00008993 : binop(op, gpart, epart) );
sewardj09717342005-05-05 21:34:02 +00008994 return delta;
8995}
sewardj8d965312005-02-25 02:48:47 +00008996
8997
sewardjc4530ae2012-05-21 10:18:49 +00008998/* Helper for doing SSE FP comparisons. False return ==> unhandled.
8999 This is all a bit of a kludge in that it ignores the subtleties of
9000 ordered-vs-unordered and signalling-vs-nonsignalling in the Intel
9001 spec. */
9002static Bool findSSECmpOp ( /*OUT*/Bool* preSwapP,
9003 /*OUT*/IROp* opP,
9004 /*OUT*/Bool* postNotP,
9005 UInt imm8, Bool all_lanes, Int sz )
sewardj8d965312005-02-25 02:48:47 +00009006{
sewardjc4530ae2012-05-21 10:18:49 +00009007 if (imm8 >= 32) return False;
9008
9009 /* First, compute a (preSwap, op, postNot) triple from
9010 the supplied imm8. */
9011 Bool pre = False;
9012 IROp op = Iop_INVALID;
9013 Bool not = False;
9014
9015# define XXX(_pre, _op, _not) { pre = _pre; op = _op; not = _not; }
sewardjd698a052012-06-25 07:40:54 +00009016 // If you add a case here, add a corresponding test for both VCMPSD_128
9017 // and VCMPSS_128 in avx-1.c.
sewardjc4530ae2012-05-21 10:18:49 +00009018 switch (imm8) {
sewardj2f8c0b92012-07-14 14:20:00 +00009019 // "O" = ordered, "U" = unordered
9020 // "Q" = non-signalling (quiet), "S" = signalling
9021 //
9022 // swap operands?
9023 // |
9024 // | cmp op invert after?
9025 // | | |
9026 // v v v
sewardjd698a052012-06-25 07:40:54 +00009027 case 0x0: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_OQ
9028 case 0x1: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OS
9029 case 0x2: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OS
9030 case 0x3: XXX(False, Iop_CmpUN32Fx4, False); break; // UNORD_Q
9031 case 0x4: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_UQ
9032 case 0x5: XXX(False, Iop_CmpLT32Fx4, True); break; // NLT_US
9033 case 0x6: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_US
9034 case 0x7: XXX(False, Iop_CmpUN32Fx4, True); break; // ORD_Q
tom4d780eb2013-03-27 15:38:47 +00009035 case 0x8: XXX(False, Iop_CmpEQ32Fx4, False); break; // EQ_UQ
sewardj2f8c0b92012-07-14 14:20:00 +00009036 case 0x9: XXX(True, Iop_CmpLE32Fx4, True); break; // NGE_US
sewardjc4530ae2012-05-21 10:18:49 +00009037 /* "Enhanced Comparison Predicate[s] for VEX-Encoded [insns] */
sewardjd698a052012-06-25 07:40:54 +00009038 case 0xA: XXX(True, Iop_CmpLT32Fx4, True); break; // NGT_US
9039 // 0xB FALSE_OQ
sewardj68b01f72012-06-27 10:27:13 +00009040 // 0xC: this isn't really right because it returns all-1s when
9041 // either operand is a NaN, and it should return all-0s.
sewardjd698a052012-06-25 07:40:54 +00009042 case 0xC: XXX(False, Iop_CmpEQ32Fx4, True); break; // NEQ_OQ
9043 case 0xD: XXX(True, Iop_CmpLE32Fx4, False); break; // GE_OS
9044 case 0xE: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OS
9045 // 0xF TRUE_UQ
9046 // 0x10 EQ_OS
9047 case 0x11: XXX(False, Iop_CmpLT32Fx4, False); break; // LT_OQ
9048 case 0x12: XXX(False, Iop_CmpLE32Fx4, False); break; // LE_OQ
9049 // 0x13 UNORD_S
9050 // 0x14 NEQ_US
9051 // 0x15 NLT_UQ
9052 case 0x16: XXX(False, Iop_CmpLE32Fx4, True); break; // NLE_UQ
9053 // 0x17 ORD_S
9054 // 0x18 EQ_US
9055 // 0x19 NGE_UQ
9056 // 0x1A NGT_UQ
9057 // 0x1B FALSE_OS
9058 // 0x1C NEQ_OS
9059 // 0x1D GE_OQ
9060 case 0x1E: XXX(True, Iop_CmpLT32Fx4, False); break; // GT_OQ
9061 // 0x1F TRUE_US
sewardjc93904b2012-05-27 13:50:42 +00009062 /* Don't forget to add test cases to VCMPSS_128_<imm8> in
9063 avx-1.c if new cases turn up. */
sewardjc4530ae2012-05-21 10:18:49 +00009064 default: break;
9065 }
9066# undef XXX
9067 if (op == Iop_INVALID) return False;
9068
9069 /* Now convert the op into one with the same arithmetic but that is
9070 correct for the width and laneage requirements. */
9071
9072 /**/ if (sz == 4 && all_lanes) {
9073 switch (op) {
9074 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32Fx4; break;
9075 case Iop_CmpLT32Fx4: op = Iop_CmpLT32Fx4; break;
9076 case Iop_CmpLE32Fx4: op = Iop_CmpLE32Fx4; break;
9077 case Iop_CmpUN32Fx4: op = Iop_CmpUN32Fx4; break;
9078 default: vassert(0);
9079 }
9080 }
9081 else if (sz == 4 && !all_lanes) {
9082 switch (op) {
9083 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ32F0x4; break;
9084 case Iop_CmpLT32Fx4: op = Iop_CmpLT32F0x4; break;
9085 case Iop_CmpLE32Fx4: op = Iop_CmpLE32F0x4; break;
9086 case Iop_CmpUN32Fx4: op = Iop_CmpUN32F0x4; break;
9087 default: vassert(0);
9088 }
9089 }
9090 else if (sz == 8 && all_lanes) {
9091 switch (op) {
9092 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64Fx2; break;
9093 case Iop_CmpLT32Fx4: op = Iop_CmpLT64Fx2; break;
9094 case Iop_CmpLE32Fx4: op = Iop_CmpLE64Fx2; break;
9095 case Iop_CmpUN32Fx4: op = Iop_CmpUN64Fx2; break;
9096 default: vassert(0);
9097 }
9098 }
9099 else if (sz == 8 && !all_lanes) {
9100 switch (op) {
9101 case Iop_CmpEQ32Fx4: op = Iop_CmpEQ64F0x2; break;
9102 case Iop_CmpLT32Fx4: op = Iop_CmpLT64F0x2; break;
9103 case Iop_CmpLE32Fx4: op = Iop_CmpLE64F0x2; break;
9104 case Iop_CmpUN32Fx4: op = Iop_CmpUN64F0x2; break;
9105 default: vassert(0);
9106 }
9107 }
9108 else {
9109 vpanic("findSSECmpOp(amd64,guest)");
sewardj8d965312005-02-25 02:48:47 +00009110 }
9111
sewardjc4530ae2012-05-21 10:18:49 +00009112 *preSwapP = pre; *opP = op; *postNotP = not;
9113 return True;
sewardj8d965312005-02-25 02:48:47 +00009114}
9115
sewardj8d965312005-02-25 02:48:47 +00009116
sewardjc4530ae2012-05-21 10:18:49 +00009117/* Handles SSE 32F/64F comparisons. It can fail, in which case it
9118 returns the original delta to indicate failure. */
9119
9120static Long dis_SSE_cmp_E_to_G ( VexAbiInfo* vbi,
sewardj2e28ac42008-12-04 00:05:12 +00009121 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00009122 const HChar* opname, Bool all_lanes, Int sz )
sewardj8d965312005-02-25 02:48:47 +00009123{
sewardjc4530ae2012-05-21 10:18:49 +00009124 Long delta0 = delta;
sewardj8d965312005-02-25 02:48:47 +00009125 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +00009126 Int alen;
9127 UInt imm8;
sewardj8d965312005-02-25 02:48:47 +00009128 IRTemp addr;
sewardjc4530ae2012-05-21 10:18:49 +00009129 Bool preSwap = False;
sewardj8d965312005-02-25 02:48:47 +00009130 IROp op = Iop_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +00009131 Bool postNot = False;
sewardj8d965312005-02-25 02:48:47 +00009132 IRTemp plain = newTemp(Ity_V128);
9133 UChar rm = getUChar(delta);
9134 UShort mask = 0;
9135 vassert(sz == 4 || sz == 8);
9136 if (epartIsReg(rm)) {
9137 imm8 = getUChar(delta+1);
sewardjc4530ae2012-05-21 10:18:49 +00009138 if (imm8 >= 8) return delta0; /* FAIL */
9139 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9140 if (!ok) return delta0; /* FAIL */
9141 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardj8d965312005-02-25 02:48:47 +00009142 assign( plain, binop(op, getXMMReg(gregOfRexRM(pfx,rm)),
9143 getXMMReg(eregOfRexRM(pfx,rm))) );
9144 delta += 2;
9145 DIP("%s $%d,%s,%s\n", opname,
9146 (Int)imm8,
9147 nameXMMReg(eregOfRexRM(pfx,rm)),
9148 nameXMMReg(gregOfRexRM(pfx,rm)) );
9149 } else {
sewardj2e28ac42008-12-04 00:05:12 +00009150 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj8d965312005-02-25 02:48:47 +00009151 imm8 = getUChar(delta+alen);
sewardjc4530ae2012-05-21 10:18:49 +00009152 if (imm8 >= 8) return delta0; /* FAIL */
9153 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
9154 if (!ok) return delta0; /* FAIL */
9155 vassert(!preSwap); /* never needed for imm8 < 8 */
sewardjab9055b2006-01-01 13:17:38 +00009156 assign( plain,
9157 binop(
9158 op,
9159 getXMMReg(gregOfRexRM(pfx,rm)),
sewardjc4530ae2012-05-21 10:18:49 +00009160 all_lanes
9161 ? loadLE(Ity_V128, mkexpr(addr))
9162 : sz == 8
9163 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
9164 : /*sz==4*/
9165 unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr)))
sewardjcc3d2192013-03-27 11:37:33 +00009166 )
sewardjab9055b2006-01-01 13:17:38 +00009167 );
sewardj8d965312005-02-25 02:48:47 +00009168 delta += alen+1;
9169 DIP("%s $%d,%s,%s\n", opname,
9170 (Int)imm8,
9171 dis_buf,
9172 nameXMMReg(gregOfRexRM(pfx,rm)) );
9173 }
9174
sewardjc4530ae2012-05-21 10:18:49 +00009175 if (postNot && all_lanes) {
sewardj9c9ee3d2005-02-26 01:17:42 +00009176 putXMMReg( gregOfRexRM(pfx,rm),
sewardj8d965312005-02-25 02:48:47 +00009177 unop(Iop_NotV128, mkexpr(plain)) );
9178 }
9179 else
sewardjc4530ae2012-05-21 10:18:49 +00009180 if (postNot && !all_lanes) {
sewardj1027dc22005-02-26 01:55:02 +00009181 mask = toUShort(sz==4 ? 0x000F : 0x00FF);
sewardj8d965312005-02-25 02:48:47 +00009182 putXMMReg( gregOfRexRM(pfx,rm),
9183 binop(Iop_XorV128, mkexpr(plain), mkV128(mask)) );
9184 }
9185 else {
9186 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(plain) );
9187 }
9188
9189 return delta;
9190}
9191
9192
sewardjadffcef2005-05-11 00:03:06 +00009193/* Vector by scalar shift of G by the amount specified at the bottom
9194 of E. */
9195
sewardj2e28ac42008-12-04 00:05:12 +00009196static ULong dis_SSE_shiftG_byE ( VexAbiInfo* vbi,
9197 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +00009198 const HChar* opname, IROp op )
sewardjadffcef2005-05-11 00:03:06 +00009199{
9200 HChar dis_buf[50];
9201 Int alen, size;
9202 IRTemp addr;
9203 Bool shl, shr, sar;
9204 UChar rm = getUChar(delta);
9205 IRTemp g0 = newTemp(Ity_V128);
9206 IRTemp g1 = newTemp(Ity_V128);
sewardj4c0a7ac2012-06-21 09:08:19 +00009207 IRTemp amt = newTemp(Ity_I64);
sewardjadffcef2005-05-11 00:03:06 +00009208 IRTemp amt8 = newTemp(Ity_I8);
9209 if (epartIsReg(rm)) {
sewardj4c0a7ac2012-06-21 09:08:19 +00009210 assign( amt, getXMMRegLane64(eregOfRexRM(pfx,rm), 0) );
sewardjadffcef2005-05-11 00:03:06 +00009211 DIP("%s %s,%s\n", opname,
9212 nameXMMReg(eregOfRexRM(pfx,rm)),
9213 nameXMMReg(gregOfRexRM(pfx,rm)) );
9214 delta++;
9215 } else {
sewardj2e28ac42008-12-04 00:05:12 +00009216 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj4c0a7ac2012-06-21 09:08:19 +00009217 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
sewardjadffcef2005-05-11 00:03:06 +00009218 DIP("%s %s,%s\n", opname,
9219 dis_buf,
9220 nameXMMReg(gregOfRexRM(pfx,rm)) );
9221 delta += alen;
9222 }
9223 assign( g0, getXMMReg(gregOfRexRM(pfx,rm)) );
sewardj4c0a7ac2012-06-21 09:08:19 +00009224 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
sewardjadffcef2005-05-11 00:03:06 +00009225
9226 shl = shr = sar = False;
9227 size = 0;
9228 switch (op) {
9229 case Iop_ShlN16x8: shl = True; size = 32; break;
9230 case Iop_ShlN32x4: shl = True; size = 32; break;
9231 case Iop_ShlN64x2: shl = True; size = 64; break;
9232 case Iop_SarN16x8: sar = True; size = 16; break;
9233 case Iop_SarN32x4: sar = True; size = 32; break;
9234 case Iop_ShrN16x8: shr = True; size = 16; break;
9235 case Iop_ShrN32x4: shr = True; size = 32; break;
9236 case Iop_ShrN64x2: shr = True; size = 64; break;
9237 default: vassert(0);
9238 }
9239
9240 if (shl || shr) {
9241 assign(
9242 g1,
florian99dd03e2013-01-29 03:56:06 +00009243 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00009244 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00009245 binop(op, mkexpr(g0), mkexpr(amt8)),
9246 mkV128(0x0000)
sewardjadffcef2005-05-11 00:03:06 +00009247 )
9248 );
9249 } else
9250 if (sar) {
9251 assign(
9252 g1,
florian99dd03e2013-01-29 03:56:06 +00009253 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +00009254 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +00009255 binop(op, mkexpr(g0), mkexpr(amt8)),
9256 binop(op, mkexpr(g0), mkU8(size-1))
sewardjadffcef2005-05-11 00:03:06 +00009257 )
9258 );
9259 } else {
9260 vassert(0);
9261 }
9262
9263 putXMMReg( gregOfRexRM(pfx,rm), mkexpr(g1) );
9264 return delta;
9265}
sewardj09717342005-05-05 21:34:02 +00009266
9267
9268/* Vector by scalar shift of E by an immediate byte. */
9269
9270static
9271ULong dis_SSE_shiftE_imm ( Prefix pfx,
florian55085f82012-11-21 00:36:55 +00009272 Long delta, const HChar* opname, IROp op )
sewardj09717342005-05-05 21:34:02 +00009273{
9274 Bool shl, shr, sar;
9275 UChar rm = getUChar(delta);
9276 IRTemp e0 = newTemp(Ity_V128);
9277 IRTemp e1 = newTemp(Ity_V128);
9278 UChar amt, size;
9279 vassert(epartIsReg(rm));
9280 vassert(gregLO3ofRM(rm) == 2
9281 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
sewardjca673ab2005-05-11 10:03:08 +00009282 amt = getUChar(delta+1);
sewardj09717342005-05-05 21:34:02 +00009283 delta += 2;
9284 DIP("%s $%d,%s\n", opname,
9285 (Int)amt,
9286 nameXMMReg(eregOfRexRM(pfx,rm)) );
9287 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
9288
9289 shl = shr = sar = False;
9290 size = 0;
9291 switch (op) {
9292 case Iop_ShlN16x8: shl = True; size = 16; break;
9293 case Iop_ShlN32x4: shl = True; size = 32; break;
9294 case Iop_ShlN64x2: shl = True; size = 64; break;
9295 case Iop_SarN16x8: sar = True; size = 16; break;
9296 case Iop_SarN32x4: sar = True; size = 32; break;
9297 case Iop_ShrN16x8: shr = True; size = 16; break;
9298 case Iop_ShrN32x4: shr = True; size = 32; break;
9299 case Iop_ShrN64x2: shr = True; size = 64; break;
9300 default: vassert(0);
9301 }
9302
9303 if (shl || shr) {
9304 assign( e1, amt >= size
9305 ? mkV128(0x0000)
9306 : binop(op, mkexpr(e0), mkU8(amt))
9307 );
9308 } else
9309 if (sar) {
9310 assign( e1, amt >= size
9311 ? binop(op, mkexpr(e0), mkU8(size-1))
9312 : binop(op, mkexpr(e0), mkU8(amt))
9313 );
9314 } else {
9315 vassert(0);
9316 }
9317
9318 putXMMReg( eregOfRexRM(pfx,rm), mkexpr(e1) );
9319 return delta;
9320}
sewardj1a01e652005-02-23 11:39:21 +00009321
9322
9323/* Get the current SSE rounding mode. */
9324
9325static IRExpr* /* :: Ity_I32 */ get_sse_roundingmode ( void )
9326{
9327 return
9328 unop( Iop_64to32,
9329 binop( Iop_And64,
9330 IRExpr_Get( OFFB_SSEROUND, Ity_I64 ),
9331 mkU64(3) ));
9332}
9333
sewardjbcbb9de2005-03-27 02:22:32 +00009334static void put_sse_roundingmode ( IRExpr* sseround )
9335{
sewardjdd40fdf2006-12-24 02:20:24 +00009336 vassert(typeOfIRExpr(irsb->tyenv, sseround) == Ity_I32);
sewardjbcbb9de2005-03-27 02:22:32 +00009337 stmt( IRStmt_Put( OFFB_SSEROUND,
9338 unop(Iop_32Uto64,sseround) ) );
9339}
9340
sewardj4b1cc832012-06-13 11:10:20 +00009341/* Break a V128-bit value up into four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00009342
sewardj4b1cc832012-06-13 11:10:20 +00009343static void breakupV128to32s ( IRTemp t128,
9344 /*OUTs*/
9345 IRTemp* t3, IRTemp* t2,
9346 IRTemp* t1, IRTemp* t0 )
sewardja7ba8c42005-05-10 20:08:34 +00009347{
9348 IRTemp hi64 = newTemp(Ity_I64);
9349 IRTemp lo64 = newTemp(Ity_I64);
9350 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
9351 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
9352
9353 vassert(t0 && *t0 == IRTemp_INVALID);
9354 vassert(t1 && *t1 == IRTemp_INVALID);
9355 vassert(t2 && *t2 == IRTemp_INVALID);
9356 vassert(t3 && *t3 == IRTemp_INVALID);
9357
9358 *t0 = newTemp(Ity_I32);
9359 *t1 = newTemp(Ity_I32);
9360 *t2 = newTemp(Ity_I32);
9361 *t3 = newTemp(Ity_I32);
9362 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
9363 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
9364 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
9365 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
9366}
9367
sewardj4b1cc832012-06-13 11:10:20 +00009368/* Construct a V128-bit value from four 32-bit ints. */
sewardja7ba8c42005-05-10 20:08:34 +00009369
sewardj4b1cc832012-06-13 11:10:20 +00009370static IRExpr* mkV128from32s ( IRTemp t3, IRTemp t2,
9371 IRTemp t1, IRTemp t0 )
sewardja7ba8c42005-05-10 20:08:34 +00009372{
9373 return
9374 binop( Iop_64HLtoV128,
9375 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9376 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0))
9377 );
9378}
9379
9380/* Break a 64-bit value up into four 16-bit ints. */
9381
9382static void breakup64to16s ( IRTemp t64,
9383 /*OUTs*/
9384 IRTemp* t3, IRTemp* t2,
9385 IRTemp* t1, IRTemp* t0 )
9386{
9387 IRTemp hi32 = newTemp(Ity_I32);
9388 IRTemp lo32 = newTemp(Ity_I32);
9389 assign( hi32, unop(Iop_64HIto32, mkexpr(t64)) );
9390 assign( lo32, unop(Iop_64to32, mkexpr(t64)) );
9391
9392 vassert(t0 && *t0 == IRTemp_INVALID);
9393 vassert(t1 && *t1 == IRTemp_INVALID);
9394 vassert(t2 && *t2 == IRTemp_INVALID);
9395 vassert(t3 && *t3 == IRTemp_INVALID);
9396
9397 *t0 = newTemp(Ity_I16);
9398 *t1 = newTemp(Ity_I16);
9399 *t2 = newTemp(Ity_I16);
9400 *t3 = newTemp(Ity_I16);
9401 assign( *t0, unop(Iop_32to16, mkexpr(lo32)) );
9402 assign( *t1, unop(Iop_32HIto16, mkexpr(lo32)) );
9403 assign( *t2, unop(Iop_32to16, mkexpr(hi32)) );
9404 assign( *t3, unop(Iop_32HIto16, mkexpr(hi32)) );
9405}
9406
9407/* Construct a 64-bit value from four 16-bit ints. */
9408
9409static IRExpr* mk64from16s ( IRTemp t3, IRTemp t2,
9410 IRTemp t1, IRTemp t0 )
9411{
9412 return
9413 binop( Iop_32HLto64,
9414 binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)),
9415 binop(Iop_16HLto32, mkexpr(t1), mkexpr(t0))
9416 );
9417}
sewardjdf0e0022005-01-25 15:48:43 +00009418
sewardj4b1cc832012-06-13 11:10:20 +00009419/* Break a V256-bit value up into four 64-bit ints. */
9420
9421static void breakupV256to64s ( IRTemp t256,
9422 /*OUTs*/
9423 IRTemp* t3, IRTemp* t2,
9424 IRTemp* t1, IRTemp* t0 )
9425{
9426 vassert(t0 && *t0 == IRTemp_INVALID);
9427 vassert(t1 && *t1 == IRTemp_INVALID);
9428 vassert(t2 && *t2 == IRTemp_INVALID);
9429 vassert(t3 && *t3 == IRTemp_INVALID);
9430 *t0 = newTemp(Ity_I64);
9431 *t1 = newTemp(Ity_I64);
9432 *t2 = newTemp(Ity_I64);
9433 *t3 = newTemp(Ity_I64);
9434 assign( *t0, unop(Iop_V256to64_0, mkexpr(t256)) );
9435 assign( *t1, unop(Iop_V256to64_1, mkexpr(t256)) );
9436 assign( *t2, unop(Iop_V256to64_2, mkexpr(t256)) );
9437 assign( *t3, unop(Iop_V256to64_3, mkexpr(t256)) );
9438}
9439
sewardjfe0c5e72012-06-15 15:48:07 +00009440/* Break a V256-bit value up into two V128s. */
9441
9442static void breakupV256toV128s ( IRTemp t256,
9443 /*OUTs*/
9444 IRTemp* t1, IRTemp* t0 )
9445{
9446 vassert(t0 && *t0 == IRTemp_INVALID);
9447 vassert(t1 && *t1 == IRTemp_INVALID);
9448 *t0 = newTemp(Ity_V128);
9449 *t1 = newTemp(Ity_V128);
9450 assign(*t1, unop(Iop_V256toV128_1, mkexpr(t256)));
9451 assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
9452}
sewardjdf0e0022005-01-25 15:48:43 +00009453
sewardj4f228902012-06-21 09:17:58 +00009454/* Break a V256-bit value up into eight 32-bit ints. */
9455
9456static void breakupV256to32s ( IRTemp t256,
9457 /*OUTs*/
9458 IRTemp* t7, IRTemp* t6,
9459 IRTemp* t5, IRTemp* t4,
9460 IRTemp* t3, IRTemp* t2,
9461 IRTemp* t1, IRTemp* t0 )
9462{
9463 IRTemp t128_1 = IRTemp_INVALID;
9464 IRTemp t128_0 = IRTemp_INVALID;
9465 breakupV256toV128s( t256, &t128_1, &t128_0 );
9466 breakupV128to32s( t128_1, t7, t6, t5, t4 );
9467 breakupV128to32s( t128_0, t3, t2, t1, t0 );
9468}
9469
sewardjd8bca7e2012-06-20 11:46:19 +00009470/* Break a V128-bit value up into two 64-bit ints. */
9471
9472static void breakupV128to64s ( IRTemp t128,
9473 /*OUTs*/
9474 IRTemp* t1, IRTemp* t0 )
9475{
9476 vassert(t0 && *t0 == IRTemp_INVALID);
9477 vassert(t1 && *t1 == IRTemp_INVALID);
9478 *t0 = newTemp(Ity_I64);
9479 *t1 = newTemp(Ity_I64);
9480 assign( *t0, unop(Iop_V128to64, mkexpr(t128)) );
9481 assign( *t1, unop(Iop_V128HIto64, mkexpr(t128)) );
9482}
9483
sewardj89378162012-06-24 12:12:20 +00009484/* Construct a V256-bit value from eight 32-bit ints. */
9485
9486static IRExpr* mkV256from32s ( IRTemp t7, IRTemp t6,
9487 IRTemp t5, IRTemp t4,
9488 IRTemp t3, IRTemp t2,
9489 IRTemp t1, IRTemp t0 )
9490{
9491 return
9492 binop( Iop_V128HLtoV256,
9493 binop( Iop_64HLtoV128,
9494 binop(Iop_32HLto64, mkexpr(t7), mkexpr(t6)),
9495 binop(Iop_32HLto64, mkexpr(t5), mkexpr(t4)) ),
9496 binop( Iop_64HLtoV128,
9497 binop(Iop_32HLto64, mkexpr(t3), mkexpr(t2)),
9498 binop(Iop_32HLto64, mkexpr(t1), mkexpr(t0)) )
9499 );
9500}
9501
9502/* Construct a V256-bit value from four 64-bit ints. */
9503
9504static IRExpr* mkV256from64s ( IRTemp t3, IRTemp t2,
9505 IRTemp t1, IRTemp t0 )
9506{
9507 return
9508 binop( Iop_V128HLtoV256,
9509 binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)),
9510 binop(Iop_64HLtoV128, mkexpr(t1), mkexpr(t0))
9511 );
9512}
9513
sewardjd166e282008-02-06 11:42:45 +00009514/* Helper for the SSSE3 (not SSE3) PMULHRSW insns. Given two 64-bit
9515 values (aa,bb), computes, for each of the 4 16-bit lanes:
9516
9517 (((aa_lane *s32 bb_lane) >>u 14) + 1) >>u 1
9518*/
9519static IRExpr* dis_PMULHRSW_helper ( IRExpr* aax, IRExpr* bbx )
9520{
9521 IRTemp aa = newTemp(Ity_I64);
9522 IRTemp bb = newTemp(Ity_I64);
9523 IRTemp aahi32s = newTemp(Ity_I64);
9524 IRTemp aalo32s = newTemp(Ity_I64);
9525 IRTemp bbhi32s = newTemp(Ity_I64);
9526 IRTemp bblo32s = newTemp(Ity_I64);
9527 IRTemp rHi = newTemp(Ity_I64);
9528 IRTemp rLo = newTemp(Ity_I64);
9529 IRTemp one32x2 = newTemp(Ity_I64);
9530 assign(aa, aax);
9531 assign(bb, bbx);
9532 assign( aahi32s,
9533 binop(Iop_SarN32x2,
9534 binop(Iop_InterleaveHI16x4, mkexpr(aa), mkexpr(aa)),
9535 mkU8(16) ));
9536 assign( aalo32s,
9537 binop(Iop_SarN32x2,
9538 binop(Iop_InterleaveLO16x4, mkexpr(aa), mkexpr(aa)),
9539 mkU8(16) ));
9540 assign( bbhi32s,
9541 binop(Iop_SarN32x2,
9542 binop(Iop_InterleaveHI16x4, mkexpr(bb), mkexpr(bb)),
9543 mkU8(16) ));
9544 assign( bblo32s,
9545 binop(Iop_SarN32x2,
9546 binop(Iop_InterleaveLO16x4, mkexpr(bb), mkexpr(bb)),
9547 mkU8(16) ));
9548 assign(one32x2, mkU64( (1ULL << 32) + 1 ));
9549 assign(
9550 rHi,
9551 binop(
9552 Iop_ShrN32x2,
9553 binop(
9554 Iop_Add32x2,
9555 binop(
9556 Iop_ShrN32x2,
9557 binop(Iop_Mul32x2, mkexpr(aahi32s), mkexpr(bbhi32s)),
9558 mkU8(14)
9559 ),
9560 mkexpr(one32x2)
9561 ),
9562 mkU8(1)
9563 )
9564 );
9565 assign(
9566 rLo,
9567 binop(
9568 Iop_ShrN32x2,
9569 binop(
9570 Iop_Add32x2,
9571 binop(
9572 Iop_ShrN32x2,
9573 binop(Iop_Mul32x2, mkexpr(aalo32s), mkexpr(bblo32s)),
9574 mkU8(14)
9575 ),
9576 mkexpr(one32x2)
9577 ),
9578 mkU8(1)
9579 )
9580 );
9581 return
9582 binop(Iop_CatEvenLanes16x4, mkexpr(rHi), mkexpr(rLo));
9583}
9584
9585/* Helper for the SSSE3 (not SSE3) PSIGN{B,W,D} insns. Given two 64-bit
9586 values (aa,bb), computes, for each lane:
9587
9588 if aa_lane < 0 then - bb_lane
9589 else if aa_lane > 0 then bb_lane
9590 else 0
9591*/
9592static IRExpr* dis_PSIGN_helper ( IRExpr* aax, IRExpr* bbx, Int laneszB )
9593{
9594 IRTemp aa = newTemp(Ity_I64);
9595 IRTemp bb = newTemp(Ity_I64);
9596 IRTemp zero = newTemp(Ity_I64);
9597 IRTemp bbNeg = newTemp(Ity_I64);
9598 IRTemp negMask = newTemp(Ity_I64);
9599 IRTemp posMask = newTemp(Ity_I64);
9600 IROp opSub = Iop_INVALID;
9601 IROp opCmpGTS = Iop_INVALID;
9602
9603 switch (laneszB) {
9604 case 1: opSub = Iop_Sub8x8; opCmpGTS = Iop_CmpGT8Sx8; break;
9605 case 2: opSub = Iop_Sub16x4; opCmpGTS = Iop_CmpGT16Sx4; break;
9606 case 4: opSub = Iop_Sub32x2; opCmpGTS = Iop_CmpGT32Sx2; break;
9607 default: vassert(0);
9608 }
9609
9610 assign( aa, aax );
9611 assign( bb, bbx );
9612 assign( zero, mkU64(0) );
9613 assign( bbNeg, binop(opSub, mkexpr(zero), mkexpr(bb)) );
9614 assign( negMask, binop(opCmpGTS, mkexpr(zero), mkexpr(aa)) );
9615 assign( posMask, binop(opCmpGTS, mkexpr(aa), mkexpr(zero)) );
9616
9617 return
9618 binop(Iop_Or64,
9619 binop(Iop_And64, mkexpr(bb), mkexpr(posMask)),
9620 binop(Iop_And64, mkexpr(bbNeg), mkexpr(negMask)) );
9621
9622}
9623
sewardj97f72452012-05-23 05:56:53 +00009624
sewardjd166e282008-02-06 11:42:45 +00009625/* Helper for the SSSE3 (not SSE3) PABS{B,W,D} insns. Given a 64-bit
9626 value aa, computes, for each lane
9627
9628 if aa < 0 then -aa else aa
9629
9630 Note that the result is interpreted as unsigned, so that the
9631 absolute value of the most negative signed input can be
9632 represented.
9633*/
sewardj97f72452012-05-23 05:56:53 +00009634static IRTemp math_PABS_MMX ( IRTemp aa, Int laneszB )
sewardjd166e282008-02-06 11:42:45 +00009635{
sewardj97f72452012-05-23 05:56:53 +00009636 IRTemp res = newTemp(Ity_I64);
sewardjd166e282008-02-06 11:42:45 +00009637 IRTemp zero = newTemp(Ity_I64);
9638 IRTemp aaNeg = newTemp(Ity_I64);
9639 IRTemp negMask = newTemp(Ity_I64);
9640 IRTemp posMask = newTemp(Ity_I64);
9641 IROp opSub = Iop_INVALID;
9642 IROp opSarN = Iop_INVALID;
9643
9644 switch (laneszB) {
9645 case 1: opSub = Iop_Sub8x8; opSarN = Iop_SarN8x8; break;
9646 case 2: opSub = Iop_Sub16x4; opSarN = Iop_SarN16x4; break;
9647 case 4: opSub = Iop_Sub32x2; opSarN = Iop_SarN32x2; break;
9648 default: vassert(0);
9649 }
9650
sewardjd166e282008-02-06 11:42:45 +00009651 assign( negMask, binop(opSarN, mkexpr(aa), mkU8(8*laneszB-1)) );
9652 assign( posMask, unop(Iop_Not64, mkexpr(negMask)) );
9653 assign( zero, mkU64(0) );
9654 assign( aaNeg, binop(opSub, mkexpr(zero), mkexpr(aa)) );
sewardj97f72452012-05-23 05:56:53 +00009655 assign( res,
9656 binop(Iop_Or64,
9657 binop(Iop_And64, mkexpr(aa), mkexpr(posMask)),
9658 binop(Iop_And64, mkexpr(aaNeg), mkexpr(negMask)) ));
9659 return res;
sewardjd166e282008-02-06 11:42:45 +00009660}
9661
sewardj97f72452012-05-23 05:56:53 +00009662/* XMM version of math_PABS_MMX. */
9663static IRTemp math_PABS_XMM ( IRTemp aa, Int laneszB )
9664{
9665 IRTemp res = newTemp(Ity_V128);
9666 IRTemp aaHi = newTemp(Ity_I64);
9667 IRTemp aaLo = newTemp(Ity_I64);
9668 assign(aaHi, unop(Iop_V128HIto64, mkexpr(aa)));
9669 assign(aaLo, unop(Iop_V128to64, mkexpr(aa)));
9670 assign(res, binop(Iop_64HLtoV128,
9671 mkexpr(math_PABS_MMX(aaHi, laneszB)),
9672 mkexpr(math_PABS_MMX(aaLo, laneszB))));
9673 return res;
9674}
9675
9676/* Specialisations of math_PABS_XMM, since there's no easy way to do
9677 partial applications in C :-( */
9678static IRTemp math_PABS_XMM_pap4 ( IRTemp aa ) {
9679 return math_PABS_XMM(aa, 4);
9680}
9681
sewardj8516a1f2012-06-24 14:26:30 +00009682static IRTemp math_PABS_XMM_pap2 ( IRTemp aa ) {
9683 return math_PABS_XMM(aa, 2);
9684}
9685
9686static IRTemp math_PABS_XMM_pap1 ( IRTemp aa ) {
9687 return math_PABS_XMM(aa, 1);
9688}
sewardj97f72452012-05-23 05:56:53 +00009689
sewardjcc3d2192013-03-27 11:37:33 +00009690/* YMM version of math_PABS_XMM. */
9691static IRTemp math_PABS_YMM ( IRTemp aa, Int laneszB )
9692{
9693 IRTemp res = newTemp(Ity_V256);
9694 IRTemp aaHi = IRTemp_INVALID;
9695 IRTemp aaLo = IRTemp_INVALID;
9696 breakupV256toV128s(aa, &aaHi, &aaLo);
9697 assign(res, binop(Iop_V128HLtoV256,
9698 mkexpr(math_PABS_XMM(aaHi, laneszB)),
9699 mkexpr(math_PABS_XMM(aaLo, laneszB))));
9700 return res;
9701}
9702
9703static IRTemp math_PABS_YMM_pap4 ( IRTemp aa ) {
9704 return math_PABS_YMM(aa, 4);
9705}
9706
9707static IRTemp math_PABS_YMM_pap2 ( IRTemp aa ) {
9708 return math_PABS_YMM(aa, 2);
9709}
9710
9711static IRTemp math_PABS_YMM_pap1 ( IRTemp aa ) {
9712 return math_PABS_YMM(aa, 1);
9713}
9714
sewardjd166e282008-02-06 11:42:45 +00009715static IRExpr* dis_PALIGNR_XMM_helper ( IRTemp hi64,
9716 IRTemp lo64, Long byteShift )
9717{
9718 vassert(byteShift >= 1 && byteShift <= 7);
9719 return
9720 binop(Iop_Or64,
9721 binop(Iop_Shl64, mkexpr(hi64), mkU8(8*(8-byteShift))),
9722 binop(Iop_Shr64, mkexpr(lo64), mkU8(8*byteShift))
9723 );
9724}
9725
sewardj151cd3e2012-06-18 13:56:55 +00009726static IRTemp math_PALIGNR_XMM ( IRTemp sV, IRTemp dV, UInt imm8 )
9727{
9728 IRTemp res = newTemp(Ity_V128);
9729 IRTemp sHi = newTemp(Ity_I64);
9730 IRTemp sLo = newTemp(Ity_I64);
9731 IRTemp dHi = newTemp(Ity_I64);
9732 IRTemp dLo = newTemp(Ity_I64);
9733 IRTemp rHi = newTemp(Ity_I64);
9734 IRTemp rLo = newTemp(Ity_I64);
9735
9736 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
9737 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
9738 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
9739 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
9740
9741 if (imm8 == 0) {
9742 assign( rHi, mkexpr(sHi) );
9743 assign( rLo, mkexpr(sLo) );
9744 }
9745 else if (imm8 >= 1 && imm8 <= 7) {
9746 assign( rHi, dis_PALIGNR_XMM_helper(dLo, sHi, imm8) );
9747 assign( rLo, dis_PALIGNR_XMM_helper(sHi, sLo, imm8) );
9748 }
9749 else if (imm8 == 8) {
9750 assign( rHi, mkexpr(dLo) );
9751 assign( rLo, mkexpr(sHi) );
9752 }
9753 else if (imm8 >= 9 && imm8 <= 15) {
9754 assign( rHi, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-8) );
9755 assign( rLo, dis_PALIGNR_XMM_helper(dLo, sHi, imm8-8) );
9756 }
9757 else if (imm8 == 16) {
9758 assign( rHi, mkexpr(dHi) );
9759 assign( rLo, mkexpr(dLo) );
9760 }
9761 else if (imm8 >= 17 && imm8 <= 23) {
9762 assign( rHi, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-16))) );
9763 assign( rLo, dis_PALIGNR_XMM_helper(dHi, dLo, imm8-16) );
9764 }
9765 else if (imm8 == 24) {
9766 assign( rHi, mkU64(0) );
9767 assign( rLo, mkexpr(dHi) );
9768 }
9769 else if (imm8 >= 25 && imm8 <= 31) {
9770 assign( rHi, mkU64(0) );
9771 assign( rLo, binop(Iop_Shr64, mkexpr(dHi), mkU8(8*(imm8-24))) );
9772 }
9773 else if (imm8 >= 32 && imm8 <= 255) {
9774 assign( rHi, mkU64(0) );
9775 assign( rLo, mkU64(0) );
9776 }
9777 else
9778 vassert(0);
9779
9780 assign( res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
9781 return res;
9782}
9783
9784
sewardj150c9cd2008-02-09 01:16:02 +00009785/* Generate a SIGSEGV followed by a restart of the current instruction
9786 if effective_addr is not 16-aligned. This is required behaviour
9787 for some SSE3 instructions and all 128-bit SSSE3 instructions.
9788 This assumes that guest_RIP_curr_instr is set correctly! */
sewardjc4530ae2012-05-21 10:18:49 +00009789static
9790void gen_SEGV_if_not_XX_aligned ( IRTemp effective_addr, ULong mask )
sewardj150c9cd2008-02-09 01:16:02 +00009791{
9792 stmt(
9793 IRStmt_Exit(
9794 binop(Iop_CmpNE64,
sewardjc4530ae2012-05-21 10:18:49 +00009795 binop(Iop_And64,mkexpr(effective_addr),mkU64(mask)),
sewardj150c9cd2008-02-09 01:16:02 +00009796 mkU64(0)),
9797 Ijk_SigSEGV,
sewardjc6f970f2012-04-02 21:54:49 +00009798 IRConst_U64(guest_RIP_curr_instr),
9799 OFFB_RIP
sewardj150c9cd2008-02-09 01:16:02 +00009800 )
9801 );
9802}
9803
sewardjc4530ae2012-05-21 10:18:49 +00009804static void gen_SEGV_if_not_16_aligned ( IRTemp effective_addr ) {
9805 gen_SEGV_if_not_XX_aligned(effective_addr, 16-1);
9806}
9807
9808static void gen_SEGV_if_not_32_aligned ( IRTemp effective_addr ) {
9809 gen_SEGV_if_not_XX_aligned(effective_addr, 32-1);
9810}
sewardjd166e282008-02-06 11:42:45 +00009811
sewardjc4356f02007-11-09 21:15:04 +00009812/* Helper for deciding whether a given insn (starting at the opcode
9813 byte) may validly be used with a LOCK prefix. The following insns
9814 may be used with LOCK when their destination operand is in memory.
sewardje9d8a262009-07-01 08:06:34 +00009815 AFAICS this is exactly the same for both 32-bit and 64-bit mode.
sewardjc4356f02007-11-09 21:15:04 +00009816
sewardje9d8a262009-07-01 08:06:34 +00009817 ADD 80 /0, 81 /0, 82 /0, 83 /0, 00, 01
9818 OR 80 /1, 81 /1, 82 /x, 83 /1, 08, 09
9819 ADC 80 /2, 81 /2, 82 /2, 83 /2, 10, 11
   SBB      80 /3, 81 /3, 82 /x, 83 /3, 18, 19
9821 AND 80 /4, 81 /4, 82 /x, 83 /4, 20, 21
9822 SUB 80 /5, 81 /5, 82 /x, 83 /5, 28, 29
9823 XOR 80 /6, 81 /6, 82 /x, 83 /6, 30, 31
sewardjc4356f02007-11-09 21:15:04 +00009824
9825 DEC FE /1, FF /1
9826 INC FE /0, FF /0
9827
9828 NEG F6 /3, F7 /3
9829 NOT F6 /2, F7 /2
9830
sewardje9d8a262009-07-01 08:06:34 +00009831 XCHG 86, 87
sewardjc4356f02007-11-09 21:15:04 +00009832
9833 BTC 0F BB, 0F BA /7
9834 BTR 0F B3, 0F BA /6
9835 BTS 0F AB, 0F BA /5
9836
9837 CMPXCHG 0F B0, 0F B1
9838 CMPXCHG8B 0F C7 /1
9839
9840 XADD 0F C0, 0F C1
sewardje9d8a262009-07-01 08:06:34 +00009841
9842 ------------------------------
9843
9844 80 /0 = addb $imm8, rm8
9845 81 /0 = addl $imm32, rm32 and addw $imm16, rm16
9846 82 /0 = addb $imm8, rm8
9847 83 /0 = addl $simm8, rm32 and addw $simm8, rm16
9848
9849 00 = addb r8, rm8
9850 01 = addl r32, rm32 and addw r16, rm16
9851
9852 Same for ADD OR ADC SBB AND SUB XOR
9853
9854 FE /1 = dec rm8
9855 FF /1 = dec rm32 and dec rm16
9856
9857 FE /0 = inc rm8
9858 FF /0 = inc rm32 and inc rm16
9859
9860 F6 /3 = neg rm8
9861 F7 /3 = neg rm32 and neg rm16
9862
9863 F6 /2 = not rm8
9864 F7 /2 = not rm32 and not rm16
9865
9866 0F BB = btcw r16, rm16 and btcl r32, rm32
   0F BA /7 = btcw $imm8, rm16    and  btcl $imm8, rm32
9868
9869 Same for BTS, BTR
sewardjc4356f02007-11-09 21:15:04 +00009870*/
florian8462d112014-09-24 15:18:09 +00009871static Bool can_be_used_with_LOCK_prefix ( const UChar* opc )
sewardjc4356f02007-11-09 21:15:04 +00009872{
9873 switch (opc[0]) {
sewardje9d8a262009-07-01 08:06:34 +00009874 case 0x00: case 0x01: case 0x08: case 0x09:
9875 case 0x10: case 0x11: case 0x18: case 0x19:
9876 case 0x20: case 0x21: case 0x28: case 0x29:
9877 case 0x30: case 0x31:
9878 if (!epartIsReg(opc[1]))
9879 return True;
9880 break;
sewardjc4356f02007-11-09 21:15:04 +00009881
sewardje9d8a262009-07-01 08:06:34 +00009882 case 0x80: case 0x81: case 0x82: case 0x83:
9883 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 6
9884 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009885 return True;
9886 break;
9887
9888 case 0xFE: case 0xFF:
sewardje9d8a262009-07-01 08:06:34 +00009889 if (gregLO3ofRM(opc[1]) >= 0 && gregLO3ofRM(opc[1]) <= 1
9890 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009891 return True;
9892 break;
9893
9894 case 0xF6: case 0xF7:
sewardje9d8a262009-07-01 08:06:34 +00009895 if (gregLO3ofRM(opc[1]) >= 2 && gregLO3ofRM(opc[1]) <= 3
9896 && !epartIsReg(opc[1]))
sewardjc4356f02007-11-09 21:15:04 +00009897 return True;
9898 break;
9899
9900 case 0x86: case 0x87:
sewardje9d8a262009-07-01 08:06:34 +00009901 if (!epartIsReg(opc[1]))
9902 return True;
9903 break;
sewardjc4356f02007-11-09 21:15:04 +00009904
9905 case 0x0F: {
9906 switch (opc[1]) {
9907 case 0xBB: case 0xB3: case 0xAB:
sewardje9d8a262009-07-01 08:06:34 +00009908 if (!epartIsReg(opc[2]))
9909 return True;
9910 break;
sewardjc4356f02007-11-09 21:15:04 +00009911 case 0xBA:
sewardje9d8a262009-07-01 08:06:34 +00009912 if (gregLO3ofRM(opc[2]) >= 5 && gregLO3ofRM(opc[2]) <= 7
9913 && !epartIsReg(opc[2]))
sewardjc4356f02007-11-09 21:15:04 +00009914 return True;
9915 break;
9916 case 0xB0: case 0xB1:
sewardje9d8a262009-07-01 08:06:34 +00009917 if (!epartIsReg(opc[2]))
9918 return True;
9919 break;
sewardjc4356f02007-11-09 21:15:04 +00009920 case 0xC7:
sewardje9d8a262009-07-01 08:06:34 +00009921 if (gregLO3ofRM(opc[2]) == 1 && !epartIsReg(opc[2]) )
sewardjc4356f02007-11-09 21:15:04 +00009922 return True;
9923 break;
9924 case 0xC0: case 0xC1:
sewardje9d8a262009-07-01 08:06:34 +00009925 if (!epartIsReg(opc[2]))
9926 return True;
9927 break;
sewardjc4356f02007-11-09 21:15:04 +00009928 default:
9929 break;
9930 } /* switch (opc[1]) */
9931 break;
9932 }
9933
9934 default:
9935 break;
9936 } /* switch (opc[0]) */
9937
9938 return False;
9939}
9940
9941
sewardjdf0e0022005-01-25 15:48:43 +00009942/*------------------------------------------------------------*/
sewardj80611e32012-01-20 13:07:24 +00009943/*--- ---*/
9944/*--- Top-level SSE/SSE2: dis_ESC_0F__SSE2 ---*/
9945/*--- ---*/
9946/*------------------------------------------------------------*/
9947
sewardjc4530ae2012-05-21 10:18:49 +00009948static Long dis_COMISD ( VexAbiInfo* vbi, Prefix pfx,
9949 Long delta, Bool isAvx, UChar opc )
9950{
9951 vassert(opc == 0x2F/*COMISD*/ || opc == 0x2E/*UCOMISD*/);
9952 Int alen = 0;
9953 HChar dis_buf[50];
9954 IRTemp argL = newTemp(Ity_F64);
9955 IRTemp argR = newTemp(Ity_F64);
9956 UChar modrm = getUChar(delta);
9957 IRTemp addr = IRTemp_INVALID;
9958 if (epartIsReg(modrm)) {
9959 assign( argR, getXMMRegLane64F( eregOfRexRM(pfx,modrm),
9960 0/*lowest lane*/ ) );
9961 delta += 1;
9962 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9963 opc==0x2E ? "u" : "",
9964 nameXMMReg(eregOfRexRM(pfx,modrm)),
9965 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9966 } else {
9967 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
9968 assign( argR, loadLE(Ity_F64, mkexpr(addr)) );
9969 delta += alen;
9970 DIP("%s%scomisd %s,%s\n", isAvx ? "v" : "",
9971 opc==0x2E ? "u" : "",
9972 dis_buf,
9973 nameXMMReg(gregOfRexRM(pfx,modrm)) );
9974 }
9975 assign( argL, getXMMRegLane64F( gregOfRexRM(pfx,modrm),
9976 0/*lowest lane*/ ) );
9977
9978 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
9979 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
9980 stmt( IRStmt_Put(
9981 OFFB_CC_DEP1,
9982 binop( Iop_And64,
9983 unop( Iop_32Uto64,
9984 binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)) ),
9985 mkU64(0x45)
9986 )));
9987 return delta;
9988}
9989
9990
9991static Long dis_COMISS ( VexAbiInfo* vbi, Prefix pfx,
9992 Long delta, Bool isAvx, UChar opc )
9993{
9994 vassert(opc == 0x2F/*COMISS*/ || opc == 0x2E/*UCOMISS*/);
9995 Int alen = 0;
9996 HChar dis_buf[50];
9997 IRTemp argL = newTemp(Ity_F32);
9998 IRTemp argR = newTemp(Ity_F32);
9999 UChar modrm = getUChar(delta);
10000 IRTemp addr = IRTemp_INVALID;
10001 if (epartIsReg(modrm)) {
10002 assign( argR, getXMMRegLane32F( eregOfRexRM(pfx,modrm),
10003 0/*lowest lane*/ ) );
10004 delta += 1;
10005 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10006 opc==0x2E ? "u" : "",
10007 nameXMMReg(eregOfRexRM(pfx,modrm)),
10008 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10009 } else {
10010 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10011 assign( argR, loadLE(Ity_F32, mkexpr(addr)) );
10012 delta += alen;
10013 DIP("%s%scomiss %s,%s\n", isAvx ? "v" : "",
10014 opc==0x2E ? "u" : "",
10015 dis_buf,
10016 nameXMMReg(gregOfRexRM(pfx,modrm)) );
10017 }
10018 assign( argL, getXMMRegLane32F( gregOfRexRM(pfx,modrm),
10019 0/*lowest lane*/ ) );
10020
10021 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
10022 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
10023 stmt( IRStmt_Put(
10024 OFFB_CC_DEP1,
10025 binop( Iop_And64,
10026 unop( Iop_32Uto64,
10027 binop(Iop_CmpF64,
10028 unop(Iop_F32toF64,mkexpr(argL)),
10029 unop(Iop_F32toF64,mkexpr(argR)))),
10030 mkU64(0x45)
10031 )));
10032 return delta;
10033}
10034
10035
10036static Long dis_PSHUFD_32x4 ( VexAbiInfo* vbi, Prefix pfx,
10037 Long delta, Bool writesYmm )
10038{
10039 Int order;
10040 Int alen = 0;
10041 HChar dis_buf[50];
10042 IRTemp sV = newTemp(Ity_V128);
10043 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000010044 const HChar* strV = writesYmm ? "v" : "";
sewardjc4530ae2012-05-21 10:18:49 +000010045 IRTemp addr = IRTemp_INVALID;
10046 if (epartIsReg(modrm)) {
10047 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
10048 order = (Int)getUChar(delta+1);
10049 delta += 1+1;
10050 DIP("%spshufd $%d,%s,%s\n", strV, order,
10051 nameXMMReg(eregOfRexRM(pfx,modrm)),
10052 nameXMMReg(gregOfRexRM(pfx,modrm)));
10053 } else {
10054 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10055 1/*byte after the amode*/ );
10056 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
10057 order = (Int)getUChar(delta+alen);
10058 delta += alen+1;
10059 DIP("%spshufd $%d,%s,%s\n", strV, order,
10060 dis_buf,
10061 nameXMMReg(gregOfRexRM(pfx,modrm)));
10062 }
10063
10064 IRTemp s3, s2, s1, s0;
10065 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010066 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +000010067
10068# define SEL(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10069 IRTemp dV = newTemp(Ity_V128);
10070 assign(dV,
sewardj4b1cc832012-06-13 11:10:20 +000010071 mkV128from32s( SEL((order>>6)&3), SEL((order>>4)&3),
10072 SEL((order>>2)&3), SEL((order>>0)&3) )
sewardjc4530ae2012-05-21 10:18:49 +000010073 );
10074# undef SEL
10075
10076 (writesYmm ? putYMMRegLoAndZU : putXMMReg)
10077 (gregOfRexRM(pfx,modrm), mkexpr(dV));
10078 return delta;
10079}
10080
10081
sewardjcc3d2192013-03-27 11:37:33 +000010082static Long dis_PSHUFD_32x8 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
10083{
10084 Int order;
10085 Int alen = 0;
10086 HChar dis_buf[50];
10087 IRTemp sV = newTemp(Ity_V256);
10088 UChar modrm = getUChar(delta);
10089 IRTemp addr = IRTemp_INVALID;
10090 UInt rG = gregOfRexRM(pfx,modrm);
10091 if (epartIsReg(modrm)) {
10092 UInt rE = eregOfRexRM(pfx,modrm);
10093 assign( sV, getYMMReg(rE) );
10094 order = (Int)getUChar(delta+1);
10095 delta += 1+1;
10096 DIP("vpshufd $%d,%s,%s\n", order, nameYMMReg(rE), nameYMMReg(rG));
10097 } else {
10098 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
10099 1/*byte after the amode*/ );
10100 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
10101 order = (Int)getUChar(delta+alen);
10102 delta += alen+1;
10103 DIP("vpshufd $%d,%s,%s\n", order, dis_buf, nameYMMReg(rG));
10104 }
10105
10106 IRTemp s[8];
10107 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
10108 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
10109 &s[3], &s[2], &s[1], &s[0] );
10110
10111 putYMMReg( rG, mkV256from32s( s[4 + ((order>>6)&3)],
10112 s[4 + ((order>>4)&3)],
10113 s[4 + ((order>>2)&3)],
10114 s[4 + ((order>>0)&3)],
10115 s[0 + ((order>>6)&3)],
10116 s[0 + ((order>>4)&3)],
10117 s[0 + ((order>>2)&3)],
10118 s[0 + ((order>>0)&3)] ) );
10119 return delta;
10120}
10121
10122
sewardjc4530ae2012-05-21 10:18:49 +000010123static IRTemp math_PSRLDQ ( IRTemp sV, Int imm )
10124{
10125 IRTemp dV = newTemp(Ity_V128);
10126 IRTemp hi64 = newTemp(Ity_I64);
10127 IRTemp lo64 = newTemp(Ity_I64);
10128 IRTemp hi64r = newTemp(Ity_I64);
10129 IRTemp lo64r = newTemp(Ity_I64);
10130
10131 vassert(imm >= 0 && imm <= 255);
10132 if (imm >= 16) {
10133 assign(dV, mkV128(0x0000));
10134 return dV;
10135 }
10136
10137 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10138 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10139
10140 if (imm == 0) {
10141 assign( lo64r, mkexpr(lo64) );
10142 assign( hi64r, mkexpr(hi64) );
10143 }
10144 else
10145 if (imm == 8) {
10146 assign( hi64r, mkU64(0) );
10147 assign( lo64r, mkexpr(hi64) );
10148 }
10149 else
10150 if (imm > 8) {
10151 assign( hi64r, mkU64(0) );
sewardj251b59e2012-05-25 13:51:07 +000010152 assign( lo64r, binop( Iop_Shr64, mkexpr(hi64), mkU8( 8*(imm-8) ) ));
sewardjc4530ae2012-05-21 10:18:49 +000010153 } else {
sewardj251b59e2012-05-25 13:51:07 +000010154 assign( hi64r, binop( Iop_Shr64, mkexpr(hi64), mkU8(8 * imm) ));
sewardjc4530ae2012-05-21 10:18:49 +000010155 assign( lo64r,
10156 binop( Iop_Or64,
10157 binop(Iop_Shr64, mkexpr(lo64),
10158 mkU8(8 * imm)),
10159 binop(Iop_Shl64, mkexpr(hi64),
10160 mkU8(8 * (8 - imm)) )
10161 )
10162 );
10163 }
10164
10165 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10166 return dV;
10167}
10168
10169
sewardj251b59e2012-05-25 13:51:07 +000010170static IRTemp math_PSLLDQ ( IRTemp sV, Int imm )
10171{
10172 IRTemp dV = newTemp(Ity_V128);
10173 IRTemp hi64 = newTemp(Ity_I64);
10174 IRTemp lo64 = newTemp(Ity_I64);
10175 IRTemp hi64r = newTemp(Ity_I64);
10176 IRTemp lo64r = newTemp(Ity_I64);
10177
10178 vassert(imm >= 0 && imm <= 255);
10179 if (imm >= 16) {
10180 assign(dV, mkV128(0x0000));
10181 return dV;
10182 }
10183
10184 assign( hi64, unop(Iop_V128HIto64, mkexpr(sV)) );
10185 assign( lo64, unop(Iop_V128to64, mkexpr(sV)) );
10186
10187 if (imm == 0) {
10188 assign( lo64r, mkexpr(lo64) );
10189 assign( hi64r, mkexpr(hi64) );
10190 }
10191 else
10192 if (imm == 8) {
10193 assign( lo64r, mkU64(0) );
10194 assign( hi64r, mkexpr(lo64) );
10195 }
10196 else
10197 if (imm > 8) {
10198 assign( lo64r, mkU64(0) );
10199 assign( hi64r, binop( Iop_Shl64, mkexpr(lo64), mkU8( 8*(imm-8) ) ));
10200 } else {
10201 assign( lo64r, binop( Iop_Shl64, mkexpr(lo64), mkU8(8 * imm) ));
10202 assign( hi64r,
10203 binop( Iop_Or64,
10204 binop(Iop_Shl64, mkexpr(hi64),
10205 mkU8(8 * imm)),
10206 binop(Iop_Shr64, mkexpr(lo64),
10207 mkU8(8 * (8 - imm)) )
10208 )
10209 );
10210 }
10211
10212 assign( dV, binop(Iop_64HLtoV128, mkexpr(hi64r), mkexpr(lo64r)) );
10213 return dV;
10214}
10215
10216
sewardjc4530ae2012-05-21 10:18:49 +000010217static Long dis_CVTxSD2SI ( VexAbiInfo* vbi, Prefix pfx,
sewardj80804d12012-05-22 10:48:13 +000010218 Long delta, Bool isAvx, UChar opc, Int sz )
sewardjc4530ae2012-05-21 10:18:49 +000010219{
10220 vassert(opc == 0x2D/*CVTSD2SI*/ || opc == 0x2C/*CVTTSD2SI*/);
sewardjc4530ae2012-05-21 10:18:49 +000010221 HChar dis_buf[50];
sewardj80804d12012-05-22 10:48:13 +000010222 Int alen = 0;
10223 UChar modrm = getUChar(delta);
10224 IRTemp addr = IRTemp_INVALID;
sewardjc4530ae2012-05-21 10:18:49 +000010225 IRTemp rmode = newTemp(Ity_I32);
10226 IRTemp f64lo = newTemp(Ity_F64);
10227 Bool r2zero = toBool(opc == 0x2C);
10228
sewardjc4530ae2012-05-21 10:18:49 +000010229 if (epartIsReg(modrm)) {
10230 delta += 1;
10231 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
10232 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10233 nameXMMReg(eregOfRexRM(pfx,modrm)),
10234 nameIReg(sz, gregOfRexRM(pfx,modrm),
10235 False));
10236 } else {
10237 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10238 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
10239 delta += alen;
10240 DIP("%scvt%ssd2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10241 dis_buf,
10242 nameIReg(sz, gregOfRexRM(pfx,modrm),
10243 False));
10244 }
10245
10246 if (r2zero) {
10247 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10248 } else {
10249 assign( rmode, get_sse_roundingmode() );
10250 }
10251
10252 if (sz == 4) {
10253 putIReg32( gregOfRexRM(pfx,modrm),
10254 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo)) );
10255 } else {
sewardj80804d12012-05-22 10:48:13 +000010256 vassert(sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +000010257 putIReg64( gregOfRexRM(pfx,modrm),
10258 binop( Iop_F64toI64S, mkexpr(rmode), mkexpr(f64lo)) );
10259 }
10260
10261 return delta;
10262}
10263
10264
sewardj80804d12012-05-22 10:48:13 +000010265static Long dis_CVTxSS2SI ( VexAbiInfo* vbi, Prefix pfx,
10266 Long delta, Bool isAvx, UChar opc, Int sz )
10267{
10268 vassert(opc == 0x2D/*CVTSS2SI*/ || opc == 0x2C/*CVTTSS2SI*/);
10269 HChar dis_buf[50];
10270 Int alen = 0;
10271 UChar modrm = getUChar(delta);
10272 IRTemp addr = IRTemp_INVALID;
10273 IRTemp rmode = newTemp(Ity_I32);
10274 IRTemp f32lo = newTemp(Ity_F32);
10275 Bool r2zero = toBool(opc == 0x2C);
10276
10277 if (epartIsReg(modrm)) {
10278 delta += 1;
10279 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
10280 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10281 nameXMMReg(eregOfRexRM(pfx,modrm)),
10282 nameIReg(sz, gregOfRexRM(pfx,modrm),
10283 False));
10284 } else {
10285 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10286 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
10287 delta += alen;
10288 DIP("%scvt%sss2si %s,%s\n", isAvx ? "v" : "", r2zero ? "t" : "",
10289 dis_buf,
10290 nameIReg(sz, gregOfRexRM(pfx,modrm),
10291 False));
10292 }
10293
10294 if (r2zero) {
10295 assign( rmode, mkU32((UInt)Irrm_ZERO) );
10296 } else {
10297 assign( rmode, get_sse_roundingmode() );
10298 }
10299
10300 if (sz == 4) {
10301 putIReg32( gregOfRexRM(pfx,modrm),
10302 binop( Iop_F64toI32S,
10303 mkexpr(rmode),
10304 unop(Iop_F32toF64, mkexpr(f32lo))) );
10305 } else {
10306 vassert(sz == 8);
10307 putIReg64( gregOfRexRM(pfx,modrm),
10308 binop( Iop_F64toI64S,
10309 mkexpr(rmode),
10310 unop(Iop_F32toF64, mkexpr(f32lo))) );
10311 }
10312
10313 return delta;
10314}
10315
10316
sewardj66becf32012-06-18 23:15:16 +000010317static Long dis_CVTPS2PD_128 ( VexAbiInfo* vbi, Prefix pfx,
10318 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +000010319{
10320 IRTemp addr = IRTemp_INVALID;
10321 Int alen = 0;
10322 HChar dis_buf[50];
10323 IRTemp f32lo = newTemp(Ity_F32);
10324 IRTemp f32hi = newTemp(Ity_F32);
10325 UChar modrm = getUChar(delta);
10326 UInt rG = gregOfRexRM(pfx,modrm);
10327 if (epartIsReg(modrm)) {
10328 UInt rE = eregOfRexRM(pfx,modrm);
10329 assign( f32lo, getXMMRegLane32F(rE, 0) );
10330 assign( f32hi, getXMMRegLane32F(rE, 1) );
10331 delta += 1;
10332 DIP("%scvtps2pd %s,%s\n",
10333 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10334 } else {
10335 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10336 assign( f32lo, loadLE(Ity_F32, mkexpr(addr)) );
10337 assign( f32hi, loadLE(Ity_F32,
10338 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10339 delta += alen;
10340 DIP("%scvtps2pd %s,%s\n",
10341 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
10342 }
10343
10344 putXMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32hi)) );
10345 putXMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32lo)) );
10346 if (isAvx)
10347 putYMMRegLane128( rG, 1, mkV128(0));
10348 return delta;
10349}
10350
10351
sewardj66becf32012-06-18 23:15:16 +000010352static Long dis_CVTPS2PD_256 ( VexAbiInfo* vbi, Prefix pfx,
10353 Long delta )
10354{
10355 IRTemp addr = IRTemp_INVALID;
10356 Int alen = 0;
10357 HChar dis_buf[50];
10358 IRTemp f32_0 = newTemp(Ity_F32);
10359 IRTemp f32_1 = newTemp(Ity_F32);
10360 IRTemp f32_2 = newTemp(Ity_F32);
10361 IRTemp f32_3 = newTemp(Ity_F32);
10362 UChar modrm = getUChar(delta);
10363 UInt rG = gregOfRexRM(pfx,modrm);
10364 if (epartIsReg(modrm)) {
10365 UInt rE = eregOfRexRM(pfx,modrm);
10366 assign( f32_0, getXMMRegLane32F(rE, 0) );
10367 assign( f32_1, getXMMRegLane32F(rE, 1) );
10368 assign( f32_2, getXMMRegLane32F(rE, 2) );
10369 assign( f32_3, getXMMRegLane32F(rE, 3) );
10370 delta += 1;
10371 DIP("vcvtps2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
10372 } else {
10373 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10374 assign( f32_0, loadLE(Ity_F32, mkexpr(addr)) );
10375 assign( f32_1, loadLE(Ity_F32,
10376 binop(Iop_Add64,mkexpr(addr),mkU64(4))) );
10377 assign( f32_2, loadLE(Ity_F32,
10378 binop(Iop_Add64,mkexpr(addr),mkU64(8))) );
10379 assign( f32_3, loadLE(Ity_F32,
10380 binop(Iop_Add64,mkexpr(addr),mkU64(12))) );
10381 delta += alen;
10382 DIP("vcvtps2pd %s,%s\n", dis_buf, nameYMMReg(rG));
10383 }
10384
10385 putYMMRegLane64F( rG, 3, unop(Iop_F32toF64, mkexpr(f32_3)) );
10386 putYMMRegLane64F( rG, 2, unop(Iop_F32toF64, mkexpr(f32_2)) );
10387 putYMMRegLane64F( rG, 1, unop(Iop_F32toF64, mkexpr(f32_1)) );
10388 putYMMRegLane64F( rG, 0, unop(Iop_F32toF64, mkexpr(f32_0)) );
10389 return delta;
10390}
10391
10392
sewardj6fcd43e2012-06-14 08:51:35 +000010393static Long dis_CVTPD2PS_128 ( VexAbiInfo* vbi, Prefix pfx,
10394 Long delta, Bool isAvx )
sewardjc4530ae2012-05-21 10:18:49 +000010395{
10396 IRTemp addr = IRTemp_INVALID;
10397 Int alen = 0;
10398 HChar dis_buf[50];
10399 UChar modrm = getUChar(delta);
10400 UInt rG = gregOfRexRM(pfx,modrm);
10401 IRTemp argV = newTemp(Ity_V128);
10402 IRTemp rmode = newTemp(Ity_I32);
10403 if (epartIsReg(modrm)) {
10404 UInt rE = eregOfRexRM(pfx,modrm);
10405 assign( argV, getXMMReg(rE) );
10406 delta += 1;
10407 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10408 nameXMMReg(rE), nameXMMReg(rG));
10409 } else {
10410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10411 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10412 delta += alen;
10413 DIP("%scvtpd2ps %s,%s\n", isAvx ? "v" : "",
10414 dis_buf, nameXMMReg(rG) );
10415 }
10416
10417 assign( rmode, get_sse_roundingmode() );
10418 IRTemp t0 = newTemp(Ity_F64);
10419 IRTemp t1 = newTemp(Ity_F64);
10420 assign( t0, unop(Iop_ReinterpI64asF64,
10421 unop(Iop_V128to64, mkexpr(argV))) );
10422 assign( t1, unop(Iop_ReinterpI64asF64,
10423 unop(Iop_V128HIto64, mkexpr(argV))) );
10424
10425# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), mkexpr(_t) )
10426 putXMMRegLane32( rG, 3, mkU32(0) );
10427 putXMMRegLane32( rG, 2, mkU32(0) );
10428 putXMMRegLane32F( rG, 1, CVT(t1) );
10429 putXMMRegLane32F( rG, 0, CVT(t0) );
10430# undef CVT
10431 if (isAvx)
10432 putYMMRegLane128( rG, 1, mkV128(0) );
10433
10434 return delta;
10435}
10436
10437
sewardj66becf32012-06-18 23:15:16 +000010438static Long dis_CVTxPS2DQ_128 ( VexAbiInfo* vbi, Prefix pfx,
10439 Long delta, Bool isAvx, Bool r2zero )
sewardj251b59e2012-05-25 13:51:07 +000010440{
10441 IRTemp addr = IRTemp_INVALID;
10442 Int alen = 0;
10443 HChar dis_buf[50];
10444 UChar modrm = getUChar(delta);
10445 IRTemp argV = newTemp(Ity_V128);
10446 IRTemp rmode = newTemp(Ity_I32);
10447 UInt rG = gregOfRexRM(pfx,modrm);
10448 IRTemp t0, t1, t2, t3;
10449
10450 if (epartIsReg(modrm)) {
10451 UInt rE = eregOfRexRM(pfx,modrm);
10452 assign( argV, getXMMReg(rE) );
10453 delta += 1;
sewardj66becf32012-06-18 23:15:16 +000010454 DIP("%scvt%sps2dq %s,%s\n",
10455 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
sewardj251b59e2012-05-25 13:51:07 +000010456 } else {
10457 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10458 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10459 delta += alen;
sewardj66becf32012-06-18 23:15:16 +000010460 DIP("%scvt%sps2dq %s,%s\n",
10461 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
sewardj251b59e2012-05-25 13:51:07 +000010462 }
10463
10464 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10465 : get_sse_roundingmode() );
10466 t0 = t1 = t2 = t3 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010467 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
sewardj251b59e2012-05-25 13:51:07 +000010468 /* This is less than ideal. If it turns out to be a performance
10469 bottleneck it can be improved. */
10470# define CVT(_t) \
10471 binop( Iop_F64toI32S, \
10472 mkexpr(rmode), \
10473 unop( Iop_F32toF64, \
10474 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10475
10476 putXMMRegLane32( rG, 3, CVT(t3) );
10477 putXMMRegLane32( rG, 2, CVT(t2) );
10478 putXMMRegLane32( rG, 1, CVT(t1) );
10479 putXMMRegLane32( rG, 0, CVT(t0) );
10480# undef CVT
10481 if (isAvx)
10482 putYMMRegLane128( rG, 1, mkV128(0) );
10483
10484 return delta;
10485}
10486
10487
sewardj66becf32012-06-18 23:15:16 +000010488static Long dis_CVTxPS2DQ_256 ( VexAbiInfo* vbi, Prefix pfx,
10489 Long delta, Bool r2zero )
10490{
10491 IRTemp addr = IRTemp_INVALID;
10492 Int alen = 0;
10493 HChar dis_buf[50];
10494 UChar modrm = getUChar(delta);
10495 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010496 IRTemp rmode = newTemp(Ity_I32);
10497 UInt rG = gregOfRexRM(pfx,modrm);
10498 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10499
10500 if (epartIsReg(modrm)) {
10501 UInt rE = eregOfRexRM(pfx,modrm);
10502 assign( argV, getYMMReg(rE) );
10503 delta += 1;
10504 DIP("vcvt%sps2dq %s,%s\n",
10505 r2zero ? "t" : "", nameYMMReg(rE), nameYMMReg(rG));
10506 } else {
10507 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10508 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10509 delta += alen;
10510 DIP("vcvt%sps2dq %s,%s\n",
10511 r2zero ? "t" : "", dis_buf, nameYMMReg(rG) );
10512 }
10513
10514 assign( rmode, r2zero ? mkU32((UInt)Irrm_ZERO)
10515 : get_sse_roundingmode() );
10516 t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010517 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010518 /* This is less than ideal. If it turns out to be a performance
10519 bottleneck it can be improved. */
10520# define CVT(_t) \
10521 binop( Iop_F64toI32S, \
10522 mkexpr(rmode), \
10523 unop( Iop_F32toF64, \
10524 unop( Iop_ReinterpI32asF32, mkexpr(_t))) )
10525
10526 putYMMRegLane32( rG, 7, CVT(t7) );
10527 putYMMRegLane32( rG, 6, CVT(t6) );
10528 putYMMRegLane32( rG, 5, CVT(t5) );
10529 putYMMRegLane32( rG, 4, CVT(t4) );
10530 putYMMRegLane32( rG, 3, CVT(t3) );
10531 putYMMRegLane32( rG, 2, CVT(t2) );
10532 putYMMRegLane32( rG, 1, CVT(t1) );
10533 putYMMRegLane32( rG, 0, CVT(t0) );
10534# undef CVT
10535
10536 return delta;
10537}
10538
10539
10540static Long dis_CVTxPD2DQ_128 ( VexAbiInfo* vbi, Prefix pfx,
10541 Long delta, Bool isAvx, Bool r2zero )
10542{
10543 IRTemp addr = IRTemp_INVALID;
10544 Int alen = 0;
10545 HChar dis_buf[50];
10546 UChar modrm = getUChar(delta);
10547 IRTemp argV = newTemp(Ity_V128);
10548 IRTemp rmode = newTemp(Ity_I32);
10549 UInt rG = gregOfRexRM(pfx,modrm);
10550 IRTemp t0, t1;
10551
10552 if (epartIsReg(modrm)) {
10553 UInt rE = eregOfRexRM(pfx,modrm);
10554 assign( argV, getXMMReg(rE) );
10555 delta += 1;
10556 DIP("%scvt%spd2dq %s,%s\n",
10557 isAvx ? "v" : "", r2zero ? "t" : "", nameXMMReg(rE), nameXMMReg(rG));
10558 } else {
10559 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10560 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10561 delta += alen;
10562 DIP("%scvt%spd2dqx %s,%s\n",
10563 isAvx ? "v" : "", r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10564 }
10565
10566 if (r2zero) {
10567 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10568 } else {
10569 assign( rmode, get_sse_roundingmode() );
10570 }
10571
10572 t0 = newTemp(Ity_F64);
10573 t1 = newTemp(Ity_F64);
10574 assign( t0, unop(Iop_ReinterpI64asF64,
10575 unop(Iop_V128to64, mkexpr(argV))) );
10576 assign( t1, unop(Iop_ReinterpI64asF64,
10577 unop(Iop_V128HIto64, mkexpr(argV))) );
10578
10579# define CVT(_t) binop( Iop_F64toI32S, \
10580 mkexpr(rmode), \
10581 mkexpr(_t) )
10582
10583 putXMMRegLane32( rG, 3, mkU32(0) );
10584 putXMMRegLane32( rG, 2, mkU32(0) );
10585 putXMMRegLane32( rG, 1, CVT(t1) );
10586 putXMMRegLane32( rG, 0, CVT(t0) );
10587# undef CVT
10588 if (isAvx)
10589 putYMMRegLane128( rG, 1, mkV128(0) );
10590
10591 return delta;
10592}
10593
10594
10595static Long dis_CVTxPD2DQ_256 ( VexAbiInfo* vbi, Prefix pfx,
10596 Long delta, Bool r2zero )
10597{
10598 IRTemp addr = IRTemp_INVALID;
10599 Int alen = 0;
10600 HChar dis_buf[50];
10601 UChar modrm = getUChar(delta);
10602 IRTemp argV = newTemp(Ity_V256);
10603 IRTemp rmode = newTemp(Ity_I32);
10604 UInt rG = gregOfRexRM(pfx,modrm);
10605 IRTemp t0, t1, t2, t3;
10606
10607 if (epartIsReg(modrm)) {
10608 UInt rE = eregOfRexRM(pfx,modrm);
10609 assign( argV, getYMMReg(rE) );
10610 delta += 1;
10611 DIP("vcvt%spd2dq %s,%s\n",
10612 r2zero ? "t" : "", nameYMMReg(rE), nameXMMReg(rG));
10613 } else {
10614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10615 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10616 delta += alen;
10617 DIP("vcvt%spd2dqy %s,%s\n",
10618 r2zero ? "t" : "", dis_buf, nameXMMReg(rG) );
10619 }
10620
10621 if (r2zero) {
10622 assign(rmode, mkU32((UInt)Irrm_ZERO) );
10623 } else {
10624 assign( rmode, get_sse_roundingmode() );
10625 }
10626
10627 t0 = IRTemp_INVALID;
10628 t1 = IRTemp_INVALID;
10629 t2 = IRTemp_INVALID;
10630 t3 = IRTemp_INVALID;
10631 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
10632
10633# define CVT(_t) binop( Iop_F64toI32S, \
10634 mkexpr(rmode), \
10635 unop( Iop_ReinterpI64asF64, \
10636 mkexpr(_t) ) )
10637
10638 putXMMRegLane32( rG, 3, CVT(t3) );
10639 putXMMRegLane32( rG, 2, CVT(t2) );
10640 putXMMRegLane32( rG, 1, CVT(t1) );
10641 putXMMRegLane32( rG, 0, CVT(t0) );
10642# undef CVT
10643 putYMMRegLane128( rG, 1, mkV128(0) );
10644
10645 return delta;
10646}
10647
10648
10649static Long dis_CVTDQ2PS_128 ( VexAbiInfo* vbi, Prefix pfx,
10650 Long delta, Bool isAvx )
10651{
10652 IRTemp addr = IRTemp_INVALID;
10653 Int alen = 0;
10654 HChar dis_buf[50];
10655 UChar modrm = getUChar(delta);
10656 IRTemp argV = newTemp(Ity_V128);
10657 IRTemp rmode = newTemp(Ity_I32);
10658 UInt rG = gregOfRexRM(pfx,modrm);
10659 IRTemp t0, t1, t2, t3;
10660
10661 if (epartIsReg(modrm)) {
10662 UInt rE = eregOfRexRM(pfx,modrm);
10663 assign( argV, getXMMReg(rE) );
10664 delta += 1;
10665 DIP("%scvtdq2ps %s,%s\n",
10666 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
10667 } else {
10668 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10669 assign( argV, loadLE(Ity_V128, mkexpr(addr)) );
10670 delta += alen;
10671 DIP("%scvtdq2ps %s,%s\n",
10672 isAvx ? "v" : "", dis_buf, nameXMMReg(rG) );
10673 }
10674
10675 assign( rmode, get_sse_roundingmode() );
10676 t0 = IRTemp_INVALID;
10677 t1 = IRTemp_INVALID;
10678 t2 = IRTemp_INVALID;
10679 t3 = IRTemp_INVALID;
10680 breakupV128to32s( argV, &t3, &t2, &t1, &t0 );
10681
10682# define CVT(_t) binop( Iop_F64toF32, \
10683 mkexpr(rmode), \
10684 unop(Iop_I32StoF64,mkexpr(_t)))
10685
10686 putXMMRegLane32F( rG, 3, CVT(t3) );
10687 putXMMRegLane32F( rG, 2, CVT(t2) );
10688 putXMMRegLane32F( rG, 1, CVT(t1) );
10689 putXMMRegLane32F( rG, 0, CVT(t0) );
10690# undef CVT
10691 if (isAvx)
10692 putYMMRegLane128( rG, 1, mkV128(0) );
10693
10694 return delta;
10695}
10696
10697static Long dis_CVTDQ2PS_256 ( VexAbiInfo* vbi, Prefix pfx,
10698 Long delta )
10699{
10700 IRTemp addr = IRTemp_INVALID;
10701 Int alen = 0;
10702 HChar dis_buf[50];
10703 UChar modrm = getUChar(delta);
10704 IRTemp argV = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000010705 IRTemp rmode = newTemp(Ity_I32);
10706 UInt rG = gregOfRexRM(pfx,modrm);
10707 IRTemp t0, t1, t2, t3, t4, t5, t6, t7;
10708
10709 if (epartIsReg(modrm)) {
10710 UInt rE = eregOfRexRM(pfx,modrm);
10711 assign( argV, getYMMReg(rE) );
10712 delta += 1;
10713 DIP("vcvtdq2ps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
10714 } else {
10715 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
10716 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
10717 delta += alen;
10718 DIP("vcvtdq2ps %s,%s\n", dis_buf, nameYMMReg(rG) );
10719 }
10720
10721 assign( rmode, get_sse_roundingmode() );
10722 t0 = IRTemp_INVALID;
10723 t1 = IRTemp_INVALID;
10724 t2 = IRTemp_INVALID;
10725 t3 = IRTemp_INVALID;
10726 t4 = IRTemp_INVALID;
10727 t5 = IRTemp_INVALID;
10728 t6 = IRTemp_INVALID;
10729 t7 = IRTemp_INVALID;
sewardj4f228902012-06-21 09:17:58 +000010730 breakupV256to32s( argV, &t7, &t6, &t5, &t4, &t3, &t2, &t1, &t0 );
sewardj66becf32012-06-18 23:15:16 +000010731
10732# define CVT(_t) binop( Iop_F64toF32, \
10733 mkexpr(rmode), \
10734 unop(Iop_I32StoF64,mkexpr(_t)))
10735
10736 putYMMRegLane32F( rG, 7, CVT(t7) );
10737 putYMMRegLane32F( rG, 6, CVT(t6) );
10738 putYMMRegLane32F( rG, 5, CVT(t5) );
10739 putYMMRegLane32F( rG, 4, CVT(t4) );
10740 putYMMRegLane32F( rG, 3, CVT(t3) );
10741 putYMMRegLane32F( rG, 2, CVT(t2) );
10742 putYMMRegLane32F( rG, 1, CVT(t1) );
10743 putYMMRegLane32F( rG, 0, CVT(t0) );
10744# undef CVT
10745
10746 return delta;
10747}
10748
10749
sewardj8ef22422012-05-24 16:29:18 +000010750static Long dis_PMOVMSKB_128 ( VexAbiInfo* vbi, Prefix pfx,
10751 Long delta, Bool isAvx )
10752{
sewardj8ef22422012-05-24 16:29:18 +000010753 UChar modrm = getUChar(delta);
10754 vassert(epartIsReg(modrm)); /* ensured by caller */
10755 UInt rE = eregOfRexRM(pfx,modrm);
10756 UInt rG = gregOfRexRM(pfx,modrm);
sewardj78a20592012-12-13 18:29:56 +000010757 IRTemp t0 = newTemp(Ity_V128);
10758 IRTemp t1 = newTemp(Ity_I32);
10759 assign(t0, getXMMReg(rE));
10760 assign(t1, unop(Iop_16Uto32, unop(Iop_GetMSBs8x16, mkexpr(t0))));
10761 putIReg32(rG, mkexpr(t1));
sewardj8ef22422012-05-24 16:29:18 +000010762 DIP("%spmovmskb %s,%s\n", isAvx ? "v" : "", nameXMMReg(rE),
10763 nameIReg32(rG));
10764 delta += 1;
10765 return delta;
10766}
10767
10768
sewardjcc3d2192013-03-27 11:37:33 +000010769static Long dis_PMOVMSKB_256 ( VexAbiInfo* vbi, Prefix pfx,
10770 Long delta )
10771{
10772 UChar modrm = getUChar(delta);
10773 vassert(epartIsReg(modrm)); /* ensured by caller */
10774 UInt rE = eregOfRexRM(pfx,modrm);
10775 UInt rG = gregOfRexRM(pfx,modrm);
10776 IRTemp t0 = newTemp(Ity_V128);
10777 IRTemp t1 = newTemp(Ity_V128);
10778 IRTemp t2 = newTemp(Ity_I16);
10779 IRTemp t3 = newTemp(Ity_I16);
10780 assign(t0, getYMMRegLane128(rE, 0));
10781 assign(t1, getYMMRegLane128(rE, 1));
10782 assign(t2, unop(Iop_GetMSBs8x16, mkexpr(t0)));
10783 assign(t3, unop(Iop_GetMSBs8x16, mkexpr(t1)));
10784 putIReg32(rG, binop(Iop_16HLto32, mkexpr(t3), mkexpr(t2)));
10785 DIP("vpmovmskb %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
10786 delta += 1;
10787 return delta;
10788}
10789
10790
sewardj4b1cc832012-06-13 11:10:20 +000010791/* FIXME: why not just use InterleaveLO / InterleaveHI? I think the
10792 relevant ops are "xIsH ? InterleaveHI32x4 : InterleaveLO32x4". */
sewardj56c30312012-06-12 08:45:39 +000010793/* Does the maths for 128 bit versions of UNPCKLPS and UNPCKHPS */
10794static IRTemp math_UNPCKxPS_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
sewardjc4530ae2012-05-21 10:18:49 +000010795{
10796 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
sewardjc4530ae2012-05-21 10:18:49 +000010797 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000010798 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10799 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardjc4530ae2012-05-21 10:18:49 +000010800 IRTemp res = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000010801 assign(res, xIsH ? mkV128from32s( s3, d3, s2, d2 )
10802 : mkV128from32s( s1, d1, s0, d0 ));
sewardj56c30312012-06-12 08:45:39 +000010803 return res;
10804}
10805
10806
10807/* FIXME: why not just use InterleaveLO / InterleaveHI ?? */
10808/* Does the maths for 128 bit versions of UNPCKLPD and UNPCKHPD */
10809static IRTemp math_UNPCKxPD_128 ( IRTemp sV, IRTemp dV, Bool xIsH )
10810{
10811 IRTemp s1 = newTemp(Ity_I64);
10812 IRTemp s0 = newTemp(Ity_I64);
10813 IRTemp d1 = newTemp(Ity_I64);
10814 IRTemp d0 = newTemp(Ity_I64);
10815 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10816 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10817 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10818 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10819 IRTemp res = newTemp(Ity_V128);
10820 assign(res, xIsH ? binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1))
10821 : binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)));
sewardjc4530ae2012-05-21 10:18:49 +000010822 return res;
10823}
10824
10825
sewardj4b1cc832012-06-13 11:10:20 +000010826/* Does the maths for 256 bit versions of UNPCKLPD and UNPCKHPD.
10827 Doesn't seem like this fits in either of the Iop_Interleave{LO,HI}
10828 or the Iop_Cat{Odd,Even}Lanes idioms, hence just do it the stupid
10829 way. */
10830static IRTemp math_UNPCKxPD_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10831{
10832 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10833 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10834 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
10835 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
10836 IRTemp res = newTemp(Ity_V256);
10837 assign(res, xIsH
10838 ? IRExpr_Qop(Iop_64x4toV256, mkexpr(s3), mkexpr(d3),
10839 mkexpr(s1), mkexpr(d1))
10840 : IRExpr_Qop(Iop_64x4toV256, mkexpr(s2), mkexpr(d2),
10841 mkexpr(s0), mkexpr(d0)));
10842 return res;
10843}
10844
10845
10846/* FIXME: this is really bad. Surely can do something better here?
10847 One observation is that the steering in the upper and lower 128 bit
10848 halves is the same as with math_UNPCKxPS_128, so we simply split
10849 into two halves, and use that. Consequently any improvement in
10850 math_UNPCKxPS_128 (probably, to use interleave-style primops)
10851 benefits this too. */
10852static IRTemp math_UNPCKxPS_256 ( IRTemp sV, IRTemp dV, Bool xIsH )
10853{
sewardjfe0c5e72012-06-15 15:48:07 +000010854 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10855 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10856 breakupV256toV128s( sV, &sVhi, &sVlo );
10857 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010858 IRTemp rVhi = math_UNPCKxPS_128(sVhi, dVhi, xIsH);
10859 IRTemp rVlo = math_UNPCKxPS_128(sVlo, dVlo, xIsH);
10860 IRTemp rV = newTemp(Ity_V256);
10861 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10862 return rV;
10863}
10864
10865
10866static IRTemp math_SHUFPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
sewardj251b59e2012-05-25 13:51:07 +000010867{
10868 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
10869 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
10870 vassert(imm8 < 256);
10871
sewardj4b1cc832012-06-13 11:10:20 +000010872 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
10873 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardj251b59e2012-05-25 13:51:07 +000010874
10875# define SELD(n) ((n)==0 ? d0 : ((n)==1 ? d1 : ((n)==2 ? d2 : d3)))
10876# define SELS(n) ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
10877 IRTemp res = newTemp(Ity_V128);
10878 assign(res,
sewardj4b1cc832012-06-13 11:10:20 +000010879 mkV128from32s( SELS((imm8>>6)&3), SELS((imm8>>4)&3),
10880 SELD((imm8>>2)&3), SELD((imm8>>0)&3) ) );
sewardj251b59e2012-05-25 13:51:07 +000010881# undef SELD
10882# undef SELS
10883 return res;
10884}
10885
10886
sewardj4b1cc832012-06-13 11:10:20 +000010887/* 256-bit SHUFPS appears to steer each of the 128-bit halves
10888 identically. Hence do the clueless thing and use math_SHUFPS_128
10889 twice. */
10890static IRTemp math_SHUFPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10891{
sewardjfe0c5e72012-06-15 15:48:07 +000010892 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10893 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10894 breakupV256toV128s( sV, &sVhi, &sVlo );
10895 breakupV256toV128s( dV, &dVhi, &dVlo );
sewardj4b1cc832012-06-13 11:10:20 +000010896 IRTemp rVhi = math_SHUFPS_128(sVhi, dVhi, imm8);
10897 IRTemp rVlo = math_SHUFPS_128(sVlo, dVlo, imm8);
10898 IRTemp rV = newTemp(Ity_V256);
10899 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10900 return rV;
10901}
10902
10903
sewardj21459cb2012-06-18 14:05:52 +000010904static IRTemp math_SHUFPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10905{
10906 IRTemp s1 = newTemp(Ity_I64);
10907 IRTemp s0 = newTemp(Ity_I64);
10908 IRTemp d1 = newTemp(Ity_I64);
10909 IRTemp d0 = newTemp(Ity_I64);
10910
10911 assign( d1, unop(Iop_V128HIto64, mkexpr(dV)) );
10912 assign( d0, unop(Iop_V128to64, mkexpr(dV)) );
10913 assign( s1, unop(Iop_V128HIto64, mkexpr(sV)) );
10914 assign( s0, unop(Iop_V128to64, mkexpr(sV)) );
10915
10916# define SELD(n) mkexpr((n)==0 ? d0 : d1)
10917# define SELS(n) mkexpr((n)==0 ? s0 : s1)
10918
10919 IRTemp res = newTemp(Ity_V128);
10920 assign(res, binop( Iop_64HLtoV128,
10921 SELS((imm8>>1)&1), SELD((imm8>>0)&1) ) );
10922
10923# undef SELD
10924# undef SELS
10925 return res;
10926}
10927
10928
10929static IRTemp math_SHUFPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10930{
10931 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10932 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10933 breakupV256toV128s( sV, &sVhi, &sVlo );
10934 breakupV256toV128s( dV, &dVhi, &dVlo );
10935 IRTemp rVhi = math_SHUFPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10936 IRTemp rVlo = math_SHUFPD_128(sVlo, dVlo, imm8 & 3);
10937 IRTemp rV = newTemp(Ity_V256);
10938 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10939 return rV;
10940}
10941
10942
10943static IRTemp math_BLENDPD_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10944{
10945 UShort imm8_mask_16;
10946 IRTemp imm8_mask = newTemp(Ity_V128);
10947
10948 switch( imm8 & 3 ) {
10949 case 0: imm8_mask_16 = 0x0000; break;
10950 case 1: imm8_mask_16 = 0x00FF; break;
10951 case 2: imm8_mask_16 = 0xFF00; break;
10952 case 3: imm8_mask_16 = 0xFFFF; break;
10953 default: vassert(0); break;
10954 }
10955 assign( imm8_mask, mkV128( imm8_mask_16 ) );
10956
10957 IRTemp res = newTemp(Ity_V128);
10958 assign ( res, binop( Iop_OrV128,
10959 binop( Iop_AndV128, mkexpr(sV),
10960 mkexpr(imm8_mask) ),
10961 binop( Iop_AndV128, mkexpr(dV),
10962 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10963 return res;
10964}
10965
10966
10967static IRTemp math_BLENDPD_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
10968{
10969 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
10970 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
10971 breakupV256toV128s( sV, &sVhi, &sVlo );
10972 breakupV256toV128s( dV, &dVhi, &dVlo );
10973 IRTemp rVhi = math_BLENDPD_128(sVhi, dVhi, (imm8 >> 2) & 3);
10974 IRTemp rVlo = math_BLENDPD_128(sVlo, dVlo, imm8 & 3);
10975 IRTemp rV = newTemp(Ity_V256);
10976 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
10977 return rV;
10978}
10979
10980
10981static IRTemp math_BLENDPS_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
10982{
10983 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
10984 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
10985 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
10986 0xFFFF };
10987 IRTemp imm8_mask = newTemp(Ity_V128);
10988 assign( imm8_mask, mkV128( imm8_perms[ (imm8 & 15) ] ) );
10989
10990 IRTemp res = newTemp(Ity_V128);
10991 assign ( res, binop( Iop_OrV128,
10992 binop( Iop_AndV128, mkexpr(sV),
10993 mkexpr(imm8_mask) ),
10994 binop( Iop_AndV128, mkexpr(dV),
10995 unop( Iop_NotV128, mkexpr(imm8_mask) ) ) ) );
10996 return res;
10997}
10998
10999
11000static IRTemp math_BLENDPS_256 ( IRTemp sV, IRTemp dV, UInt imm8 )
11001{
11002 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
11003 IRTemp dVhi = IRTemp_INVALID, dVlo = IRTemp_INVALID;
11004 breakupV256toV128s( sV, &sVhi, &sVlo );
11005 breakupV256toV128s( dV, &dVhi, &dVlo );
11006 IRTemp rVhi = math_BLENDPS_128(sVhi, dVhi, (imm8 >> 4) & 15);
11007 IRTemp rVlo = math_BLENDPS_128(sVlo, dVlo, imm8 & 15);
11008 IRTemp rV = newTemp(Ity_V256);
11009 assign(rV, binop(Iop_V128HLtoV256, mkexpr(rVhi), mkexpr(rVlo)));
11010 return rV;
11011}
11012
11013
11014static IRTemp math_PBLENDW_128 ( IRTemp sV, IRTemp dV, UInt imm8 )
11015{
11016 /* Make w be a 16-bit version of imm8, formed by duplicating each
11017 bit in imm8. */
11018 Int i;
11019 UShort imm16 = 0;
11020 for (i = 0; i < 8; i++) {
11021 if (imm8 & (1 << i))
11022 imm16 |= (3 << (2*i));
11023 }
11024 IRTemp imm16_mask = newTemp(Ity_V128);
11025 assign( imm16_mask, mkV128( imm16 ));
11026
11027 IRTemp res = newTemp(Ity_V128);
11028 assign ( res, binop( Iop_OrV128,
11029 binop( Iop_AndV128, mkexpr(sV),
11030 mkexpr(imm16_mask) ),
11031 binop( Iop_AndV128, mkexpr(dV),
11032 unop( Iop_NotV128, mkexpr(imm16_mask) ) ) ) );
11033 return res;
11034}
11035
11036
sewardje8a7eb72012-06-12 14:59:17 +000011037static IRTemp math_PMULUDQ_128 ( IRTemp sV, IRTemp dV )
11038{
11039 /* This is a really poor translation -- could be improved if
11040 performance critical */
11041 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11042 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000011043 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11044 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000011045 IRTemp res = newTemp(Ity_V128);
11046 assign(res, binop(Iop_64HLtoV128,
11047 binop( Iop_MullU32, mkexpr(d2), mkexpr(s2)),
11048 binop( Iop_MullU32, mkexpr(d0), mkexpr(s0)) ));
11049 return res;
11050}
11051
11052
sewardjcc3d2192013-03-27 11:37:33 +000011053static IRTemp math_PMULUDQ_256 ( IRTemp sV, IRTemp dV )
11054{
11055 /* This is a really poor translation -- could be improved if
11056 performance critical */
11057 IRTemp sHi, sLo, dHi, dLo;
11058 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11059 breakupV256toV128s( dV, &dHi, &dLo);
11060 breakupV256toV128s( sV, &sHi, &sLo);
11061 IRTemp res = newTemp(Ity_V256);
11062 assign(res, binop(Iop_V128HLtoV256,
11063 mkexpr(math_PMULUDQ_128(sHi, dHi)),
11064 mkexpr(math_PMULUDQ_128(sLo, dLo))));
11065 return res;
11066}
11067
11068
sewardj89378162012-06-24 12:12:20 +000011069static IRTemp math_PMULDQ_128 ( IRTemp dV, IRTemp sV )
11070{
11071 /* This is a really poor translation -- could be improved if
11072 performance critical */
11073 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
11074 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
11075 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
11076 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
11077 IRTemp res = newTemp(Ity_V128);
11078 assign(res, binop(Iop_64HLtoV128,
11079 binop( Iop_MullS32, mkexpr(d2), mkexpr(s2)),
11080 binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ));
11081 return res;
11082}
11083
11084
sewardjcc3d2192013-03-27 11:37:33 +000011085static IRTemp math_PMULDQ_256 ( IRTemp sV, IRTemp dV )
11086{
11087 /* This is a really poor translation -- could be improved if
11088 performance critical */
11089 IRTemp sHi, sLo, dHi, dLo;
11090 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11091 breakupV256toV128s( dV, &dHi, &dLo);
11092 breakupV256toV128s( sV, &sHi, &sLo);
11093 IRTemp res = newTemp(Ity_V256);
11094 assign(res, binop(Iop_V128HLtoV256,
11095 mkexpr(math_PMULDQ_128(sHi, dHi)),
11096 mkexpr(math_PMULDQ_128(sLo, dLo))));
11097 return res;
11098}
11099
11100
sewardj89378162012-06-24 12:12:20 +000011101static IRTemp math_PMADDWD_128 ( IRTemp dV, IRTemp sV )
11102{
11103 IRTemp sVhi, sVlo, dVhi, dVlo;
11104 IRTemp resHi = newTemp(Ity_I64);
11105 IRTemp resLo = newTemp(Ity_I64);
11106 sVhi = sVlo = dVhi = dVlo = IRTemp_INVALID;
11107 breakupV128to64s( sV, &sVhi, &sVlo );
11108 breakupV128to64s( dV, &dVhi, &dVlo );
11109 assign( resHi, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11110 "amd64g_calculate_mmx_pmaddwd",
11111 &amd64g_calculate_mmx_pmaddwd,
11112 mkIRExprVec_2( mkexpr(sVhi), mkexpr(dVhi))));
11113 assign( resLo, mkIRExprCCall(Ity_I64, 0/*regparms*/,
11114 "amd64g_calculate_mmx_pmaddwd",
11115 &amd64g_calculate_mmx_pmaddwd,
11116 mkIRExprVec_2( mkexpr(sVlo), mkexpr(dVlo))));
11117 IRTemp res = newTemp(Ity_V128);
11118 assign( res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo))) ;
11119 return res;
11120}
11121
11122
sewardjcc3d2192013-03-27 11:37:33 +000011123static IRTemp math_PMADDWD_256 ( IRTemp dV, IRTemp sV )
11124{
11125 IRTemp sHi, sLo, dHi, dLo;
11126 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11127 breakupV256toV128s( dV, &dHi, &dLo);
11128 breakupV256toV128s( sV, &sHi, &sLo);
11129 IRTemp res = newTemp(Ity_V256);
11130 assign(res, binop(Iop_V128HLtoV256,
11131 mkexpr(math_PMADDWD_128(dHi, sHi)),
11132 mkexpr(math_PMADDWD_128(dLo, sLo))));
11133 return res;
11134}
11135
11136
sewardj89378162012-06-24 12:12:20 +000011137static IRTemp math_ADDSUBPD_128 ( IRTemp dV, IRTemp sV )
11138{
11139 IRTemp addV = newTemp(Ity_V128);
11140 IRTemp subV = newTemp(Ity_V128);
11141 IRTemp a1 = newTemp(Ity_I64);
11142 IRTemp s0 = newTemp(Ity_I64);
sewardj9571dc02014-01-26 18:34:23 +000011143 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011144
sewardj9571dc02014-01-26 18:34:23 +000011145 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11146 assign( addV, triop(Iop_Add64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11147 assign( subV, triop(Iop_Sub64Fx2, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011148
11149 assign( a1, unop(Iop_V128HIto64, mkexpr(addV) ));
11150 assign( s0, unop(Iop_V128to64, mkexpr(subV) ));
11151
11152 IRTemp res = newTemp(Ity_V128);
11153 assign( res, binop(Iop_64HLtoV128, mkexpr(a1), mkexpr(s0)) );
11154 return res;
11155}
11156
11157
11158static IRTemp math_ADDSUBPD_256 ( IRTemp dV, IRTemp sV )
11159{
11160 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11161 IRTemp addV = newTemp(Ity_V256);
11162 IRTemp subV = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000011163 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011164 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11165
sewardj9571dc02014-01-26 18:34:23 +000011166 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11167 assign( addV, triop(Iop_Add64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11168 assign( subV, triop(Iop_Sub64Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011169
11170 breakupV256to64s( addV, &a3, &a2, &a1, &a0 );
11171 breakupV256to64s( subV, &s3, &s2, &s1, &s0 );
11172
11173 IRTemp res = newTemp(Ity_V256);
11174 assign( res, mkV256from64s( a3, s2, a1, s0 ) );
11175 return res;
11176}
11177
11178
11179static IRTemp math_ADDSUBPS_128 ( IRTemp dV, IRTemp sV )
11180{
11181 IRTemp a3, a2, a1, a0, s3, s2, s1, s0;
11182 IRTemp addV = newTemp(Ity_V128);
11183 IRTemp subV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000011184 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011185 a3 = a2 = a1 = a0 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11186
sewardj9571dc02014-01-26 18:34:23 +000011187 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11188 assign( addV, triop(Iop_Add32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11189 assign( subV, triop(Iop_Sub32Fx4, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011190
11191 breakupV128to32s( addV, &a3, &a2, &a1, &a0 );
11192 breakupV128to32s( subV, &s3, &s2, &s1, &s0 );
11193
11194 IRTemp res = newTemp(Ity_V128);
11195 assign( res, mkV128from32s( a3, s2, a1, s0 ) );
11196 return res;
11197}
11198
11199
11200static IRTemp math_ADDSUBPS_256 ( IRTemp dV, IRTemp sV )
11201{
11202 IRTemp a7, a6, a5, a4, a3, a2, a1, a0;
11203 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
11204 IRTemp addV = newTemp(Ity_V256);
11205 IRTemp subV = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000011206 IRTemp rm = newTemp(Ity_I32);
sewardj89378162012-06-24 12:12:20 +000011207 a7 = a6 = a5 = a4 = a3 = a2 = a1 = a0 = IRTemp_INVALID;
11208 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
11209
sewardj9571dc02014-01-26 18:34:23 +000011210 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
11211 assign( addV, triop(Iop_Add32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
11212 assign( subV, triop(Iop_Sub32Fx8, mkexpr(rm), mkexpr(dV), mkexpr(sV)) );
sewardj89378162012-06-24 12:12:20 +000011213
11214 breakupV256to32s( addV, &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0 );
11215 breakupV256to32s( subV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
11216
11217 IRTemp res = newTemp(Ity_V256);
11218 assign( res, mkV256from32s( a7, s6, a5, s4, a3, s2, a1, s0 ) );
11219 return res;
11220}
11221
11222
sewardj251b59e2012-05-25 13:51:07 +000011223/* Handle 128 bit PSHUFLW and PSHUFHW. */
11224static Long dis_PSHUFxW_128 ( VexAbiInfo* vbi, Prefix pfx,
11225 Long delta, Bool isAvx, Bool xIsH )
11226{
11227 IRTemp addr = IRTemp_INVALID;
11228 Int alen = 0;
11229 HChar dis_buf[50];
11230 UChar modrm = getUChar(delta);
11231 UInt rG = gregOfRexRM(pfx,modrm);
11232 UInt imm8;
11233 IRTemp sVmut, dVmut, sVcon, sV, dV, s3, s2, s1, s0;
11234 s3 = s2 = s1 = s0 = IRTemp_INVALID;
11235 sV = newTemp(Ity_V128);
11236 dV = newTemp(Ity_V128);
11237 sVmut = newTemp(Ity_I64);
11238 dVmut = newTemp(Ity_I64);
11239 sVcon = newTemp(Ity_I64);
11240 if (epartIsReg(modrm)) {
11241 UInt rE = eregOfRexRM(pfx,modrm);
11242 assign( sV, getXMMReg(rE) );
11243 imm8 = (UInt)getUChar(delta+1);
11244 delta += 1+1;
11245 DIP("%spshuf%cw $%u,%s,%s\n",
11246 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11247 imm8, nameXMMReg(rE), nameXMMReg(rG));
11248 } else {
11249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11250 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
11251 imm8 = (UInt)getUChar(delta+alen);
11252 delta += alen+1;
11253 DIP("%spshuf%cw $%u,%s,%s\n",
11254 isAvx ? "v" : "", xIsH ? 'h' : 'l',
11255 imm8, dis_buf, nameXMMReg(rG));
11256 }
11257
11258 /* Get the to-be-changed (mut) and unchanging (con) bits of the
11259 source. */
11260 assign( sVmut, unop(xIsH ? Iop_V128HIto64 : Iop_V128to64, mkexpr(sV)) );
11261 assign( sVcon, unop(xIsH ? Iop_V128to64 : Iop_V128HIto64, mkexpr(sV)) );
11262
11263 breakup64to16s( sVmut, &s3, &s2, &s1, &s0 );
11264# define SEL(n) \
11265 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
11266 assign(dVmut, mk64from16s( SEL((imm8>>6)&3), SEL((imm8>>4)&3),
11267 SEL((imm8>>2)&3), SEL((imm8>>0)&3) ));
11268# undef SEL
11269
11270 assign(dV, xIsH ? binop(Iop_64HLtoV128, mkexpr(dVmut), mkexpr(sVcon))
11271 : binop(Iop_64HLtoV128, mkexpr(sVcon), mkexpr(dVmut)) );
11272
11273 (isAvx ? putYMMRegLoAndZU : putXMMReg)(rG, mkexpr(dV));
11274 return delta;
11275}
11276
11277
sewardjcc3d2192013-03-27 11:37:33 +000011278/* Handle 256 bit PSHUFLW and PSHUFHW. */
11279static Long dis_PSHUFxW_256 ( VexAbiInfo* vbi, Prefix pfx,
11280 Long delta, Bool xIsH )
11281{
11282 IRTemp addr = IRTemp_INVALID;
11283 Int alen = 0;
11284 HChar dis_buf[50];
11285 UChar modrm = getUChar(delta);
11286 UInt rG = gregOfRexRM(pfx,modrm);
11287 UInt imm8;
11288 IRTemp sV, s[8], sV64[4], dVhi, dVlo;
11289 sV64[3] = sV64[2] = sV64[1] = sV64[0] = IRTemp_INVALID;
11290 s[7] = s[6] = s[5] = s[4] = s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
11291 sV = newTemp(Ity_V256);
11292 dVhi = newTemp(Ity_I64);
11293 dVlo = newTemp(Ity_I64);
11294 if (epartIsReg(modrm)) {
11295 UInt rE = eregOfRexRM(pfx,modrm);
11296 assign( sV, getYMMReg(rE) );
11297 imm8 = (UInt)getUChar(delta+1);
11298 delta += 1+1;
11299 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11300 imm8, nameYMMReg(rE), nameYMMReg(rG));
11301 } else {
11302 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
11303 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
11304 imm8 = (UInt)getUChar(delta+alen);
11305 delta += alen+1;
11306 DIP("vpshuf%cw $%u,%s,%s\n", xIsH ? 'h' : 'l',
11307 imm8, dis_buf, nameYMMReg(rG));
11308 }
11309
11310 breakupV256to64s( sV, &sV64[3], &sV64[2], &sV64[1], &sV64[0] );
11311 breakup64to16s( sV64[xIsH ? 3 : 2], &s[7], &s[6], &s[5], &s[4] );
11312 breakup64to16s( sV64[xIsH ? 1 : 0], &s[3], &s[2], &s[1], &s[0] );
11313
11314 assign( dVhi, mk64from16s( s[4 + ((imm8>>6)&3)], s[4 + ((imm8>>4)&3)],
11315 s[4 + ((imm8>>2)&3)], s[4 + ((imm8>>0)&3)] ) );
11316 assign( dVlo, mk64from16s( s[0 + ((imm8>>6)&3)], s[0 + ((imm8>>4)&3)],
11317 s[0 + ((imm8>>2)&3)], s[0 + ((imm8>>0)&3)] ) );
11318 putYMMReg( rG, mkV256from64s( xIsH ? dVhi : sV64[3],
11319 xIsH ? sV64[2] : dVhi,
11320 xIsH ? dVlo : sV64[1],
11321 xIsH ? sV64[0] : dVlo ) );
11322 return delta;
11323}
11324
11325
sewardje8a7eb72012-06-12 14:59:17 +000011326static Long dis_PEXTRW_128_EregOnly_toG ( VexAbiInfo* vbi, Prefix pfx,
11327 Long delta, Bool isAvx )
11328{
11329 Long deltaIN = delta;
11330 UChar modrm = getUChar(delta);
11331 UInt rG = gregOfRexRM(pfx,modrm);
11332 IRTemp sV = newTemp(Ity_V128);
11333 IRTemp d16 = newTemp(Ity_I16);
11334 UInt imm8;
11335 IRTemp s0, s1, s2, s3;
11336 if (epartIsReg(modrm)) {
11337 UInt rE = eregOfRexRM(pfx,modrm);
11338 assign(sV, getXMMReg(rE));
11339 imm8 = getUChar(delta+1) & 7;
11340 delta += 1+1;
11341 DIP("%spextrw $%d,%s,%s\n", isAvx ? "v" : "",
11342 (Int)imm8, nameXMMReg(rE), nameIReg32(rG));
11343 } else {
11344 /* The memory case is disallowed, apparently. */
11345 return deltaIN; /* FAIL */
11346 }
11347 s3 = s2 = s1 = s0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000011348 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
sewardje8a7eb72012-06-12 14:59:17 +000011349 switch (imm8) {
11350 case 0: assign(d16, unop(Iop_32to16, mkexpr(s0))); break;
11351 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(s0))); break;
11352 case 2: assign(d16, unop(Iop_32to16, mkexpr(s1))); break;
11353 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(s1))); break;
11354 case 4: assign(d16, unop(Iop_32to16, mkexpr(s2))); break;
11355 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(s2))); break;
11356 case 6: assign(d16, unop(Iop_32to16, mkexpr(s3))); break;
11357 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(s3))); break;
11358 default: vassert(0);
11359 }
11360 putIReg32(rG, unop(Iop_16Uto32, mkexpr(d16)));
11361 return delta;
11362}
11363
11364
sewardj4b1cc832012-06-13 11:10:20 +000011365static Long dis_CVTDQ2PD_128 ( VexAbiInfo* vbi, Prefix pfx,
11366 Long delta, Bool isAvx )
11367{
11368 IRTemp addr = IRTemp_INVALID;
11369 Int alen = 0;
11370 HChar dis_buf[50];
11371 UChar modrm = getUChar(delta);
11372 IRTemp arg64 = newTemp(Ity_I64);
11373 UInt rG = gregOfRexRM(pfx,modrm);
florian55085f82012-11-21 00:36:55 +000011374 const HChar* mbV = isAvx ? "v" : "";
sewardj4b1cc832012-06-13 11:10:20 +000011375 if (epartIsReg(modrm)) {
11376 UInt rE = eregOfRexRM(pfx,modrm);
11377 assign( arg64, getXMMRegLane64(rE, 0) );
11378 delta += 1;
11379 DIP("%scvtdq2pd %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
11380 } else {
11381 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11382 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
11383 delta += alen;
11384 DIP("%scvtdq2pd %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
11385 }
11386 putXMMRegLane64F(
11387 rG, 0,
11388 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)))
11389 );
11390 putXMMRegLane64F(
11391 rG, 1,
11392 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)))
11393 );
11394 if (isAvx)
11395 putYMMRegLane128(rG, 1, mkV128(0));
11396 return delta;
11397}
11398
11399
sewardjfe0c5e72012-06-15 15:48:07 +000011400static Long dis_STMXCSR ( VexAbiInfo* vbi, Prefix pfx,
11401 Long delta, Bool isAvx )
11402{
11403 IRTemp addr = IRTemp_INVALID;
11404 Int alen = 0;
11405 HChar dis_buf[50];
11406 UChar modrm = getUChar(delta);
11407 vassert(!epartIsReg(modrm)); /* ensured by caller */
11408 vassert(gregOfRexRM(pfx,modrm) == 3); /* ditto */
11409
11410 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11411 delta += alen;
11412
11413 /* Fake up a native SSE mxcsr word. The only thing it depends on
11414 is SSEROUND[1:0], so call a clean helper to cook it up.
11415 */
11416 /* ULong amd64h_create_mxcsr ( ULong sseround ) */
11417 DIP("%sstmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11418 storeLE(
11419 mkexpr(addr),
11420 unop(Iop_64to32,
11421 mkIRExprCCall(
11422 Ity_I64, 0/*regp*/,
11423 "amd64g_create_mxcsr", &amd64g_create_mxcsr,
11424 mkIRExprVec_1( unop(Iop_32Uto64,get_sse_roundingmode()) )
11425 )
11426 )
11427 );
11428 return delta;
11429}
11430
11431
11432static Long dis_LDMXCSR ( VexAbiInfo* vbi, Prefix pfx,
11433 Long delta, Bool isAvx )
11434{
11435 IRTemp addr = IRTemp_INVALID;
11436 Int alen = 0;
11437 HChar dis_buf[50];
11438 UChar modrm = getUChar(delta);
11439 vassert(!epartIsReg(modrm)); /* ensured by caller */
11440 vassert(gregOfRexRM(pfx,modrm) == 2); /* ditto */
11441
11442 IRTemp t64 = newTemp(Ity_I64);
11443 IRTemp ew = newTemp(Ity_I32);
11444
11445 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11446 delta += alen;
11447 DIP("%sldmxcsr %s\n", isAvx ? "v" : "", dis_buf);
11448
11449 /* The only thing we observe in %mxcsr is the rounding mode.
11450 Therefore, pass the 32-bit value (SSE native-format control
11451 word) to a clean helper, getting back a 64-bit value, the
11452 lower half of which is the SSEROUND value to store, and the
11453 upper half of which is the emulation-warning token which may
11454 be generated.
11455 */
11456 /* ULong amd64h_check_ldmxcsr ( ULong ); */
11457 assign( t64, mkIRExprCCall(
11458 Ity_I64, 0/*regparms*/,
11459 "amd64g_check_ldmxcsr",
11460 &amd64g_check_ldmxcsr,
11461 mkIRExprVec_1(
11462 unop(Iop_32Uto64,
11463 loadLE(Ity_I32, mkexpr(addr))
11464 )
11465 )
11466 )
11467 );
11468
11469 put_sse_roundingmode( unop(Iop_64to32, mkexpr(t64)) );
11470 assign( ew, unop(Iop_64HIto32, mkexpr(t64) ) );
11471 put_emwarn( mkexpr(ew) );
11472 /* Finally, if an emulation warning was reported, side-exit to
11473 the next insn, reporting the warning, so that Valgrind's
11474 dispatcher sees the warning. */
11475 stmt(
11476 IRStmt_Exit(
11477 binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
11478 Ijk_EmWarn,
11479 IRConst_U64(guest_RIP_bbstart+delta),
11480 OFFB_RIP
11481 )
11482 );
11483 return delta;
11484}
11485
11486
sewardj4ed05e02012-06-18 15:01:30 +000011487static IRTemp math_PINSRW_128 ( IRTemp v128, IRTemp u16, UInt imm8 )
11488{
11489 vassert(imm8 >= 0 && imm8 <= 7);
11490
11491 // Create a V128 value which has the selected word in the
11492 // specified lane, and zeroes everywhere else.
11493 IRTemp tmp128 = newTemp(Ity_V128);
11494 IRTemp halfshift = newTemp(Ity_I64);
11495 assign(halfshift, binop(Iop_Shl64,
11496 unop(Iop_16Uto64, mkexpr(u16)),
11497 mkU8(16 * (imm8 & 3))));
11498 if (imm8 < 4) {
11499 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
11500 } else {
11501 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
11502 }
11503
11504 UShort mask = ~(3 << (imm8 * 2));
11505 IRTemp res = newTemp(Ity_V128);
11506 assign( res, binop(Iop_OrV128,
11507 mkexpr(tmp128),
11508 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
11509 return res;
11510}
11511
11512
sewardj82096922012-06-24 14:57:59 +000011513static IRTemp math_PSADBW_128 ( IRTemp dV, IRTemp sV )
11514{
11515 IRTemp s1, s0, d1, d0;
11516 s1 = s0 = d1 = d0 = IRTemp_INVALID;
11517
11518 breakupV128to64s( sV, &s1, &s0 );
11519 breakupV128to64s( dV, &d1, &d0 );
11520
11521 IRTemp res = newTemp(Ity_V128);
11522 assign( res,
11523 binop(Iop_64HLtoV128,
11524 mkIRExprCCall(Ity_I64, 0/*regparms*/,
11525 "amd64g_calculate_mmx_psadbw",
11526 &amd64g_calculate_mmx_psadbw,
11527 mkIRExprVec_2( mkexpr(s1), mkexpr(d1))),
11528 mkIRExprCCall(Ity_I64, 0/*regparms*/,
11529 "amd64g_calculate_mmx_psadbw",
11530 &amd64g_calculate_mmx_psadbw,
11531 mkIRExprVec_2( mkexpr(s0), mkexpr(d0)))) );
11532 return res;
11533}
11534
11535
sewardjcc3d2192013-03-27 11:37:33 +000011536static IRTemp math_PSADBW_256 ( IRTemp dV, IRTemp sV )
11537{
11538 IRTemp sHi, sLo, dHi, dLo;
11539 sHi = sLo = dHi = dLo = IRTemp_INVALID;
11540 breakupV256toV128s( dV, &dHi, &dLo);
11541 breakupV256toV128s( sV, &sHi, &sLo);
11542 IRTemp res = newTemp(Ity_V256);
11543 assign(res, binop(Iop_V128HLtoV256,
11544 mkexpr(math_PSADBW_128(dHi, sHi)),
11545 mkexpr(math_PSADBW_128(dLo, sLo))));
11546 return res;
11547}
11548
11549
sewardj8eb7ae82012-06-24 14:00:27 +000011550static Long dis_MASKMOVDQU ( VexAbiInfo* vbi, Prefix pfx,
11551 Long delta, Bool isAvx )
11552{
11553 IRTemp regD = newTemp(Ity_V128);
11554 IRTemp mask = newTemp(Ity_V128);
11555 IRTemp olddata = newTemp(Ity_V128);
11556 IRTemp newdata = newTemp(Ity_V128);
11557 IRTemp addr = newTemp(Ity_I64);
11558 UChar modrm = getUChar(delta);
11559 UInt rG = gregOfRexRM(pfx,modrm);
11560 UInt rE = eregOfRexRM(pfx,modrm);
11561
11562 assign( addr, handleAddrOverrides( vbi, pfx, getIReg64(R_RDI) ));
11563 assign( regD, getXMMReg( rG ));
11564
11565 /* Unfortunately can't do the obvious thing with SarN8x16
11566 here since that can't be re-emitted as SSE2 code - no such
11567 insn. */
11568 assign( mask,
11569 binop(Iop_64HLtoV128,
11570 binop(Iop_SarN8x8,
11571 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ),
11572 mkU8(7) ),
11573 binop(Iop_SarN8x8,
11574 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ),
11575 mkU8(7) ) ));
11576 assign( olddata, loadLE( Ity_V128, mkexpr(addr) ));
11577 assign( newdata, binop(Iop_OrV128,
11578 binop(Iop_AndV128,
11579 mkexpr(regD),
11580 mkexpr(mask) ),
11581 binop(Iop_AndV128,
11582 mkexpr(olddata),
11583 unop(Iop_NotV128, mkexpr(mask)))) );
11584 storeLE( mkexpr(addr), mkexpr(newdata) );
11585
11586 delta += 1;
11587 DIP("%smaskmovdqu %s,%s\n", isAvx ? "v" : "",
11588 nameXMMReg(rE), nameXMMReg(rG) );
11589 return delta;
11590}
11591
11592
11593static Long dis_MOVMSKPS_128 ( VexAbiInfo* vbi, Prefix pfx,
11594 Long delta, Bool isAvx )
11595{
11596 UChar modrm = getUChar(delta);
11597 UInt rG = gregOfRexRM(pfx,modrm);
11598 UInt rE = eregOfRexRM(pfx,modrm);
11599 IRTemp t0 = newTemp(Ity_I32);
11600 IRTemp t1 = newTemp(Ity_I32);
11601 IRTemp t2 = newTemp(Ity_I32);
11602 IRTemp t3 = newTemp(Ity_I32);
11603 delta += 1;
11604 assign( t0, binop( Iop_And32,
11605 binop(Iop_Shr32, getXMMRegLane32(rE,0), mkU8(31)),
11606 mkU32(1) ));
11607 assign( t1, binop( Iop_And32,
11608 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(30)),
11609 mkU32(2) ));
11610 assign( t2, binop( Iop_And32,
11611 binop(Iop_Shr32, getXMMRegLane32(rE,2), mkU8(29)),
11612 mkU32(4) ));
11613 assign( t3, binop( Iop_And32,
11614 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(28)),
11615 mkU32(8) ));
11616 putIReg32( rG, binop(Iop_Or32,
11617 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11618 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11619 DIP("%smovmskps %s,%s\n", isAvx ? "v" : "",
11620 nameXMMReg(rE), nameIReg32(rG));
11621 return delta;
11622}
11623
11624
11625static Long dis_MOVMSKPS_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
11626{
11627 UChar modrm = getUChar(delta);
11628 UInt rG = gregOfRexRM(pfx,modrm);
11629 UInt rE = eregOfRexRM(pfx,modrm);
11630 IRTemp t0 = newTemp(Ity_I32);
11631 IRTemp t1 = newTemp(Ity_I32);
11632 IRTemp t2 = newTemp(Ity_I32);
11633 IRTemp t3 = newTemp(Ity_I32);
11634 IRTemp t4 = newTemp(Ity_I32);
11635 IRTemp t5 = newTemp(Ity_I32);
11636 IRTemp t6 = newTemp(Ity_I32);
11637 IRTemp t7 = newTemp(Ity_I32);
11638 delta += 1;
11639 assign( t0, binop( Iop_And32,
11640 binop(Iop_Shr32, getYMMRegLane32(rE,0), mkU8(31)),
11641 mkU32(1) ));
11642 assign( t1, binop( Iop_And32,
11643 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(30)),
11644 mkU32(2) ));
11645 assign( t2, binop( Iop_And32,
11646 binop(Iop_Shr32, getYMMRegLane32(rE,2), mkU8(29)),
11647 mkU32(4) ));
11648 assign( t3, binop( Iop_And32,
11649 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(28)),
11650 mkU32(8) ));
11651 assign( t4, binop( Iop_And32,
11652 binop(Iop_Shr32, getYMMRegLane32(rE,4), mkU8(27)),
11653 mkU32(16) ));
11654 assign( t5, binop( Iop_And32,
11655 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(26)),
11656 mkU32(32) ));
11657 assign( t6, binop( Iop_And32,
11658 binop(Iop_Shr32, getYMMRegLane32(rE,6), mkU8(25)),
11659 mkU32(64) ));
11660 assign( t7, binop( Iop_And32,
11661 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(24)),
11662 mkU32(128) ));
11663 putIReg32( rG, binop(Iop_Or32,
11664 binop(Iop_Or32,
11665 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11666 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ),
11667 binop(Iop_Or32,
11668 binop(Iop_Or32, mkexpr(t4), mkexpr(t5)),
11669 binop(Iop_Or32, mkexpr(t6), mkexpr(t7)) ) ) );
11670 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11671 return delta;
11672}
11673
11674
11675static Long dis_MOVMSKPD_128 ( VexAbiInfo* vbi, Prefix pfx,
11676 Long delta, Bool isAvx )
11677{
11678 UChar modrm = getUChar(delta);
11679 UInt rG = gregOfRexRM(pfx,modrm);
11680 UInt rE = eregOfRexRM(pfx,modrm);
11681 IRTemp t0 = newTemp(Ity_I32);
11682 IRTemp t1 = newTemp(Ity_I32);
11683 delta += 1;
11684 assign( t0, binop( Iop_And32,
11685 binop(Iop_Shr32, getXMMRegLane32(rE,1), mkU8(31)),
11686 mkU32(1) ));
11687 assign( t1, binop( Iop_And32,
11688 binop(Iop_Shr32, getXMMRegLane32(rE,3), mkU8(30)),
11689 mkU32(2) ));
11690 putIReg32( rG, binop(Iop_Or32, mkexpr(t0), mkexpr(t1) ) );
11691 DIP("%smovmskpd %s,%s\n", isAvx ? "v" : "",
11692 nameXMMReg(rE), nameIReg32(rG));
11693 return delta;
11694}
11695
11696
11697static Long dis_MOVMSKPD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
11698{
11699 UChar modrm = getUChar(delta);
11700 UInt rG = gregOfRexRM(pfx,modrm);
11701 UInt rE = eregOfRexRM(pfx,modrm);
11702 IRTemp t0 = newTemp(Ity_I32);
11703 IRTemp t1 = newTemp(Ity_I32);
11704 IRTemp t2 = newTemp(Ity_I32);
11705 IRTemp t3 = newTemp(Ity_I32);
11706 delta += 1;
11707 assign( t0, binop( Iop_And32,
11708 binop(Iop_Shr32, getYMMRegLane32(rE,1), mkU8(31)),
11709 mkU32(1) ));
11710 assign( t1, binop( Iop_And32,
11711 binop(Iop_Shr32, getYMMRegLane32(rE,3), mkU8(30)),
11712 mkU32(2) ));
11713 assign( t2, binop( Iop_And32,
11714 binop(Iop_Shr32, getYMMRegLane32(rE,5), mkU8(29)),
11715 mkU32(4) ));
11716 assign( t3, binop( Iop_And32,
11717 binop(Iop_Shr32, getYMMRegLane32(rE,7), mkU8(28)),
11718 mkU32(8) ));
11719 putIReg32( rG, binop(Iop_Or32,
11720 binop(Iop_Or32, mkexpr(t0), mkexpr(t1)),
11721 binop(Iop_Or32, mkexpr(t2), mkexpr(t3)) ) );
11722 DIP("vmovmskps %s,%s\n", nameYMMReg(rE), nameIReg32(rG));
11723 return delta;
11724}
11725
11726
sewardj80611e32012-01-20 13:07:24 +000011727/* Note, this also handles SSE(1) insns. */
11728__attribute__((noinline))
11729static
11730Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
11731 VexAbiInfo* vbi,
sewardj30fc0582012-02-16 13:45:13 +000011732 Prefix pfx, Int sz, Long deltaIN,
11733 DisResult* dres )
sewardj80611e32012-01-20 13:07:24 +000011734{
11735 IRTemp addr = IRTemp_INVALID;
11736 IRTemp t0 = IRTemp_INVALID;
11737 IRTemp t1 = IRTemp_INVALID;
11738 IRTemp t2 = IRTemp_INVALID;
11739 IRTemp t3 = IRTemp_INVALID;
11740 IRTemp t4 = IRTemp_INVALID;
11741 IRTemp t5 = IRTemp_INVALID;
11742 IRTemp t6 = IRTemp_INVALID;
11743 UChar modrm = 0;
11744 Int alen = 0;
11745 HChar dis_buf[50];
11746
11747 *decode_OK = False;
11748
11749 Long delta = deltaIN;
11750 UChar opc = getUChar(delta);
11751 delta++;
11752 switch (opc) {
11753
11754 case 0x10:
11755 if (have66noF2noF3(pfx)
11756 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11757 /* 66 0F 10 = MOVUPD -- move from E (mem or xmm) to G (xmm). */
11758 modrm = getUChar(delta);
11759 if (epartIsReg(modrm)) {
11760 putXMMReg( gregOfRexRM(pfx,modrm),
11761 getXMMReg( eregOfRexRM(pfx,modrm) ));
11762 DIP("movupd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11763 nameXMMReg(gregOfRexRM(pfx,modrm)));
11764 delta += 1;
11765 } else {
11766 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11767 putXMMReg( gregOfRexRM(pfx,modrm),
11768 loadLE(Ity_V128, mkexpr(addr)) );
11769 DIP("movupd %s,%s\n", dis_buf,
11770 nameXMMReg(gregOfRexRM(pfx,modrm)));
11771 delta += alen;
11772 }
11773 goto decode_success;
11774 }
11775 /* F2 0F 10 = MOVSD -- move 64 bits from E (mem or lo half xmm) to
11776 G (lo half xmm). If E is mem, upper half of G is zeroed out.
11777 If E is reg, upper half of G is unchanged. */
11778 if (haveF2no66noF3(pfx)
11779 && (sz == 4 || /* ignore redundant REX.W */ sz == 8) ) {
11780 modrm = getUChar(delta);
11781 if (epartIsReg(modrm)) {
11782 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11783 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
11784 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11785 nameXMMReg(gregOfRexRM(pfx,modrm)));
11786 delta += 1;
11787 } else {
11788 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11789 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11790 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
11791 loadLE(Ity_I64, mkexpr(addr)) );
11792 DIP("movsd %s,%s\n", dis_buf,
11793 nameXMMReg(gregOfRexRM(pfx,modrm)));
11794 delta += alen;
11795 }
11796 goto decode_success;
11797 }
11798 /* F3 0F 10 = MOVSS -- move 32 bits from E (mem or lo 1/4 xmm) to G
11799 (lo 1/4 xmm). If E is mem, upper 3/4 of G is zeroed out. */
11800 if (haveF3no66noF2(pfx)
11801 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11802 modrm = getUChar(delta);
11803 if (epartIsReg(modrm)) {
11804 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11805 getXMMRegLane32( eregOfRexRM(pfx,modrm), 0 ));
11806 DIP("movss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11807 nameXMMReg(gregOfRexRM(pfx,modrm)));
11808 delta += 1;
11809 } else {
11810 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11811 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
11812 putXMMRegLane32( gregOfRexRM(pfx,modrm), 0,
11813 loadLE(Ity_I32, mkexpr(addr)) );
11814 DIP("movss %s,%s\n", dis_buf,
11815 nameXMMReg(gregOfRexRM(pfx,modrm)));
11816 delta += alen;
11817 }
11818 goto decode_success;
11819 }
11820 /* 0F 10 = MOVUPS -- move from E (mem or xmm) to G (xmm). */
11821 if (haveNo66noF2noF3(pfx)
11822 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11823 modrm = getUChar(delta);
11824 if (epartIsReg(modrm)) {
11825 putXMMReg( gregOfRexRM(pfx,modrm),
11826 getXMMReg( eregOfRexRM(pfx,modrm) ));
11827 DIP("movups %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11828 nameXMMReg(gregOfRexRM(pfx,modrm)));
11829 delta += 1;
11830 } else {
11831 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11832 putXMMReg( gregOfRexRM(pfx,modrm),
11833 loadLE(Ity_V128, mkexpr(addr)) );
11834 DIP("movups %s,%s\n", dis_buf,
11835 nameXMMReg(gregOfRexRM(pfx,modrm)));
11836 delta += alen;
11837 }
11838 goto decode_success;
11839 }
11840 break;
11841
11842 case 0x11:
11843 /* F2 0F 11 = MOVSD -- move 64 bits from G (lo half xmm) to E (mem
11844 or lo half xmm). */
11845 if (haveF2no66noF3(pfx)
11846 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11847 modrm = getUChar(delta);
11848 if (epartIsReg(modrm)) {
11849 putXMMRegLane64( eregOfRexRM(pfx,modrm), 0,
11850 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
11851 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11852 nameXMMReg(eregOfRexRM(pfx,modrm)));
11853 delta += 1;
11854 } else {
11855 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11856 storeLE( mkexpr(addr),
11857 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
11858 DIP("movsd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11859 dis_buf);
11860 delta += alen;
11861 }
11862 goto decode_success;
11863 }
11864 /* F3 0F 11 = MOVSS -- move 32 bits from G (lo 1/4 xmm) to E (mem
11865 or lo 1/4 xmm). */
11866 if (haveF3no66noF2(pfx) && sz == 4) {
11867 modrm = getUChar(delta);
11868 if (epartIsReg(modrm)) {
11869 /* fall through, we don't yet have a test case */
11870 } else {
11871 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11872 storeLE( mkexpr(addr),
11873 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
11874 DIP("movss %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11875 dis_buf);
11876 delta += alen;
11877 goto decode_success;
11878 }
11879 }
11880 /* 66 0F 11 = MOVUPD -- move from G (xmm) to E (mem or xmm). */
11881 if (have66noF2noF3(pfx)
11882 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11883 modrm = getUChar(delta);
11884 if (epartIsReg(modrm)) {
11885 putXMMReg( eregOfRexRM(pfx,modrm),
sewardjcc3d2192013-03-27 11:37:33 +000011886 getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj80611e32012-01-20 13:07:24 +000011887 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
sewardjcc3d2192013-03-27 11:37:33 +000011888 nameXMMReg(eregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000011889 delta += 1;
11890 } else {
11891 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11892 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11893 DIP("movupd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11894 dis_buf );
11895 delta += alen;
11896 }
11897 goto decode_success;
11898 }
11899 /* 0F 11 = MOVUPS -- move from G (xmm) to E (mem or xmm). */
11900 if (haveNo66noF2noF3(pfx)
11901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11902 modrm = getUChar(delta);
11903 if (epartIsReg(modrm)) {
11904 /* fall through; awaiting test case */
11905 } else {
11906 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11907 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
11908 DIP("movups %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
11909 dis_buf );
11910 delta += alen;
11911 goto decode_success;
11912 }
11913 }
11914 break;
11915
11916 case 0x12:
11917 /* 66 0F 12 = MOVLPD -- move from mem to low half of XMM. */
11918 /* Identical to MOVLPS ? */
11919 if (have66noF2noF3(pfx)
11920 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11921 modrm = getUChar(delta);
11922 if (epartIsReg(modrm)) {
11923 /* fall through; apparently reg-reg is not possible */
11924 } else {
11925 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11926 delta += alen;
11927 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11928 0/*lower lane*/,
11929 loadLE(Ity_I64, mkexpr(addr)) );
11930 DIP("movlpd %s, %s\n",
11931 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11932 goto decode_success;
11933 }
11934 }
11935 /* 0F 12 = MOVLPS -- move from mem to low half of XMM. */
      /* 0F 12 = MOVHLPS -- move from hi half to lo half of XMM. */
11937 if (haveNo66noF2noF3(pfx)
11938 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11939 modrm = getUChar(delta);
11940 if (epartIsReg(modrm)) {
11941 delta += 1;
11942 putXMMRegLane64( gregOfRexRM(pfx,modrm),
11943 0/*lower lane*/,
11944 getXMMRegLane64( eregOfRexRM(pfx,modrm), 1 ));
11945 DIP("movhlps %s, %s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
11946 nameXMMReg(gregOfRexRM(pfx,modrm)));
11947 } else {
11948 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11949 delta += alen;
11950 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0/*lower lane*/,
11951 loadLE(Ity_I64, mkexpr(addr)) );
11952 DIP("movlps %s, %s\n",
11953 dis_buf, nameXMMReg( gregOfRexRM(pfx,modrm) ));
11954 }
11955 goto decode_success;
11956 }
11957 break;
11958
11959 case 0x13:
11960 /* 0F 13 = MOVLPS -- move from low half of XMM to mem. */
11961 if (haveNo66noF2noF3(pfx)
11962 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
11963 modrm = getUChar(delta);
11964 if (!epartIsReg(modrm)) {
11965 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11966 delta += alen;
11967 storeLE( mkexpr(addr),
11968 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11969 0/*lower lane*/ ) );
11970 DIP("movlps %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11971 dis_buf);
11972 goto decode_success;
11973 }
11974 /* else fall through */
11975 }
11976 /* 66 0F 13 = MOVLPD -- move from low half of XMM to mem. */
11977 /* Identical to MOVLPS ? */
11978 if (have66noF2noF3(pfx)
11979 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
11980 modrm = getUChar(delta);
11981 if (!epartIsReg(modrm)) {
11982 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
11983 delta += alen;
11984 storeLE( mkexpr(addr),
11985 getXMMRegLane64( gregOfRexRM(pfx,modrm),
11986 0/*lower lane*/ ) );
11987 DIP("movlpd %s, %s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
11988 dis_buf);
11989 goto decode_success;
11990 }
11991 /* else fall through */
11992 }
11993 break;
11994
11995 case 0x14:
11996 case 0x15:
11997 /* 0F 14 = UNPCKLPS -- unpack and interleave low part F32s */
11998 /* 0F 15 = UNPCKHPS -- unpack and interleave high part F32s */
11999 /* These just appear to be special cases of SHUFPS */
12000 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000012001 Bool hi = toBool(opc == 0x15);
12002 IRTemp sV = newTemp(Ity_V128);
12003 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012004 modrm = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000012005 UInt rG = gregOfRexRM(pfx,modrm);
12006 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000012007 if (epartIsReg(modrm)) {
sewardjc4530ae2012-05-21 10:18:49 +000012008 UInt rE = eregOfRexRM(pfx,modrm);
12009 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000012010 delta += 1;
12011 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000012012 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012013 } else {
12014 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12015 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12016 delta += alen;
12017 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardjc4530ae2012-05-21 10:18:49 +000012018 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012019 }
sewardj56c30312012-06-12 08:45:39 +000012020 IRTemp res = math_UNPCKxPS_128( sV, dV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000012021 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000012022 goto decode_success;
12023 }
12024 /* 66 0F 15 = UNPCKHPD -- unpack and interleave high part F64s */
12025 /* 66 0F 14 = UNPCKLPD -- unpack and interleave low part F64s */
      /* These just appear to be special cases of SHUFPD */
12027 if (have66noF2noF3(pfx)
12028 && sz == 2 /* could be 8 if rex also present */) {
sewardj56c30312012-06-12 08:45:39 +000012029 Bool hi = toBool(opc == 0x15);
sewardj80611e32012-01-20 13:07:24 +000012030 IRTemp sV = newTemp(Ity_V128);
12031 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000012032 modrm = getUChar(delta);
sewardj56c30312012-06-12 08:45:39 +000012033 UInt rG = gregOfRexRM(pfx,modrm);
12034 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000012035 if (epartIsReg(modrm)) {
sewardj56c30312012-06-12 08:45:39 +000012036 UInt rE = eregOfRexRM(pfx,modrm);
12037 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000012038 delta += 1;
12039 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000012040 nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012041 } else {
12042 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12043 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
12044 delta += alen;
12045 DIP("unpck%sps %s,%s\n", hi ? "h" : "l",
sewardj56c30312012-06-12 08:45:39 +000012046 dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000012047 }
sewardj56c30312012-06-12 08:45:39 +000012048 IRTemp res = math_UNPCKxPD_128( sV, dV, hi );
12049 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000012050 goto decode_success;
12051 }
12052 break;
12053
12054 case 0x16:
12055 /* 66 0F 16 = MOVHPD -- move from mem to high half of XMM. */
12056 /* These seems identical to MOVHPS. This instruction encoding is
12057 completely crazy. */
12058 if (have66noF2noF3(pfx)
12059 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12060 modrm = getUChar(delta);
12061 if (epartIsReg(modrm)) {
12062 /* fall through; apparently reg-reg is not possible */
12063 } else {
12064 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12065 delta += alen;
12066 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12067 loadLE(Ity_I64, mkexpr(addr)) );
12068 DIP("movhpd %s,%s\n", dis_buf,
12069 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12070 goto decode_success;
12071 }
12072 }
12073 /* 0F 16 = MOVHPS -- move from mem to high half of XMM. */
12074 /* 0F 16 = MOVLHPS -- move from lo half to hi half of XMM. */
12075 if (haveNo66noF2noF3(pfx)
12076 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12077 modrm = getUChar(delta);
12078 if (epartIsReg(modrm)) {
12079 delta += 1;
12080 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12081 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ) );
12082 DIP("movhps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12083 nameXMMReg(gregOfRexRM(pfx,modrm)));
12084 } else {
12085 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12086 delta += alen;
12087 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1/*upper lane*/,
12088 loadLE(Ity_I64, mkexpr(addr)) );
12089 DIP("movhps %s,%s\n", dis_buf,
12090 nameXMMReg( gregOfRexRM(pfx,modrm) ));
12091 }
12092 goto decode_success;
12093 }
12094 break;
12095
12096 case 0x17:
12097 /* 0F 17 = MOVHPS -- move from high half of XMM to mem. */
12098 if (haveNo66noF2noF3(pfx)
12099 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12100 modrm = getUChar(delta);
12101 if (!epartIsReg(modrm)) {
12102 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12103 delta += alen;
12104 storeLE( mkexpr(addr),
12105 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12106 1/*upper lane*/ ) );
12107 DIP("movhps %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12108 dis_buf);
12109 goto decode_success;
12110 }
12111 /* else fall through */
12112 }
12113 /* 66 0F 17 = MOVHPD -- move from high half of XMM to mem. */
12114 /* Again, this seems identical to MOVHPS. */
12115 if (have66noF2noF3(pfx)
12116 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12117 modrm = getUChar(delta);
12118 if (!epartIsReg(modrm)) {
12119 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12120 delta += alen;
12121 storeLE( mkexpr(addr),
12122 getXMMRegLane64( gregOfRexRM(pfx,modrm),
12123 1/*upper lane*/ ) );
12124 DIP("movhpd %s,%s\n", nameXMMReg( gregOfRexRM(pfx,modrm) ),
12125 dis_buf);
12126 goto decode_success;
12127 }
12128 /* else fall through */
12129 }
12130 break;
12131
12132 case 0x18:
      /* 0F 18 /0 = PREFETCHNTA -- prefetch into caches, */
      /* 0F 18 /1 = PREFETCHT0  -- with various different hints */
      /* 0F 18 /2 = PREFETCHT1 */
      /* 0F 18 /3 = PREFETCHT2 */
12137 if (haveNo66noF2noF3(pfx)
12138 && !epartIsReg(getUChar(delta))
12139 && gregLO3ofRM(getUChar(delta)) >= 0
12140 && gregLO3ofRM(getUChar(delta)) <= 3) {
florian55085f82012-11-21 00:36:55 +000012141 const HChar* hintstr = "??";
sewardj80611e32012-01-20 13:07:24 +000012142
12143 modrm = getUChar(delta);
12144 vassert(!epartIsReg(modrm));
12145
12146 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12147 delta += alen;
12148
12149 switch (gregLO3ofRM(modrm)) {
12150 case 0: hintstr = "nta"; break;
12151 case 1: hintstr = "t0"; break;
12152 case 2: hintstr = "t1"; break;
12153 case 3: hintstr = "t2"; break;
12154 default: vassert(0);
12155 }
12156
12157 DIP("prefetch%s %s\n", hintstr, dis_buf);
12158 goto decode_success;
12159 }
12160 break;
12161
12162 case 0x28:
12163 /* 66 0F 28 = MOVAPD -- move from E (mem or xmm) to G (xmm). */
12164 if (have66noF2noF3(pfx)
12165 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12166 modrm = getUChar(delta);
12167 if (epartIsReg(modrm)) {
12168 putXMMReg( gregOfRexRM(pfx,modrm),
12169 getXMMReg( eregOfRexRM(pfx,modrm) ));
12170 DIP("movapd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12171 nameXMMReg(gregOfRexRM(pfx,modrm)));
12172 delta += 1;
12173 } else {
12174 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12175 gen_SEGV_if_not_16_aligned( addr );
12176 putXMMReg( gregOfRexRM(pfx,modrm),
12177 loadLE(Ity_V128, mkexpr(addr)) );
12178 DIP("movapd %s,%s\n", dis_buf,
12179 nameXMMReg(gregOfRexRM(pfx,modrm)));
12180 delta += alen;
12181 }
12182 goto decode_success;
12183 }
12184 /* 0F 28 = MOVAPS -- move from E (mem or xmm) to G (xmm). */
12185 if (haveNo66noF2noF3(pfx)
12186 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12187 modrm = getUChar(delta);
12188 if (epartIsReg(modrm)) {
12189 putXMMReg( gregOfRexRM(pfx,modrm),
12190 getXMMReg( eregOfRexRM(pfx,modrm) ));
12191 DIP("movaps %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12192 nameXMMReg(gregOfRexRM(pfx,modrm)));
12193 delta += 1;
12194 } else {
12195 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12196 gen_SEGV_if_not_16_aligned( addr );
12197 putXMMReg( gregOfRexRM(pfx,modrm),
12198 loadLE(Ity_V128, mkexpr(addr)) );
12199 DIP("movaps %s,%s\n", dis_buf,
12200 nameXMMReg(gregOfRexRM(pfx,modrm)));
12201 delta += alen;
12202 }
12203 goto decode_success;
12204 }
12205 break;
12206
12207 case 0x29:
12208 /* 0F 29 = MOVAPS -- move from G (xmm) to E (mem or xmm). */
12209 if (haveNo66noF2noF3(pfx)
12210 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12211 modrm = getUChar(delta);
12212 if (epartIsReg(modrm)) {
sewardjae4793e2012-08-23 18:49:59 +000012213 putXMMReg( eregOfRexRM(pfx,modrm),
12214 getXMMReg( gregOfRexRM(pfx,modrm) ));
12215 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12216 nameXMMReg(eregOfRexRM(pfx,modrm)));
12217 delta += 1;
sewardj80611e32012-01-20 13:07:24 +000012218 } else {
12219 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12220 gen_SEGV_if_not_16_aligned( addr );
12221 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12222 DIP("movaps %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12223 dis_buf );
12224 delta += alen;
sewardj80611e32012-01-20 13:07:24 +000012225 }
sewardjae4793e2012-08-23 18:49:59 +000012226 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012227 }
12228 /* 66 0F 29 = MOVAPD -- move from G (xmm) to E (mem or xmm). */
12229 if (have66noF2noF3(pfx)
12230 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12231 modrm = getUChar(delta);
12232 if (epartIsReg(modrm)) {
12233 putXMMReg( eregOfRexRM(pfx,modrm),
sewardjcc3d2192013-03-27 11:37:33 +000012234 getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj80611e32012-01-20 13:07:24 +000012235 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
sewardjcc3d2192013-03-27 11:37:33 +000012236 nameXMMReg(eregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000012237 delta += 1;
12238 } else {
12239 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12240 gen_SEGV_if_not_16_aligned( addr );
12241 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12242 DIP("movapd %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
12243 dis_buf );
12244 delta += alen;
12245 }
12246 goto decode_success;
12247 }
12248 break;
12249
12250 case 0x2A:
12251 /* 0F 2A = CVTPI2PS -- convert 2 x I32 in mem/mmx to 2 x F32 in low
12252 half xmm */
12253 if (haveNo66noF2noF3(pfx) && sz == 4) {
12254 IRTemp arg64 = newTemp(Ity_I64);
12255 IRTemp rmode = newTemp(Ity_I32);
12256
12257 modrm = getUChar(delta);
12258 do_MMX_preamble();
12259 if (epartIsReg(modrm)) {
12260 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12261 delta += 1;
12262 DIP("cvtpi2ps %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12263 nameXMMReg(gregOfRexRM(pfx,modrm)));
12264 } else {
12265 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12266 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12267 delta += alen;
12268 DIP("cvtpi2ps %s,%s\n", dis_buf,
12269 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12270 }
12271
12272 assign( rmode, get_sse_roundingmode() );
12273
12274 putXMMRegLane32F(
12275 gregOfRexRM(pfx,modrm), 0,
12276 binop(Iop_F64toF32,
12277 mkexpr(rmode),
12278 unop(Iop_I32StoF64,
12279 unop(Iop_64to32, mkexpr(arg64)) )) );
12280
12281 putXMMRegLane32F(
12282 gregOfRexRM(pfx,modrm), 1,
12283 binop(Iop_F64toF32,
12284 mkexpr(rmode),
12285 unop(Iop_I32StoF64,
12286 unop(Iop_64HIto32, mkexpr(arg64)) )) );
12287
12288 goto decode_success;
12289 }
12290 /* F3 0F 2A = CVTSI2SS
12291 -- sz==4: convert I32 in mem/ireg to F32 in low quarter xmm
12292 -- sz==8: convert I64 in mem/ireg to F32 in low quarter xmm */
12293 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
12294 IRTemp rmode = newTemp(Ity_I32);
12295 assign( rmode, get_sse_roundingmode() );
12296 modrm = getUChar(delta);
12297 if (sz == 4) {
12298 IRTemp arg32 = newTemp(Ity_I32);
12299 if (epartIsReg(modrm)) {
12300 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12301 delta += 1;
12302 DIP("cvtsi2ss %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12303 nameXMMReg(gregOfRexRM(pfx,modrm)));
12304 } else {
12305 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12306 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
12307 delta += alen;
12308 DIP("cvtsi2ss %s,%s\n", dis_buf,
12309 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12310 }
12311 putXMMRegLane32F(
12312 gregOfRexRM(pfx,modrm), 0,
12313 binop(Iop_F64toF32,
12314 mkexpr(rmode),
12315 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
12316 } else {
12317 /* sz == 8 */
12318 IRTemp arg64 = newTemp(Ity_I64);
12319 if (epartIsReg(modrm)) {
12320 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
12321 delta += 1;
12322 DIP("cvtsi2ssq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
12323 nameXMMReg(gregOfRexRM(pfx,modrm)));
12324 } else {
12325 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12326 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12327 delta += alen;
12328 DIP("cvtsi2ssq %s,%s\n", dis_buf,
12329 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12330 }
12331 putXMMRegLane32F(
12332 gregOfRexRM(pfx,modrm), 0,
12333 binop(Iop_F64toF32,
12334 mkexpr(rmode),
12335 binop(Iop_I64StoF64, mkexpr(rmode), mkexpr(arg64)) ) );
12336 }
12337 goto decode_success;
12338 }
12339 /* F2 0F 2A = CVTSI2SD
12340 when sz==4 -- convert I32 in mem/ireg to F64 in low half xmm
12341 when sz==8 -- convert I64 in mem/ireg to F64 in low half xmm
12342 */
12343 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
12344 modrm = getUChar(delta);
12345 if (sz == 4) {
12346 IRTemp arg32 = newTemp(Ity_I32);
12347 if (epartIsReg(modrm)) {
12348 assign( arg32, getIReg32(eregOfRexRM(pfx,modrm)) );
12349 delta += 1;
sewardjc4530ae2012-05-21 10:18:49 +000012350 DIP("cvtsi2sdl %s,%s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
12351 nameXMMReg(gregOfRexRM(pfx,modrm)));
sewardj80611e32012-01-20 13:07:24 +000012352 } else {
12353 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12354 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
12355 delta += alen;
sewardjc4530ae2012-05-21 10:18:49 +000012356 DIP("cvtsi2sdl %s,%s\n", dis_buf,
12357 nameXMMReg(gregOfRexRM(pfx,modrm)) );
sewardj80611e32012-01-20 13:07:24 +000012358 }
12359 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
12360 unop(Iop_I32StoF64, mkexpr(arg32))
12361 );
12362 } else {
12363 /* sz == 8 */
12364 IRTemp arg64 = newTemp(Ity_I64);
12365 if (epartIsReg(modrm)) {
12366 assign( arg64, getIReg64(eregOfRexRM(pfx,modrm)) );
12367 delta += 1;
12368 DIP("cvtsi2sdq %s,%s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
12369 nameXMMReg(gregOfRexRM(pfx,modrm)));
12370 } else {
12371 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12372 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12373 delta += alen;
12374 DIP("cvtsi2sdq %s,%s\n", dis_buf,
12375 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12376 }
12377 putXMMRegLane64F(
12378 gregOfRexRM(pfx,modrm),
12379 0,
12380 binop( Iop_I64StoF64,
12381 get_sse_roundingmode(),
12382 mkexpr(arg64)
12383 )
12384 );
12385 }
12386 goto decode_success;
12387 }
12388 /* 66 0F 2A = CVTPI2PD -- convert 2 x I32 in mem/mmx to 2 x F64 in
12389 xmm(G) */
12390 if (have66noF2noF3(pfx) && sz == 2) {
12391 IRTemp arg64 = newTemp(Ity_I64);
12392
12393 modrm = getUChar(delta);
12394 if (epartIsReg(modrm)) {
12395 /* Only switch to MMX mode if the source is a MMX register.
12396 This is inconsistent with all other instructions which
12397 convert between XMM and (M64 or MMX), which always switch
12398 to MMX mode even if 64-bit operand is M64 and not MMX. At
12399 least, that's what the Intel docs seem to me to say.
12400 Fixes #210264. */
12401 do_MMX_preamble();
12402 assign( arg64, getMMXReg(eregLO3ofRM(modrm)) );
12403 delta += 1;
12404 DIP("cvtpi2pd %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
12405 nameXMMReg(gregOfRexRM(pfx,modrm)));
12406 } else {
12407 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12408 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
12409 delta += alen;
12410 DIP("cvtpi2pd %s,%s\n", dis_buf,
12411 nameXMMReg(gregOfRexRM(pfx,modrm)) );
12412 }
12413
12414 putXMMRegLane64F(
12415 gregOfRexRM(pfx,modrm), 0,
12416 unop(Iop_I32StoF64, unop(Iop_64to32, mkexpr(arg64)) )
12417 );
12418
12419 putXMMRegLane64F(
12420 gregOfRexRM(pfx,modrm), 1,
12421 unop(Iop_I32StoF64, unop(Iop_64HIto32, mkexpr(arg64)) )
12422 );
12423
12424 goto decode_success;
12425 }
12426 break;
12427
12428 case 0x2B:
12429 /* 66 0F 2B = MOVNTPD -- for us, just a plain SSE store. */
12430 /* 0F 2B = MOVNTPS -- for us, just a plain SSE store. */
12431 if ( (haveNo66noF2noF3(pfx) && sz == 4)
12432 || (have66noF2noF3(pfx) && sz == 2) ) {
12433 modrm = getUChar(delta);
12434 if (!epartIsReg(modrm)) {
12435 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12436 gen_SEGV_if_not_16_aligned( addr );
12437 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
12438 DIP("movntp%s %s,%s\n", sz==2 ? "d" : "s",
12439 dis_buf,
12440 nameXMMReg(gregOfRexRM(pfx,modrm)));
12441 delta += alen;
12442 goto decode_success;
12443 }
12444 /* else fall through */
12445 }
12446 break;
12447
12448 case 0x2C:
12449 case 0x2D:
12450 /* 0F 2D = CVTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
12451 I32 in mmx, according to prevailing SSE rounding mode */
12452 /* 0F 2C = CVTTPS2PI -- convert 2 x F32 in mem/low half xmm to 2 x
12453 I32 in mmx, rounding towards zero */
12454 if (haveNo66noF2noF3(pfx) && sz == 4) {
12455 IRTemp dst64 = newTemp(Ity_I64);
12456 IRTemp rmode = newTemp(Ity_I32);
12457 IRTemp f32lo = newTemp(Ity_F32);
12458 IRTemp f32hi = newTemp(Ity_F32);
12459 Bool r2zero = toBool(opc == 0x2C);
12460
12461 do_MMX_preamble();
12462 modrm = getUChar(delta);
12463
12464 if (epartIsReg(modrm)) {
12465 delta += 1;
12466 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
12467 assign(f32hi, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 1));
12468 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
12469 nameXMMReg(eregOfRexRM(pfx,modrm)),
12470 nameMMXReg(gregLO3ofRM(modrm)));
12471 } else {
12472 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12473 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
12474 assign(f32hi, loadLE(Ity_F32, binop( Iop_Add64,
12475 mkexpr(addr),
12476 mkU64(4) )));
12477 delta += alen;
12478 DIP("cvt%sps2pi %s,%s\n", r2zero ? "t" : "",
12479 dis_buf,
12480 nameMMXReg(gregLO3ofRM(modrm)));
12481 }
12482
12483 if (r2zero) {
12484 assign(rmode, mkU32((UInt)Irrm_ZERO) );
12485 } else {
12486 assign( rmode, get_sse_roundingmode() );
12487 }
12488
12489 assign(
12490 dst64,
12491 binop( Iop_32HLto64,
12492 binop( Iop_F64toI32S,
12493 mkexpr(rmode),
12494 unop( Iop_F32toF64, mkexpr(f32hi) ) ),
12495 binop( Iop_F64toI32S,
12496 mkexpr(rmode),
12497 unop( Iop_F32toF64, mkexpr(f32lo) ) )
12498 )
12499 );
12500
12501 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
12502 goto decode_success;
12503 }
12504 /* F3 0F 2D = CVTSS2SI
12505 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
12506 according to prevailing SSE rounding mode
12507 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
12508 according to prevailing SSE rounding mode
12509 */
12510 /* F3 0F 2C = CVTTSS2SI
12511 when sz==4 -- convert F32 in mem/low quarter xmm to I32 in ireg,
12512 truncating towards zero
12513 when sz==8 -- convert F32 in mem/low quarter xmm to I64 in ireg,
12514 truncating towards zero
12515 */
12516 if (haveF3no66noF2(pfx) && (sz == 4 || sz == 8)) {
sewardj80804d12012-05-22 10:48:13 +000012517 delta = dis_CVTxSS2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000012518 goto decode_success;
12519 }
12520 /* F2 0F 2D = CVTSD2SI
12521 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
12522 according to prevailing SSE rounding mode
12523 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
12524 according to prevailing SSE rounding mode
12525 */
12526 /* F2 0F 2C = CVTTSD2SI
12527 when sz==4 -- convert F64 in mem/low half xmm to I32 in ireg,
12528 truncating towards zero
12529 when sz==8 -- convert F64 in mem/low half xmm to I64 in ireg,
12530 truncating towards zero
12531 */
12532 if (haveF2no66noF3(pfx) && (sz == 4 || sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000012533 delta = dis_CVTxSD2SI( vbi, pfx, delta, False/*!isAvx*/, opc, sz);
sewardj80611e32012-01-20 13:07:24 +000012534 goto decode_success;
12535 }
12536 /* 66 0F 2D = CVTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
12537 I32 in mmx, according to prevailing SSE rounding mode */
12538 /* 66 0F 2C = CVTTPD2PI -- convert 2 x F64 in mem/xmm to 2 x
12539 I32 in mmx, rounding towards zero */
12540 if (have66noF2noF3(pfx) && sz == 2) {
12541 IRTemp dst64 = newTemp(Ity_I64);
12542 IRTemp rmode = newTemp(Ity_I32);
12543 IRTemp f64lo = newTemp(Ity_F64);
12544 IRTemp f64hi = newTemp(Ity_F64);
12545 Bool r2zero = toBool(opc == 0x2C);
12546
12547 do_MMX_preamble();
12548 modrm = getUChar(delta);
12549
12550 if (epartIsReg(modrm)) {
12551 delta += 1;
12552 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
12553 assign(f64hi, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 1));
12554 DIP("cvt%spd2pi %s,%s\n", r2zero ? "t" : "",
12555 nameXMMReg(eregOfRexRM(pfx,modrm)),
12556 nameMMXReg(gregLO3ofRM(modrm)));
12557 } else {
12558 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12559 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
12560 assign(f64hi, loadLE(Ity_F64, binop( Iop_Add64,
12561 mkexpr(addr),
12562 mkU64(8) )));
12563 delta += alen;
12564 DIP("cvt%spf2pi %s,%s\n", r2zero ? "t" : "",
12565 dis_buf,
12566 nameMMXReg(gregLO3ofRM(modrm)));
12567 }
12568
12569 if (r2zero) {
12570 assign(rmode, mkU32((UInt)Irrm_ZERO) );
12571 } else {
12572 assign( rmode, get_sse_roundingmode() );
12573 }
12574
12575 assign(
12576 dst64,
12577 binop( Iop_32HLto64,
12578 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64hi) ),
12579 binop( Iop_F64toI32S, mkexpr(rmode), mkexpr(f64lo) )
12580 )
12581 );
12582
12583 putMMXReg(gregLO3ofRM(modrm), mkexpr(dst64));
12584 goto decode_success;
12585 }
12586 break;
12587
12588 case 0x2E:
12589 case 0x2F:
sewardj80611e32012-01-20 13:07:24 +000012590 /* 66 0F 2E = UCOMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000012591 /* 66 0F 2F = COMISD -- 64F0x2 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000012592 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000012593 delta = dis_COMISD( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000012594 goto decode_success;
12595 }
sewardj80611e32012-01-20 13:07:24 +000012596 /* 0F 2E = UCOMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj4ed05e02012-06-18 15:01:30 +000012597 /* 0F 2F = COMISS -- 32F0x4 comparison G,E, and set ZCP */
sewardj80611e32012-01-20 13:07:24 +000012598 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000012599 delta = dis_COMISS( vbi, pfx, delta, False/*!isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000012600 goto decode_success;
12601 }
12602 break;
12603
12604 case 0x50:
12605 /* 0F 50 = MOVMSKPS - move 4 sign bits from 4 x F32 in xmm(E)
12606 to 4 lowest bits of ireg(G) */
sewardj8eb7ae82012-06-24 14:00:27 +000012607 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
12608 && epartIsReg(getUChar(delta))) {
sewardj80611e32012-01-20 13:07:24 +000012609 /* sz == 8 is a kludge to handle insns with REX.W redundantly
12610 set to 1, which has been known to happen:
12611
12612 4c 0f 50 d9 rex64X movmskps %xmm1,%r11d
12613
12614 20071106: Intel docs say that REX.W isn't redundant: when
12615 present, a 64-bit register is written; when not present, only
12616 the 32-bit half is written. However, testing on a Core2
12617 machine suggests the entire 64 bit register is written
12618 irrespective of the status of REX.W. That could be because
12619 of the default rule that says "if the lower half of a 32-bit
12620 register is written, the upper half is zeroed". By using
12621 putIReg32 here we inadvertently produce the same behaviour as
12622 the Core2, for the same reason -- putIReg32 implements said
12623 rule.
12624
12625 AMD docs give no indication that REX.W is even valid for this
12626 insn. */
sewardj8eb7ae82012-06-24 14:00:27 +000012627 delta = dis_MOVMSKPS_128( vbi, pfx, delta, False/*!isAvx*/ );
12628 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012629 }
12630 /* 66 0F 50 = MOVMSKPD - move 2 sign bits from 2 x F64 in xmm(E) to
12631 2 lowest bits of ireg(G) */
12632 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
12633 /* sz == 8 is a kludge to handle insns with REX.W redundantly
12634 set to 1, which has been known to happen:
12635 66 4c 0f 50 d9 rex64X movmskpd %xmm1,%r11d
12636 20071106: see further comments on MOVMSKPS implementation above.
12637 */
sewardj8eb7ae82012-06-24 14:00:27 +000012638 delta = dis_MOVMSKPD_128( vbi, pfx, delta, False/*!isAvx*/ );
12639 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000012640 }
12641 break;
12642
12643 case 0x51:
12644 /* F3 0F 51 = SQRTSS -- approx sqrt 32F0x4 from R/M to R */
12645 if (haveF3no66noF2(pfx) && sz == 4) {
12646 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
12647 "sqrtss", Iop_Sqrt32F0x4 );
12648 goto decode_success;
12649 }
12650 /* 0F 51 = SQRTPS -- approx sqrt 32Fx4 from R/M to R */
12651 if (haveNo66noF2noF3(pfx) && sz == 4) {
12652 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12653 "sqrtps", Iop_Sqrt32Fx4 );
12654 goto decode_success;
12655 }
12656 /* F2 0F 51 = SQRTSD -- approx sqrt 64F0x2 from R/M to R */
12657 if (haveF2no66noF3(pfx) && sz == 4) {
12658 delta = dis_SSE_E_to_G_unary_lo64( vbi, pfx, delta,
12659 "sqrtsd", Iop_Sqrt64F0x2 );
12660 goto decode_success;
12661 }
12662 /* 66 0F 51 = SQRTPD -- approx sqrt 64Fx2 from R/M to R */
12663 if (have66noF2noF3(pfx) && sz == 2) {
12664 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
12665 "sqrtpd", Iop_Sqrt64Fx2 );
12666 goto decode_success;
12667 }
12668 break;
12669
12670 case 0x52:
12671 /* F3 0F 52 = RSQRTSS -- approx reciprocal sqrt 32F0x4 from R/M to R */
12672 if (haveF3no66noF2(pfx) && sz == 4) {
12673 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012674 "rsqrtss", Iop_RSqrtEst32F0x4 );
sewardj80611e32012-01-20 13:07:24 +000012675 goto decode_success;
12676 }
12677 /* 0F 52 = RSQRTPS -- approx reciprocal sqrt 32Fx4 from R/M to R */
12678 if (haveNo66noF2noF3(pfx) && sz == 4) {
12679 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012680 "rsqrtps", Iop_RSqrtEst32Fx4 );
sewardj80611e32012-01-20 13:07:24 +000012681 goto decode_success;
12682 }
12683 break;
12684
12685 case 0x53:
12686 /* F3 0F 53 = RCPSS -- approx reciprocal 32F0x4 from R/M to R */
12687 if (haveF3no66noF2(pfx) && sz == 4) {
12688 delta = dis_SSE_E_to_G_unary_lo32( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012689 "rcpss", Iop_RecipEst32F0x4 );
sewardj80611e32012-01-20 13:07:24 +000012690 goto decode_success;
12691 }
12692 /* 0F 53 = RCPPS -- approx reciprocal 32Fx4 from R/M to R */
12693 if (haveNo66noF2noF3(pfx) && sz == 4) {
12694 delta = dis_SSE_E_to_G_unary_all( vbi, pfx, delta,
sewardj1ddee212014-08-24 14:00:19 +000012695 "rcpps", Iop_RecipEst32Fx4 );
sewardj80611e32012-01-20 13:07:24 +000012696 goto decode_success;
12697 }
12698 break;
12699
12700 case 0x54:
12701 /* 0F 54 = ANDPS -- G = G and E */
12702 if (haveNo66noF2noF3(pfx) && sz == 4) {
12703 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andps", Iop_AndV128 );
12704 goto decode_success;
12705 }
12706 /* 66 0F 54 = ANDPD -- G = G and E */
12707 if (have66noF2noF3(pfx) && sz == 2) {
12708 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "andpd", Iop_AndV128 );
12709 goto decode_success;
12710 }
12711 break;
12712
12713 case 0x55:
12714 /* 0F 55 = ANDNPS -- G = (not G) and E */
12715 if (haveNo66noF2noF3(pfx) && sz == 4) {
12716 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnps",
12717 Iop_AndV128 );
12718 goto decode_success;
12719 }
12720 /* 66 0F 55 = ANDNPD -- G = (not G) and E */
12721 if (have66noF2noF3(pfx) && sz == 2) {
12722 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "andnpd",
12723 Iop_AndV128 );
12724 goto decode_success;
12725 }
12726 break;
12727
12728 case 0x56:
12729 /* 0F 56 = ORPS -- G = G or E */
12730 if (haveNo66noF2noF3(pfx) && sz == 4) {
12731 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orps", Iop_OrV128 );
12732 goto decode_success;
12733 }
12734 /* 66 0F 56 = ORPD -- G = G or E */
12735 if (have66noF2noF3(pfx) && sz == 2) {
12736 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "orpd", Iop_OrV128 );
12737 goto decode_success;
12738 }
12739 break;
12740
12741 case 0x57:
12742 /* 66 0F 57 = XORPD -- G = G xor E */
12743 if (have66noF2noF3(pfx) && sz == 2) {
12744 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorpd", Iop_XorV128 );
12745 goto decode_success;
12746 }
sewardjc4530ae2012-05-21 10:18:49 +000012747 /* 0F 57 = XORPS -- G = G xor E */
sewardj80611e32012-01-20 13:07:24 +000012748 if (haveNo66noF2noF3(pfx) && sz == 4) {
12749 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "xorps", Iop_XorV128 );
12750 goto decode_success;
12751 }
12752 break;
12753
12754 case 0x58:
12755 /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */
12756 if (haveNo66noF2noF3(pfx) && sz == 4) {
12757 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addps", Iop_Add32Fx4 );
12758 goto decode_success;
12759 }
12760 /* F3 0F 58 = ADDSS -- add 32F0x4 from R/M to R */
12761 if (haveF3no66noF2(pfx) && sz == 4) {
12762 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "addss", Iop_Add32F0x4 );
12763 goto decode_success;
12764 }
12765 /* F2 0F 58 = ADDSD -- add 64F0x2 from R/M to R */
12766 if (haveF2no66noF3(pfx)
12767 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12768 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "addsd", Iop_Add64F0x2 );
12769 goto decode_success;
12770 }
12771 /* 66 0F 58 = ADDPD -- add 64Fx2 from R/M to R */
12772 if (have66noF2noF3(pfx)
12773 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12774 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "addpd", Iop_Add64Fx2 );
12775 goto decode_success;
12776 }
12777 break;
12778
12779 case 0x59:
12780 /* F2 0F 59 = MULSD -- mul 64F0x2 from R/M to R */
12781 if (haveF2no66noF3(pfx)
12782 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12783 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "mulsd", Iop_Mul64F0x2 );
12784 goto decode_success;
12785 }
12786 /* F3 0F 59 = MULSS -- mul 32F0x4 from R/M to R */
12787 if (haveF3no66noF2(pfx) && sz == 4) {
12788 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "mulss", Iop_Mul32F0x4 );
12789 goto decode_success;
12790 }
12791 /* 0F 59 = MULPS -- mul 32Fx4 from R/M to R */
12792 if (haveNo66noF2noF3(pfx) && sz == 4) {
12793 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulps", Iop_Mul32Fx4 );
12794 goto decode_success;
12795 }
12796 /* 66 0F 59 = MULPD -- mul 64Fx2 from R/M to R */
12797 if (have66noF2noF3(pfx)
12798 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
12799 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "mulpd", Iop_Mul64Fx2 );
12800 goto decode_success;
12801 }
12802 break;
12803
12804 case 0x5A:
12805 /* 0F 5A = CVTPS2PD -- convert 2 x F32 in low half mem/xmm to 2 x
12806 F64 in xmm(G). */
12807 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012808 delta = dis_CVTPS2PD_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012809 goto decode_success;
12810 }
12811 /* F3 0F 5A = CVTSS2SD -- convert F32 in mem/low 1/4 xmm to F64 in
12812 low half xmm(G) */
12813 if (haveF3no66noF2(pfx) && sz == 4) {
12814 IRTemp f32lo = newTemp(Ity_F32);
12815
12816 modrm = getUChar(delta);
12817 if (epartIsReg(modrm)) {
12818 delta += 1;
12819 assign(f32lo, getXMMRegLane32F(eregOfRexRM(pfx,modrm), 0));
12820 DIP("cvtss2sd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12821 nameXMMReg(gregOfRexRM(pfx,modrm)));
12822 } else {
12823 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12824 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)));
12825 delta += alen;
12826 DIP("cvtss2sd %s,%s\n", dis_buf,
12827 nameXMMReg(gregOfRexRM(pfx,modrm)));
12828 }
12829
12830 putXMMRegLane64F( gregOfRexRM(pfx,modrm), 0,
12831 unop( Iop_F32toF64, mkexpr(f32lo) ) );
12832
12833 goto decode_success;
12834 }
12835 /* F2 0F 5A = CVTSD2SS -- convert F64 in mem/low half xmm to F32 in
12836 low 1/4 xmm(G), according to prevailing SSE rounding mode */
12837 if (haveF2no66noF3(pfx) && sz == 4) {
12838 IRTemp rmode = newTemp(Ity_I32);
12839 IRTemp f64lo = newTemp(Ity_F64);
12840
12841 modrm = getUChar(delta);
12842 if (epartIsReg(modrm)) {
12843 delta += 1;
12844 assign(f64lo, getXMMRegLane64F(eregOfRexRM(pfx,modrm), 0));
12845 DIP("cvtsd2ss %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
12846 nameXMMReg(gregOfRexRM(pfx,modrm)));
12847 } else {
12848 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
12849 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)));
12850 delta += alen;
12851 DIP("cvtsd2ss %s,%s\n", dis_buf,
12852 nameXMMReg(gregOfRexRM(pfx,modrm)));
12853 }
12854
12855 assign( rmode, get_sse_roundingmode() );
12856 putXMMRegLane32F(
12857 gregOfRexRM(pfx,modrm), 0,
12858 binop( Iop_F64toF32, mkexpr(rmode), mkexpr(f64lo) )
12859 );
12860
12861 goto decode_success;
12862 }
12863 /* 66 0F 5A = CVTPD2PS -- convert 2 x F64 in mem/xmm to 2 x F32 in
12864 lo half xmm(G), rounding according to prevailing SSE rounding
12865 mode, and zero upper half */
12866 /* Note, this is practically identical to CVTPD2DQ. It would have
sewardjc4530ae2012-05-21 10:18:49 +000012867 been nice to merge them together. */
sewardj80611e32012-01-20 13:07:24 +000012868 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000012869 delta = dis_CVTPD2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012870 goto decode_success;
12871 }
12872 break;
12873
12874 case 0x5B:
12875 /* F3 0F 5B = CVTTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12876 xmm(G), rounding towards zero */
12877 /* 66 0F 5B = CVTPS2DQ -- convert 4 x F32 in mem/xmm to 4 x I32 in
12878 xmm(G), as per the prevailing rounding mode */
12879 if ( (have66noF2noF3(pfx) && sz == 2)
12880 || (haveF3no66noF2(pfx) && sz == 4) ) {
sewardj251b59e2012-05-25 13:51:07 +000012881 Bool r2zero = toBool(sz == 4); // FIXME -- unreliable (???)
sewardj66becf32012-06-18 23:15:16 +000012882 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta, False/*!isAvx*/, r2zero );
sewardj80611e32012-01-20 13:07:24 +000012883 goto decode_success;
12884 }
12885 /* 0F 5B = CVTDQ2PS -- convert 4 x I32 in mem/xmm to 4 x F32 in
12886 xmm(G) */
12887 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj66becf32012-06-18 23:15:16 +000012888 delta = dis_CVTDQ2PS_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000012889 goto decode_success;
12890 }
12891 break;
12892
12893 case 0x5C:
12894 /* F3 0F 5C = SUBSS -- sub 32F0x4 from R/M to R */
12895 if (haveF3no66noF2(pfx) && sz == 4) {
12896 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "subss", Iop_Sub32F0x4 );
12897 goto decode_success;
12898 }
12899 /* F2 0F 5C = SUBSD -- sub 64F0x2 from R/M to R */
12900 if (haveF2no66noF3(pfx)
12901 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
12902 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "subsd", Iop_Sub64F0x2 );
12903 goto decode_success;
12904 }
12905 /* 0F 5C = SUBPS -- sub 32Fx4 from R/M to R */
12906 if (haveNo66noF2noF3(pfx) && sz == 4) {
12907 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subps", Iop_Sub32Fx4 );
12908 goto decode_success;
12909 }
12910 /* 66 0F 5C = SUBPD -- sub 64Fx2 from R/M to R */
12911 if (have66noF2noF3(pfx) && sz == 2) {
12912 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "subpd", Iop_Sub64Fx2 );
12913 goto decode_success;
12914 }
12915 break;
12916
12917 case 0x5D:
12918 /* 0F 5D = MINPS -- min 32Fx4 from R/M to R */
12919 if (haveNo66noF2noF3(pfx) && sz == 4) {
12920 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minps", Iop_Min32Fx4 );
12921 goto decode_success;
12922 }
12923 /* F3 0F 5D = MINSS -- min 32F0x4 from R/M to R */
12924 if (haveF3no66noF2(pfx) && sz == 4) {
12925 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "minss", Iop_Min32F0x4 );
12926 goto decode_success;
12927 }
12928 /* F2 0F 5D = MINSD -- min 64F0x2 from R/M to R */
12929 if (haveF2no66noF3(pfx) && sz == 4) {
12930 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "minsd", Iop_Min64F0x2 );
12931 goto decode_success;
12932 }
12933 /* 66 0F 5D = MINPD -- min 64Fx2 from R/M to R */
12934 if (have66noF2noF3(pfx) && sz == 2) {
12935 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "minpd", Iop_Min64Fx2 );
12936 goto decode_success;
12937 }
12938 break;
12939
12940 case 0x5E:
12941 /* F2 0F 5E = DIVSD -- div 64F0x2 from R/M to R */
12942 if (haveF2no66noF3(pfx) && sz == 4) {
12943 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "divsd", Iop_Div64F0x2 );
12944 goto decode_success;
12945 }
12946 /* 0F 5E = DIVPS -- div 32Fx4 from R/M to R */
12947 if (haveNo66noF2noF3(pfx) && sz == 4) {
12948 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divps", Iop_Div32Fx4 );
12949 goto decode_success;
12950 }
12951 /* F3 0F 5E = DIVSS -- div 32F0x4 from R/M to R */
12952 if (haveF3no66noF2(pfx) && sz == 4) {
12953 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "divss", Iop_Div32F0x4 );
12954 goto decode_success;
12955 }
12956 /* 66 0F 5E = DIVPD -- div 64Fx2 from R/M to R */
12957 if (have66noF2noF3(pfx) && sz == 2) {
12958 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "divpd", Iop_Div64Fx2 );
12959 goto decode_success;
12960 }
12961 break;
12962
12963 case 0x5F:
12964 /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
12965 if (haveNo66noF2noF3(pfx) && sz == 4) {
12966 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxps", Iop_Max32Fx4 );
12967 goto decode_success;
12968 }
12969 /* F3 0F 5F = MAXSS -- max 32F0x4 from R/M to R */
12970 if (haveF3no66noF2(pfx) && sz == 4) {
12971 delta = dis_SSE_E_to_G_lo32( vbi, pfx, delta, "maxss", Iop_Max32F0x4 );
12972 goto decode_success;
12973 }
12974 /* F2 0F 5F = MAXSD -- max 64F0x2 from R/M to R */
12975 if (haveF2no66noF3(pfx) && sz == 4) {
12976 delta = dis_SSE_E_to_G_lo64( vbi, pfx, delta, "maxsd", Iop_Max64F0x2 );
12977 goto decode_success;
12978 }
12979 /* 66 0F 5F = MAXPD -- max 64Fx2 from R/M to R */
12980 if (have66noF2noF3(pfx) && sz == 2) {
12981 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "maxpd", Iop_Max64Fx2 );
12982 goto decode_success;
12983 }
12984 break;
12985
12986 case 0x60:
12987 /* 66 0F 60 = PUNPCKLBW */
12988 if (have66noF2noF3(pfx) && sz == 2) {
12989 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
12990 "punpcklbw",
12991 Iop_InterleaveLO8x16, True );
12992 goto decode_success;
12993 }
12994 break;
12995
12996 case 0x61:
12997 /* 66 0F 61 = PUNPCKLWD */
12998 if (have66noF2noF3(pfx) && sz == 2) {
12999 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13000 "punpcklwd",
13001 Iop_InterleaveLO16x8, True );
13002 goto decode_success;
13003 }
13004 break;
13005
13006 case 0x62:
13007 /* 66 0F 62 = PUNPCKLDQ */
13008 if (have66noF2noF3(pfx) && sz == 2) {
13009 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13010 "punpckldq",
13011 Iop_InterleaveLO32x4, True );
13012 goto decode_success;
13013 }
13014 break;
13015
13016 case 0x63:
13017 /* 66 0F 63 = PACKSSWB */
13018 if (have66noF2noF3(pfx) && sz == 2) {
13019 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13020 "packsswb",
13021 Iop_QNarrowBin16Sto8Sx16, True );
13022 goto decode_success;
13023 }
13024 break;
13025
13026 case 0x64:
13027 /* 66 0F 64 = PCMPGTB */
13028 if (have66noF2noF3(pfx) && sz == 2) {
13029 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13030 "pcmpgtb", Iop_CmpGT8Sx16, False );
13031 goto decode_success;
13032 }
13033 break;
13034
13035 case 0x65:
13036 /* 66 0F 65 = PCMPGTW */
13037 if (have66noF2noF3(pfx) && sz == 2) {
13038 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13039 "pcmpgtw", Iop_CmpGT16Sx8, False );
13040 goto decode_success;
13041 }
13042 break;
13043
13044 case 0x66:
13045 /* 66 0F 66 = PCMPGTD */
13046 if (have66noF2noF3(pfx) && sz == 2) {
13047 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13048 "pcmpgtd", Iop_CmpGT32Sx4, False );
13049 goto decode_success;
13050 }
13051 break;
13052
13053 case 0x67:
13054 /* 66 0F 67 = PACKUSWB */
13055 if (have66noF2noF3(pfx) && sz == 2) {
13056 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13057 "packuswb",
13058 Iop_QNarrowBin16Sto8Ux16, True );
13059 goto decode_success;
13060 }
13061 break;
13062
13063 case 0x68:
13064 /* 66 0F 68 = PUNPCKHBW */
13065 if (have66noF2noF3(pfx) && sz == 2) {
13066 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13067 "punpckhbw",
13068 Iop_InterleaveHI8x16, True );
13069 goto decode_success;
13070 }
13071 break;
13072
13073 case 0x69:
13074 /* 66 0F 69 = PUNPCKHWD */
13075 if (have66noF2noF3(pfx) && sz == 2) {
13076 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13077 "punpckhwd",
13078 Iop_InterleaveHI16x8, True );
13079 goto decode_success;
13080 }
13081 break;
13082
13083 case 0x6A:
13084 /* 66 0F 6A = PUNPCKHDQ */
13085 if (have66noF2noF3(pfx) && sz == 2) {
13086 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13087 "punpckhdq",
13088 Iop_InterleaveHI32x4, True );
13089 goto decode_success;
13090 }
13091 break;
13092
13093 case 0x6B:
13094 /* 66 0F 6B = PACKSSDW */
13095 if (have66noF2noF3(pfx) && sz == 2) {
13096 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13097 "packssdw",
13098 Iop_QNarrowBin32Sto16Sx8, True );
13099 goto decode_success;
13100 }
13101 break;
13102
13103 case 0x6C:
13104 /* 66 0F 6C = PUNPCKLQDQ */
13105 if (have66noF2noF3(pfx) && sz == 2) {
13106 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13107 "punpcklqdq",
13108 Iop_InterleaveLO64x2, True );
13109 goto decode_success;
13110 }
13111 break;
13112
13113 case 0x6D:
13114 /* 66 0F 6D = PUNPCKHQDQ */
13115 if (have66noF2noF3(pfx) && sz == 2) {
13116 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13117 "punpckhqdq",
13118 Iop_InterleaveHI64x2, True );
13119 goto decode_success;
13120 }
13121 break;
13122
13123 case 0x6E:
13124 /* 66 0F 6E = MOVD from ireg32/m32 to xmm lo 1/4,
13125 zeroing high 3/4 of xmm. */
13126 /* or from ireg64/m64 to xmm lo 1/2,
13127 zeroing high 1/2 of xmm. */
13128 if (have66noF2noF3(pfx)) {
13129 vassert(sz == 2 || sz == 8);
13130 if (sz == 2) sz = 4;
13131 modrm = getUChar(delta);
13132 if (epartIsReg(modrm)) {
13133 delta += 1;
13134 if (sz == 4) {
13135 putXMMReg(
13136 gregOfRexRM(pfx,modrm),
13137 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
13138 );
13139 DIP("movd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
13140 nameXMMReg(gregOfRexRM(pfx,modrm)));
13141 } else {
13142 putXMMReg(
13143 gregOfRexRM(pfx,modrm),
13144 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
13145 );
13146 DIP("movq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
13147 nameXMMReg(gregOfRexRM(pfx,modrm)));
13148 }
13149 } else {
13150 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13151 delta += alen;
13152 putXMMReg(
13153 gregOfRexRM(pfx,modrm),
13154 sz == 4
13155 ? unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)) )
13156 : unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)) )
13157 );
13158 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q', dis_buf,
13159 nameXMMReg(gregOfRexRM(pfx,modrm)));
13160 }
13161 goto decode_success;
13162 }
13163 break;
13164
13165 case 0x6F:
13166 if (have66noF2noF3(pfx)
13167 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13168 /* 66 0F 6F = MOVDQA -- move from E (mem or xmm) to G (xmm). */
13169 modrm = getUChar(delta);
13170 if (epartIsReg(modrm)) {
13171 putXMMReg( gregOfRexRM(pfx,modrm),
13172 getXMMReg( eregOfRexRM(pfx,modrm) ));
13173 DIP("movdqa %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13174 nameXMMReg(gregOfRexRM(pfx,modrm)));
13175 delta += 1;
13176 } else {
13177 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13178 gen_SEGV_if_not_16_aligned( addr );
13179 putXMMReg( gregOfRexRM(pfx,modrm),
13180 loadLE(Ity_V128, mkexpr(addr)) );
13181 DIP("movdqa %s,%s\n", dis_buf,
13182 nameXMMReg(gregOfRexRM(pfx,modrm)));
13183 delta += alen;
13184 }
13185 goto decode_success;
13186 }
13187 if (haveF3no66noF2(pfx) && sz == 4) {
13188 /* F3 0F 6F = MOVDQU -- move from E (mem or xmm) to G (xmm). */
13189 modrm = getUChar(delta);
13190 if (epartIsReg(modrm)) {
13191 putXMMReg( gregOfRexRM(pfx,modrm),
13192 getXMMReg( eregOfRexRM(pfx,modrm) ));
13193 DIP("movdqu %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13194 nameXMMReg(gregOfRexRM(pfx,modrm)));
13195 delta += 1;
13196 } else {
13197 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13198 putXMMReg( gregOfRexRM(pfx,modrm),
13199 loadLE(Ity_V128, mkexpr(addr)) );
13200 DIP("movdqu %s,%s\n", dis_buf,
13201 nameXMMReg(gregOfRexRM(pfx,modrm)));
13202 delta += alen;
13203 }
13204 goto decode_success;
13205 }
13206 break;
13207
13208 case 0x70:
13209 /* 66 0F 70 = PSHUFD -- rearrange 4x32 from E(xmm or mem) to G(xmm) */
13210 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000013211 delta = dis_PSHUFD_32x4( vbi, pfx, delta, False/*!writesYmm*/);
sewardj80611e32012-01-20 13:07:24 +000013212 goto decode_success;
13213 }
13214 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13215 /* 0F 70 = PSHUFW -- rearrange 4x16 from E(mmx or mem) to G(mmx) */
13216 if (haveNo66noF2noF3(pfx) && sz == 4) {
13217 Int order;
13218 IRTemp sV, dV, s3, s2, s1, s0;
13219 s3 = s2 = s1 = s0 = IRTemp_INVALID;
13220 sV = newTemp(Ity_I64);
13221 dV = newTemp(Ity_I64);
13222 do_MMX_preamble();
13223 modrm = getUChar(delta);
13224 if (epartIsReg(modrm)) {
13225 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
13226 order = (Int)getUChar(delta+1);
13227 delta += 1+1;
13228 DIP("pshufw $%d,%s,%s\n", order,
13229 nameMMXReg(eregLO3ofRM(modrm)),
13230 nameMMXReg(gregLO3ofRM(modrm)));
13231 } else {
13232 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13233 1/*extra byte after amode*/ );
13234 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
13235 order = (Int)getUChar(delta+alen);
13236 delta += 1+alen;
13237 DIP("pshufw $%d,%s,%s\n", order,
13238 dis_buf,
13239 nameMMXReg(gregLO3ofRM(modrm)));
13240 }
13241 breakup64to16s( sV, &s3, &s2, &s1, &s0 );
13242# define SEL(n) \
13243 ((n)==0 ? s0 : ((n)==1 ? s1 : ((n)==2 ? s2 : s3)))
13244 assign(dV,
sewardjcc3d2192013-03-27 11:37:33 +000013245 mk64from16s( SEL((order>>6)&3), SEL((order>>4)&3),
sewardj80611e32012-01-20 13:07:24 +000013246 SEL((order>>2)&3), SEL((order>>0)&3) )
13247 );
13248 putMMXReg(gregLO3ofRM(modrm), mkexpr(dV));
13249# undef SEL
13250 goto decode_success;
13251 }
13252 /* F2 0F 70 = PSHUFLW -- rearrange lower half 4x16 from E(xmm or
13253 mem) to G(xmm), and copy upper half */
13254 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013255 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13256 False/*!isAvx*/, False/*!xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000013257 goto decode_success;
13258 }
13259 /* F3 0F 70 = PSHUFHW -- rearrange upper half 4x16 from E(xmm or
13260 mem) to G(xmm), and copy lower half */
13261 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013262 delta = dis_PSHUFxW_128( vbi, pfx, delta,
13263 False/*!isAvx*/, True/*xIsH*/ );
sewardj80611e32012-01-20 13:07:24 +000013264 goto decode_success;
13265 }
13266 break;
13267
13268 case 0x71:
13269 /* 66 0F 71 /2 ib = PSRLW by immediate */
13270 if (have66noF2noF3(pfx) && sz == 2
13271 && epartIsReg(getUChar(delta))
13272 && gregLO3ofRM(getUChar(delta)) == 2) {
13273 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlw", Iop_ShrN16x8 );
13274 goto decode_success;
13275 }
13276 /* 66 0F 71 /4 ib = PSRAW by immediate */
13277 if (have66noF2noF3(pfx) && sz == 2
13278 && epartIsReg(getUChar(delta))
13279 && gregLO3ofRM(getUChar(delta)) == 4) {
13280 delta = dis_SSE_shiftE_imm( pfx, delta, "psraw", Iop_SarN16x8 );
13281 goto decode_success;
13282 }
13283 /* 66 0F 71 /6 ib = PSLLW by immediate */
13284 if (have66noF2noF3(pfx) && sz == 2
13285 && epartIsReg(getUChar(delta))
13286 && gregLO3ofRM(getUChar(delta)) == 6) {
13287 delta = dis_SSE_shiftE_imm( pfx, delta, "psllw", Iop_ShlN16x8 );
13288 goto decode_success;
13289 }
13290 break;
13291
13292 case 0x72:
13293 /* 66 0F 72 /2 ib = PSRLD by immediate */
13294 if (have66noF2noF3(pfx) && sz == 2
13295 && epartIsReg(getUChar(delta))
13296 && gregLO3ofRM(getUChar(delta)) == 2) {
13297 delta = dis_SSE_shiftE_imm( pfx, delta, "psrld", Iop_ShrN32x4 );
13298 goto decode_success;
13299 }
13300 /* 66 0F 72 /4 ib = PSRAD by immediate */
13301 if (have66noF2noF3(pfx) && sz == 2
13302 && epartIsReg(getUChar(delta))
13303 && gregLO3ofRM(getUChar(delta)) == 4) {
13304 delta = dis_SSE_shiftE_imm( pfx, delta, "psrad", Iop_SarN32x4 );
13305 goto decode_success;
13306 }
13307 /* 66 0F 72 /6 ib = PSLLD by immediate */
13308 if (have66noF2noF3(pfx) && sz == 2
13309 && epartIsReg(getUChar(delta))
13310 && gregLO3ofRM(getUChar(delta)) == 6) {
13311 delta = dis_SSE_shiftE_imm( pfx, delta, "pslld", Iop_ShlN32x4 );
13312 goto decode_success;
13313 }
13314 break;
13315
13316 case 0x73:
13317 /* 66 0F 73 /3 ib = PSRLDQ by immediate */
13318 /* note, if mem case ever filled in, 1 byte after amode */
13319 if (have66noF2noF3(pfx) && sz == 2
13320 && epartIsReg(getUChar(delta))
13321 && gregLO3ofRM(getUChar(delta)) == 3) {
sewardjc4530ae2012-05-21 10:18:49 +000013322 Int imm = (Int)getUChar(delta+1);
13323 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000013324 DIP("psrldq $%d,%s\n", imm, nameXMMReg(reg));
sewardj80611e32012-01-20 13:07:24 +000013325 delta += 2;
sewardjc4530ae2012-05-21 10:18:49 +000013326 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013327 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000013328 putXMMReg(reg, mkexpr(math_PSRLDQ( sV, imm )));
sewardj80611e32012-01-20 13:07:24 +000013329 goto decode_success;
13330 }
13331 /* 66 0F 73 /7 ib = PSLLDQ by immediate */
13332 /* note, if mem case ever filled in, 1 byte after amode */
13333 if (have66noF2noF3(pfx) && sz == 2
13334 && epartIsReg(getUChar(delta))
13335 && gregLO3ofRM(getUChar(delta)) == 7) {
sewardj251b59e2012-05-25 13:51:07 +000013336 Int imm = (Int)getUChar(delta+1);
13337 Int reg = eregOfRexRM(pfx,getUChar(delta));
sewardj80611e32012-01-20 13:07:24 +000013338 DIP("pslldq $%d,%s\n", imm, nameXMMReg(reg));
13339 vassert(imm >= 0 && imm <= 255);
13340 delta += 2;
sewardj251b59e2012-05-25 13:51:07 +000013341 IRTemp sV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013342 assign( sV, getXMMReg(reg) );
sewardj251b59e2012-05-25 13:51:07 +000013343 putXMMReg(reg, mkexpr(math_PSLLDQ( sV, imm )));
sewardj80611e32012-01-20 13:07:24 +000013344 goto decode_success;
13345 }
13346 /* 66 0F 73 /2 ib = PSRLQ by immediate */
13347 if (have66noF2noF3(pfx) && sz == 2
13348 && epartIsReg(getUChar(delta))
13349 && gregLO3ofRM(getUChar(delta)) == 2) {
13350 delta = dis_SSE_shiftE_imm( pfx, delta, "psrlq", Iop_ShrN64x2 );
13351 goto decode_success;
13352 }
13353 /* 66 0F 73 /6 ib = PSLLQ by immediate */
13354 if (have66noF2noF3(pfx) && sz == 2
13355 && epartIsReg(getUChar(delta))
13356 && gregLO3ofRM(getUChar(delta)) == 6) {
13357 delta = dis_SSE_shiftE_imm( pfx, delta, "psllq", Iop_ShlN64x2 );
13358 goto decode_success;
13359 }
13360 break;
13361
13362 case 0x74:
13363 /* 66 0F 74 = PCMPEQB */
13364 if (have66noF2noF3(pfx) && sz == 2) {
13365 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13366 "pcmpeqb", Iop_CmpEQ8x16, False );
13367 goto decode_success;
13368 }
13369 break;
13370
13371 case 0x75:
13372 /* 66 0F 75 = PCMPEQW */
13373 if (have66noF2noF3(pfx) && sz == 2) {
13374 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13375 "pcmpeqw", Iop_CmpEQ16x8, False );
13376 goto decode_success;
13377 }
13378 break;
13379
13380 case 0x76:
13381 /* 66 0F 76 = PCMPEQD */
13382 if (have66noF2noF3(pfx) && sz == 2) {
13383 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13384 "pcmpeqd", Iop_CmpEQ32x4, False );
13385 goto decode_success;
13386 }
13387 break;
13388
13389 case 0x7E:
13390 /* F3 0F 7E = MOVQ -- move 64 bits from E (mem or lo half xmm) to
13391 G (lo half xmm). Upper half of G is zeroed out. */
13392 if (haveF3no66noF2(pfx)
13393 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13394 modrm = getUChar(delta);
13395 if (epartIsReg(modrm)) {
13396 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
13397 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
13398 /* zero bits 127:64 */
13399 putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkU64(0) );
13400 DIP("movsd %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
13401 nameXMMReg(gregOfRexRM(pfx,modrm)));
13402 delta += 1;
13403 } else {
13404 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13405 putXMMReg( gregOfRexRM(pfx,modrm), mkV128(0) );
13406 putXMMRegLane64( gregOfRexRM(pfx,modrm), 0,
13407 loadLE(Ity_I64, mkexpr(addr)) );
13408 DIP("movsd %s,%s\n", dis_buf,
13409 nameXMMReg(gregOfRexRM(pfx,modrm)));
13410 delta += alen;
13411 }
13412 goto decode_success;
13413 }
13414 /* 66 0F 7E = MOVD from xmm low 1/4 to ireg32 or m32. */
13415 /* or from xmm low 1/2 to ireg64 or m64. */
13416 if (have66noF2noF3(pfx) && (sz == 2 || sz == 8)) {
13417 if (sz == 2) sz = 4;
13418 modrm = getUChar(delta);
13419 if (epartIsReg(modrm)) {
13420 delta += 1;
13421 if (sz == 4) {
13422 putIReg32( eregOfRexRM(pfx,modrm),
13423 getXMMRegLane32(gregOfRexRM(pfx,modrm), 0) );
13424 DIP("movd %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13425 nameIReg32(eregOfRexRM(pfx,modrm)));
sewardjcc3d2192013-03-27 11:37:33 +000013426 } else {
sewardj80611e32012-01-20 13:07:24 +000013427 putIReg64( eregOfRexRM(pfx,modrm),
13428 getXMMRegLane64(gregOfRexRM(pfx,modrm), 0) );
13429 DIP("movq %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13430 nameIReg64(eregOfRexRM(pfx,modrm)));
sewardjcc3d2192013-03-27 11:37:33 +000013431 }
sewardj80611e32012-01-20 13:07:24 +000013432 } else {
13433 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13434 delta += alen;
13435 storeLE( mkexpr(addr),
13436 sz == 4
13437 ? getXMMRegLane32(gregOfRexRM(pfx,modrm),0)
13438 : getXMMRegLane64(gregOfRexRM(pfx,modrm),0) );
13439 DIP("mov%c %s, %s\n", sz == 4 ? 'd' : 'q',
13440 nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13441 }
13442 goto decode_success;
13443 }
13444 break;
13445
13446 case 0x7F:
13447 /* F3 0F 7F = MOVDQU -- move from G (xmm) to E (mem or xmm). */
13448 if (haveF3no66noF2(pfx) && sz == 4) {
13449 modrm = getUChar(delta);
13450 if (epartIsReg(modrm)) {
13451 goto decode_failure; /* awaiting test case */
13452 delta += 1;
13453 putXMMReg( eregOfRexRM(pfx,modrm),
13454 getXMMReg(gregOfRexRM(pfx,modrm)) );
13455 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13456 nameXMMReg(eregOfRexRM(pfx,modrm)));
13457 } else {
13458 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13459 delta += alen;
13460 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13461 DIP("movdqu %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13462 }
13463 goto decode_success;
13464 }
13465 /* 66 0F 7F = MOVDQA -- move from G (xmm) to E (mem or xmm). */
13466 if (have66noF2noF3(pfx) && sz == 2) {
13467 modrm = getUChar(delta);
13468 if (epartIsReg(modrm)) {
13469 delta += 1;
13470 putXMMReg( eregOfRexRM(pfx,modrm),
13471 getXMMReg(gregOfRexRM(pfx,modrm)) );
13472 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)),
13473 nameXMMReg(eregOfRexRM(pfx,modrm)));
13474 } else {
13475 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
13476 gen_SEGV_if_not_16_aligned( addr );
13477 delta += alen;
13478 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
13479 DIP("movdqa %s, %s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf);
13480 }
13481 goto decode_success;
13482 }
13483 break;
13484
13485 case 0xAE:
13486 /* 0F AE /7 = SFENCE -- flush pending operations to memory */
13487 if (haveNo66noF2noF3(pfx)
13488 && epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
13489 && sz == 4) {
13490 delta += 1;
13491 /* Insert a memory fence. It's sometimes important that these
13492 are carried through to the generated code. */
13493 stmt( IRStmt_MBE(Imbe_Fence) );
13494 DIP("sfence\n");
13495 goto decode_success;
13496 }
13497 /* mindless duplication follows .. */
13498 /* 0F AE /5 = LFENCE -- flush pending operations to memory */
13499 /* 0F AE /6 = MFENCE -- flush pending operations to memory */
13500 if (haveNo66noF2noF3(pfx)
13501 && epartIsReg(getUChar(delta))
13502 && (gregLO3ofRM(getUChar(delta)) == 5
13503 || gregLO3ofRM(getUChar(delta)) == 6)
13504 && sz == 4) {
13505 delta += 1;
13506 /* Insert a memory fence. It's sometimes important that these
13507 are carried through to the generated code. */
13508 stmt( IRStmt_MBE(Imbe_Fence) );
13509 DIP("%sfence\n", gregLO3ofRM(getUChar(delta-1))==5 ? "l" : "m");
13510 goto decode_success;
13511 }
sewardj30fc0582012-02-16 13:45:13 +000013512
13513 /* 0F AE /7 = CLFLUSH -- flush cache line */
13514 if (haveNo66noF2noF3(pfx)
13515 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 7
13516 && sz == 4) {
13517
13518 /* This is something of a hack. We need to know the size of
13519 the cache line containing addr. Since we don't (easily),
13520 assume 256 on the basis that no real cache would have a
13521 line that big. It's safe to invalidate more stuff than we
13522 need, just inefficient. */
13523 ULong lineszB = 256ULL;
13524
13525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13526 delta += alen;
13527
13528 /* Round addr down to the start of the containing block. */
13529 stmt( IRStmt_Put(
sewardj05f5e012014-05-04 10:52:11 +000013530 OFFB_CMSTART,
sewardj30fc0582012-02-16 13:45:13 +000013531 binop( Iop_And64,
13532 mkexpr(addr),
13533 mkU64( ~(lineszB-1) ))) );
13534
sewardj05f5e012014-05-04 10:52:11 +000013535 stmt( IRStmt_Put(OFFB_CMLEN, mkU64(lineszB) ) );
sewardj96c5f262012-04-13 23:03:45 +000013536
sewardj05f5e012014-05-04 10:52:11 +000013537 jmp_lit(dres, Ijk_InvalICache, (Addr64)(guest_RIP_bbstart+delta));
sewardj30fc0582012-02-16 13:45:13 +000013538
13539 DIP("clflush %s\n", dis_buf);
13540 goto decode_success;
13541 }
13542
sewardj80611e32012-01-20 13:07:24 +000013543 /* 0F AE /3 = STMXCSR m32 -- store %mxcsr */
13544 if (haveNo66noF2noF3(pfx)
13545 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
13546 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000013547 delta = dis_STMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000013548 goto decode_success;
13549 }
13550 /* 0F AE /2 = LDMXCSR m32 -- load %mxcsr */
13551 if (haveNo66noF2noF3(pfx)
13552 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
13553 && sz == 4) {
sewardjfe0c5e72012-06-15 15:48:07 +000013554 delta = dis_LDMXCSR(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000013555 goto decode_success;
13556 }
13557 /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
13558 Note that the presence or absence of REX.W slightly affects the
13559 written format: whether the saved FPU IP and DP pointers are 64
13560 or 32 bits. But the helper function we call simply writes zero
13561 bits in the relevant fields (which are 64 bits regardless of
13562 what REX.W is) and so it's good enough (iow, equally broken) in
13563 both cases. */
13564 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13565 && !epartIsReg(getUChar(delta))
13566 && gregOfRexRM(pfx,getUChar(delta)) == 0) {
13567 IRDirty* d;
13568 modrm = getUChar(delta);
13569 vassert(!epartIsReg(modrm));
13570
13571 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13572 delta += alen;
13573 gen_SEGV_if_not_16_aligned(addr);
13574
13575 DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
13576
13577 /* Uses dirty helper:
sewardj28d71ed2014-09-07 23:23:17 +000013578 void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
13579 ULong ) */
sewardj80611e32012-01-20 13:07:24 +000013580 d = unsafeIRDirty_0_N (
13581 0/*regparms*/,
sewardj28d71ed2014-09-07 23:23:17 +000013582 "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
13583 &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
florian90419562013-08-15 20:54:52 +000013584 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj80611e32012-01-20 13:07:24 +000013585 );
sewardj80611e32012-01-20 13:07:24 +000013586
13587 /* declare we're writing memory */
13588 d->mFx = Ifx_Write;
13589 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000013590 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000013591
13592 /* declare we're reading guest state */
sewardj28d71ed2014-09-07 23:23:17 +000013593 d->nFxState = 6;
sewardjc9069f22012-06-01 16:09:50 +000013594 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000013595
13596 d->fxState[0].fx = Ifx_Read;
13597 d->fxState[0].offset = OFFB_FTOP;
13598 d->fxState[0].size = sizeof(UInt);
13599
13600 d->fxState[1].fx = Ifx_Read;
13601 d->fxState[1].offset = OFFB_FPREGS;
13602 d->fxState[1].size = 8 * sizeof(ULong);
13603
13604 d->fxState[2].fx = Ifx_Read;
13605 d->fxState[2].offset = OFFB_FPTAGS;
13606 d->fxState[2].size = 8 * sizeof(UChar);
13607
13608 d->fxState[3].fx = Ifx_Read;
13609 d->fxState[3].offset = OFFB_FPROUND;
13610 d->fxState[3].size = sizeof(ULong);
13611
13612 d->fxState[4].fx = Ifx_Read;
13613 d->fxState[4].offset = OFFB_FC3210;
13614 d->fxState[4].size = sizeof(ULong);
13615
13616 d->fxState[5].fx = Ifx_Read;
sewardj28d71ed2014-09-07 23:23:17 +000013617 d->fxState[5].offset = OFFB_SSEROUND;
13618 d->fxState[5].size = sizeof(ULong);
sewardj80611e32012-01-20 13:07:24 +000013619
sewardj28d71ed2014-09-07 23:23:17 +000013620 /* Call the helper. This creates all parts of the in-memory
13621 image except for the XMM[0..15] array, which we do
13622 separately, in order that any undefinedness in the XMM
13623 registers is tracked separately by Memcheck and does not
13624 "infect" the in-memory shadow for the other parts of the
13625 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
13626 SSEROUND). */
sewardj80611e32012-01-20 13:07:24 +000013627 stmt( IRStmt_Dirty(d) );
13628
sewardj28d71ed2014-09-07 23:23:17 +000013629 /* And now the XMMs themselves. */
13630 UInt xmm;
13631 for (xmm = 0; xmm < 16; xmm++) {
13632 storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
13633 getXMMReg(xmm) );
13634 }
13635
sewardj80611e32012-01-20 13:07:24 +000013636 goto decode_success;
13637 }
13638 /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
13639 As with FXSAVE above we ignore the value of REX.W since we're
13640 not bothering with the FPU DP and IP fields. */
13641 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)
13642 && !epartIsReg(getUChar(delta))
13643 && gregOfRexRM(pfx,getUChar(delta)) == 1) {
13644 IRDirty* d;
13645 modrm = getUChar(delta);
13646 vassert(!epartIsReg(modrm));
13647
13648 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13649 delta += alen;
13650 gen_SEGV_if_not_16_aligned(addr);
13651
13652 DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
13653
13654 /* Uses dirty helper:
sewardj28d71ed2014-09-07 23:23:17 +000013655 VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
13656 ULong )
sewardj80611e32012-01-20 13:07:24 +000013657 NOTE:
sewardj28d71ed2014-09-07 23:23:17 +000013658 the VexEmNote value is simply ignored
sewardj80611e32012-01-20 13:07:24 +000013659 */
13660 d = unsafeIRDirty_0_N (
13661 0/*regparms*/,
sewardj28d71ed2014-09-07 23:23:17 +000013662 "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
13663 &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
florian90419562013-08-15 20:54:52 +000013664 mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
sewardj80611e32012-01-20 13:07:24 +000013665 );
sewardj80611e32012-01-20 13:07:24 +000013666
13667 /* declare we're reading memory */
13668 d->mFx = Ifx_Read;
13669 d->mAddr = mkexpr(addr);
sewardjc9069f22012-06-01 16:09:50 +000013670 d->mSize = 464; /* according to recent Intel docs */
sewardj80611e32012-01-20 13:07:24 +000013671
13672 /* declare we're writing guest state */
sewardj28d71ed2014-09-07 23:23:17 +000013673 d->nFxState = 6;
sewardjc9069f22012-06-01 16:09:50 +000013674 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000013675
13676 d->fxState[0].fx = Ifx_Write;
13677 d->fxState[0].offset = OFFB_FTOP;
13678 d->fxState[0].size = sizeof(UInt);
13679
13680 d->fxState[1].fx = Ifx_Write;
13681 d->fxState[1].offset = OFFB_FPREGS;
13682 d->fxState[1].size = 8 * sizeof(ULong);
13683
13684 d->fxState[2].fx = Ifx_Write;
13685 d->fxState[2].offset = OFFB_FPTAGS;
13686 d->fxState[2].size = 8 * sizeof(UChar);
13687
13688 d->fxState[3].fx = Ifx_Write;
13689 d->fxState[3].offset = OFFB_FPROUND;
13690 d->fxState[3].size = sizeof(ULong);
13691
13692 d->fxState[4].fx = Ifx_Write;
13693 d->fxState[4].offset = OFFB_FC3210;
13694 d->fxState[4].size = sizeof(ULong);
13695
13696 d->fxState[5].fx = Ifx_Write;
sewardj28d71ed2014-09-07 23:23:17 +000013697 d->fxState[5].offset = OFFB_SSEROUND;
13698 d->fxState[5].size = sizeof(ULong);
sewardj80611e32012-01-20 13:07:24 +000013699
sewardj28d71ed2014-09-07 23:23:17 +000013700 /* Call the helper. This reads all parts of the in-memory
13701 image except for the XMM[0..15] array, which we do
13702 separately, in order that any undefinedness in the XMM
13703 registers is tracked separately by Memcheck and does not
13704 "infect" the in-guest-state shadow for the other parts of the
13705 image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
13706 SSEROUND). */
sewardj80611e32012-01-20 13:07:24 +000013707 stmt( IRStmt_Dirty(d) );
13708
sewardj28d71ed2014-09-07 23:23:17 +000013709 /* And now the XMMs themselves. */
13710 UInt xmm;
13711 for (xmm = 0; xmm < 16; xmm++) {
13712 putXMMReg(xmm, loadLE(Ity_V128,
13713 binop(Iop_Add64, mkexpr(addr),
13714 mkU64(160 + xmm * 16))));
13715 }
13716
sewardj80611e32012-01-20 13:07:24 +000013717 goto decode_success;
13718 }
13719 break;
13720
13721 case 0xC2:
13722 /* 0F C2 = CMPPS -- 32Fx4 comparison from R/M to R */
13723 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013724 Long delta0 = delta;
13725 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpps", True, 4 );
13726 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013727 }
13728 /* F3 0F C2 = CMPSS -- 32F0x4 comparison from R/M to R */
13729 if (haveF3no66noF2(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013730 Long delta0 = delta;
13731 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpss", False, 4 );
13732 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013733 }
13734 /* F2 0F C2 = CMPSD -- 64F0x2 comparison from R/M to R */
13735 if (haveF2no66noF3(pfx) && sz == 4) {
sewardjc4530ae2012-05-21 10:18:49 +000013736 Long delta0 = delta;
13737 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmpsd", False, 8 );
13738 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013739 }
13740 /* 66 0F C2 = CMPPD -- 64Fx2 comparison from R/M to R */
13741 if (have66noF2noF3(pfx) && sz == 2) {
sewardjc4530ae2012-05-21 10:18:49 +000013742 Long delta0 = delta;
13743 delta = dis_SSE_cmp_E_to_G( vbi, pfx, delta, "cmppd", True, 8 );
13744 if (delta > delta0) goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000013745 }
13746 break;
13747
13748 case 0xC3:
13749 /* 0F C3 = MOVNTI -- for us, just a plain ireg store. */
13750 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13751 modrm = getUChar(delta);
13752 if (!epartIsReg(modrm)) {
13753 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
13754 storeLE( mkexpr(addr), getIRegG(sz, pfx, modrm) );
13755 DIP("movnti %s,%s\n", dis_buf,
13756 nameIRegG(sz, pfx, modrm));
13757 delta += alen;
13758 goto decode_success;
13759 }
13760 /* else fall through */
13761 }
13762 break;
13763
13764 case 0xC4:
13765 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13766 /* 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13767 put it into the specified lane of mmx(G). */
13768 if (haveNo66noF2noF3(pfx)
13769 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
13770 /* Use t0 .. t3 to hold the 4 original 16-bit lanes of the
13771 mmx reg. t4 is the new lane value. t5 is the original
13772 mmx value. t6 is the new mmx value. */
13773 Int lane;
13774 t4 = newTemp(Ity_I16);
13775 t5 = newTemp(Ity_I64);
13776 t6 = newTemp(Ity_I64);
13777 modrm = getUChar(delta);
13778 do_MMX_preamble();
13779
13780 assign(t5, getMMXReg(gregLO3ofRM(modrm)));
13781 breakup64to16s( t5, &t3, &t2, &t1, &t0 );
13782
13783 if (epartIsReg(modrm)) {
13784 assign(t4, getIReg16(eregOfRexRM(pfx,modrm)));
13785 delta += 1+1;
13786 lane = getUChar(delta-1);
13787 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13788 nameIReg16(eregOfRexRM(pfx,modrm)),
13789 nameMMXReg(gregLO3ofRM(modrm)));
13790 } else {
13791 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13792 delta += 1+alen;
13793 lane = getUChar(delta-1);
13794 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
13795 DIP("pinsrw $%d,%s,%s\n", (Int)lane,
13796 dis_buf,
13797 nameMMXReg(gregLO3ofRM(modrm)));
13798 }
13799
13800 switch (lane & 3) {
13801 case 0: assign(t6, mk64from16s(t3,t2,t1,t4)); break;
13802 case 1: assign(t6, mk64from16s(t3,t2,t4,t0)); break;
13803 case 2: assign(t6, mk64from16s(t3,t4,t1,t0)); break;
13804 case 3: assign(t6, mk64from16s(t4,t2,t1,t0)); break;
13805 default: vassert(0);
13806 }
13807 putMMXReg(gregLO3ofRM(modrm), mkexpr(t6));
13808 goto decode_success;
13809 }
13810 /* 66 0F C4 = PINSRW -- get 16 bits from E(mem or low half ireg) and
13811 put it into the specified lane of xmm(G). */
13812 if (have66noF2noF3(pfx)
13813 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
13814 Int lane;
13815 t4 = newTemp(Ity_I16);
13816 modrm = getUChar(delta);
sewardj4ed05e02012-06-18 15:01:30 +000013817 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000013818 if (epartIsReg(modrm)) {
sewardj4ed05e02012-06-18 15:01:30 +000013819 UInt rE = eregOfRexRM(pfx,modrm);
13820 assign(t4, getIReg16(rE));
sewardj80611e32012-01-20 13:07:24 +000013821 delta += 1+1;
13822 lane = getUChar(delta-1);
sewardj4ed05e02012-06-18 15:01:30 +000013823 DIP("pinsrw $%d,%s,%s\n",
13824 (Int)lane, nameIReg16(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013825 } else {
13826 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
13827 1/*byte after the amode*/ );
13828 delta += 1+alen;
13829 lane = getUChar(delta-1);
13830 assign(t4, loadLE(Ity_I16, mkexpr(addr)));
sewardj4ed05e02012-06-18 15:01:30 +000013831 DIP("pinsrw $%d,%s,%s\n",
13832 (Int)lane, dis_buf, nameXMMReg(rG));
13833 }
13834 IRTemp src_vec = newTemp(Ity_V128);
13835 assign(src_vec, getXMMReg(rG));
13836 IRTemp res_vec = math_PINSRW_128( src_vec, t4, lane & 7);
13837 putXMMReg(rG, mkexpr(res_vec));
sewardj80611e32012-01-20 13:07:24 +000013838 goto decode_success;
13839 }
13840 break;
13841
13842 case 0xC5:
13843 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
13844 /* 0F C5 = PEXTRW -- extract 16-bit field from mmx(E) and put
13845 zero-extend of it in ireg(G). */
13846 if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8)) {
13847 modrm = getUChar(delta);
13848 if (epartIsReg(modrm)) {
13849 IRTemp sV = newTemp(Ity_I64);
13850 t5 = newTemp(Ity_I16);
13851 do_MMX_preamble();
13852 assign(sV, getMMXReg(eregLO3ofRM(modrm)));
13853 breakup64to16s( sV, &t3, &t2, &t1, &t0 );
13854 switch (getUChar(delta+1) & 3) {
13855 case 0: assign(t5, mkexpr(t0)); break;
13856 case 1: assign(t5, mkexpr(t1)); break;
13857 case 2: assign(t5, mkexpr(t2)); break;
13858 case 3: assign(t5, mkexpr(t3)); break;
13859 default: vassert(0);
13860 }
13861 if (sz == 8)
13862 putIReg64(gregOfRexRM(pfx,modrm), unop(Iop_16Uto64, mkexpr(t5)));
13863 else
13864 putIReg32(gregOfRexRM(pfx,modrm), unop(Iop_16Uto32, mkexpr(t5)));
13865 DIP("pextrw $%d,%s,%s\n",
13866 (Int)getUChar(delta+1),
13867 nameMMXReg(eregLO3ofRM(modrm)),
13868 sz==8 ? nameIReg64(gregOfRexRM(pfx,modrm))
13869 : nameIReg32(gregOfRexRM(pfx,modrm))
13870 );
13871 delta += 2;
13872 goto decode_success;
13873 }
13874 /* else fall through */
13875 /* note, for anyone filling in the mem case: this insn has one
13876 byte after the amode and therefore you must pass 1 as the
13877 last arg to disAMode */
13878 }
13879 /* 66 0F C5 = PEXTRW -- extract 16-bit field from xmm(E) and put
13880 zero-extend of it in ireg(G). */
13881 if (have66noF2noF3(pfx)
13882 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardje8a7eb72012-06-12 14:59:17 +000013883 Long delta0 = delta;
13884 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
13885 False/*!isAvx*/ );
13886 if (delta > delta0) goto decode_success;
13887 /* else fall through -- decoding has failed */
sewardj80611e32012-01-20 13:07:24 +000013888 }
13889 break;
13890
13891 case 0xC6:
13892 /* 0F C6 /r ib = SHUFPS -- shuffle packed F32s */
13893 if (haveNo66noF2noF3(pfx) && sz == 4) {
sewardj251b59e2012-05-25 13:51:07 +000013894 Int imm8 = 0;
13895 IRTemp sV = newTemp(Ity_V128);
13896 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013897 modrm = getUChar(delta);
sewardj251b59e2012-05-25 13:51:07 +000013898 UInt rG = gregOfRexRM(pfx,modrm);
13899 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000013900 if (epartIsReg(modrm)) {
sewardj251b59e2012-05-25 13:51:07 +000013901 UInt rE = eregOfRexRM(pfx,modrm);
13902 assign( sV, getXMMReg(rE) );
13903 imm8 = (Int)getUChar(delta+1);
sewardj80611e32012-01-20 13:07:24 +000013904 delta += 1+1;
sewardj251b59e2012-05-25 13:51:07 +000013905 DIP("shufps $%d,%s,%s\n", imm8, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013906 } else {
sewardj251b59e2012-05-25 13:51:07 +000013907 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000013908 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj251b59e2012-05-25 13:51:07 +000013909 imm8 = (Int)getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000013910 delta += 1+alen;
sewardj251b59e2012-05-25 13:51:07 +000013911 DIP("shufps $%d,%s,%s\n", imm8, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000013912 }
sewardj4b1cc832012-06-13 11:10:20 +000013913 IRTemp res = math_SHUFPS_128( sV, dV, imm8 );
sewardj251b59e2012-05-25 13:51:07 +000013914 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000013915 goto decode_success;
13916 }
13917 /* 66 0F C6 /r ib = SHUFPD -- shuffle packed F64s */
13918 if (have66noF2noF3(pfx) && sz == 2) {
13919 Int select;
13920 IRTemp sV = newTemp(Ity_V128);
13921 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000013922
13923 modrm = getUChar(delta);
13924 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
13925
13926 if (epartIsReg(modrm)) {
13927 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
13928 select = (Int)getUChar(delta+1);
13929 delta += 1+1;
13930 DIP("shufpd $%d,%s,%s\n", select,
13931 nameXMMReg(eregOfRexRM(pfx,modrm)),
13932 nameXMMReg(gregOfRexRM(pfx,modrm)));
13933 } else {
13934 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
13935 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
13936 select = getUChar(delta+alen);
13937 delta += 1+alen;
13938 DIP("shufpd $%d,%s,%s\n", select,
13939 dis_buf,
13940 nameXMMReg(gregOfRexRM(pfx,modrm)));
13941 }
13942
sewardj21459cb2012-06-18 14:05:52 +000013943 IRTemp res = math_SHUFPD_128( sV, dV, select );
13944 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000013945 goto decode_success;
13946 }
13947 break;
13948
13949 case 0xD1:
13950 /* 66 0F D1 = PSRLW by E */
13951 if (have66noF2noF3(pfx) && sz == 2) {
13952 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlw", Iop_ShrN16x8 );
13953 goto decode_success;
13954 }
13955 break;
13956
13957 case 0xD2:
13958 /* 66 0F D2 = PSRLD by E */
13959 if (have66noF2noF3(pfx) && sz == 2) {
13960 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrld", Iop_ShrN32x4 );
13961 goto decode_success;
13962 }
13963 break;
13964
13965 case 0xD3:
13966 /* 66 0F D3 = PSRLQ by E */
13967 if (have66noF2noF3(pfx) && sz == 2) {
13968 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrlq", Iop_ShrN64x2 );
13969 goto decode_success;
13970 }
13971 break;
13972
13973 case 0xD4:
13974 /* 66 0F D4 = PADDQ */
13975 if (have66noF2noF3(pfx) && sz == 2) {
13976 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13977 "paddq", Iop_Add64x2, False );
13978 goto decode_success;
13979 }
13980 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
13981 /* 0F D4 = PADDQ -- add 64x1 */
13982 if (haveNo66noF2noF3(pfx) && sz == 4) {
13983 do_MMX_preamble();
13984 delta = dis_MMXop_regmem_to_reg (
13985 vbi, pfx, delta, opc, "paddq", False );
13986 goto decode_success;
13987 }
13988 break;
13989
13990 case 0xD5:
sewardj251b59e2012-05-25 13:51:07 +000013991 /* 66 0F D5 = PMULLW -- 16x8 multiply */
sewardj80611e32012-01-20 13:07:24 +000013992 if (have66noF2noF3(pfx) && sz == 2) {
13993 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
13994 "pmullw", Iop_Mul16x8, False );
13995 goto decode_success;
13996 }
13997 break;
13998
13999 case 0xD6:
14000 /* F3 0F D6 = MOVQ2DQ -- move from E (mmx) to G (lo half xmm, zero
14001 hi half). */
14002 if (haveF3no66noF2(pfx) && sz == 4) {
14003 modrm = getUChar(delta);
14004 if (epartIsReg(modrm)) {
14005 do_MMX_preamble();
14006 putXMMReg( gregOfRexRM(pfx,modrm),
14007 unop(Iop_64UtoV128, getMMXReg( eregLO3ofRM(modrm) )) );
14008 DIP("movq2dq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14009 nameXMMReg(gregOfRexRM(pfx,modrm)));
14010 delta += 1;
14011 goto decode_success;
14012 }
14013 /* apparently no mem case for this insn */
14014 }
14015 /* 66 0F D6 = MOVQ -- move 64 bits from G (lo half xmm) to E (mem
14016 or lo half xmm). */
14017 if (have66noF2noF3(pfx)
14018 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
14019 modrm = getUChar(delta);
14020 if (epartIsReg(modrm)) {
14021 /* fall through, awaiting test case */
14022 /* dst: lo half copied, hi half zeroed */
14023 } else {
14024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14025 storeLE( mkexpr(addr),
14026 getXMMRegLane64( gregOfRexRM(pfx,modrm), 0 ));
14027 DIP("movq %s,%s\n", nameXMMReg(gregOfRexRM(pfx,modrm)), dis_buf );
14028 delta += alen;
14029 goto decode_success;
14030 }
14031 }
14032 /* F2 0F D6 = MOVDQ2Q -- move from E (lo half xmm, not mem) to G (mmx). */
14033 if (haveF2no66noF3(pfx) && sz == 4) {
14034 modrm = getUChar(delta);
14035 if (epartIsReg(modrm)) {
14036 do_MMX_preamble();
14037 putMMXReg( gregLO3ofRM(modrm),
14038 getXMMRegLane64( eregOfRexRM(pfx,modrm), 0 ));
14039 DIP("movdq2q %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
14040 nameMMXReg(gregLO3ofRM(modrm)));
14041 delta += 1;
14042 goto decode_success;
14043 }
14044 /* apparently no mem case for this insn */
14045 }
14046 break;
14047
14048 case 0xD7:
14049 /* 66 0F D7 = PMOVMSKB -- extract sign bits from each of 16
14050 lanes in xmm(E), turn them into a byte, and put
14051 zero-extend of it in ireg(G). Doing this directly is just
14052 too cumbersome; give up therefore and call a helper. */
14053 if (have66noF2noF3(pfx)
sewardj8ef22422012-05-24 16:29:18 +000014054 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)
14055 && epartIsReg(getUChar(delta))) { /* no memory case, it seems */
14056 delta = dis_PMOVMSKB_128( vbi, pfx, delta, False/*!isAvx*/ );
14057 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000014058 }
14059 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14060 /* 0F D7 = PMOVMSKB -- extract sign bits from each of 8 lanes in
sewardje13074c2012-11-08 10:57:08 +000014061 mmx(E), turn them into a byte, and put zero-extend of it in
sewardj80611e32012-01-20 13:07:24 +000014062 ireg(G). */
tom558fc972012-02-24 12:16:11 +000014063 if (haveNo66noF2noF3(pfx)
14064 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardj80611e32012-01-20 13:07:24 +000014065 modrm = getUChar(delta);
14066 if (epartIsReg(modrm)) {
14067 do_MMX_preamble();
14068 t0 = newTemp(Ity_I64);
sewardje13074c2012-11-08 10:57:08 +000014069 t1 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000014070 assign(t0, getMMXReg(eregLO3ofRM(modrm)));
sewardje13074c2012-11-08 10:57:08 +000014071 assign(t1, unop(Iop_8Uto32, unop(Iop_GetMSBs8x8, mkexpr(t0))));
14072 putIReg32(gregOfRexRM(pfx,modrm), mkexpr(t1));
sewardj80611e32012-01-20 13:07:24 +000014073 DIP("pmovmskb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14074 nameIReg32(gregOfRexRM(pfx,modrm)));
14075 delta += 1;
14076 goto decode_success;
14077 }
14078 /* else fall through */
14079 }
14080 break;
14081
14082 case 0xD8:
sewardj251b59e2012-05-25 13:51:07 +000014083 /* 66 0F D8 = PSUBUSB */
sewardj80611e32012-01-20 13:07:24 +000014084 if (have66noF2noF3(pfx) && sz == 2) {
14085 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14086 "psubusb", Iop_QSub8Ux16, False );
14087 goto decode_success;
14088 }
14089 break;
14090
14091 case 0xD9:
sewardj4f228902012-06-21 09:17:58 +000014092 /* 66 0F D9 = PSUBUSW */
sewardj80611e32012-01-20 13:07:24 +000014093 if (have66noF2noF3(pfx) && sz == 2) {
14094 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14095 "psubusw", Iop_QSub16Ux8, False );
14096 goto decode_success;
14097 }
14098 break;
14099
14100 case 0xDA:
14101 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14102 /* 0F DA = PMINUB -- 8x8 unsigned min */
14103 if (haveNo66noF2noF3(pfx) && sz == 4) {
14104 do_MMX_preamble();
14105 delta = dis_MMXop_regmem_to_reg (
14106 vbi, pfx, delta, opc, "pminub", False );
14107 goto decode_success;
14108 }
14109 /* 66 0F DA = PMINUB -- 8x16 unsigned min */
14110 if (have66noF2noF3(pfx) && sz == 2) {
14111 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14112 "pminub", Iop_Min8Ux16, False );
14113 goto decode_success;
14114 }
14115 break;
14116
14117 case 0xDB:
14118 /* 66 0F DB = PAND */
14119 if (have66noF2noF3(pfx) && sz == 2) {
14120 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pand", Iop_AndV128 );
14121 goto decode_success;
14122 }
14123 break;
14124
14125 case 0xDC:
14126 /* 66 0F DC = PADDUSB */
14127 if (have66noF2noF3(pfx) && sz == 2) {
14128 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14129 "paddusb", Iop_QAdd8Ux16, False );
14130 goto decode_success;
14131 }
14132 break;
14133
14134 case 0xDD:
14135 /* 66 0F DD = PADDUSW */
14136 if (have66noF2noF3(pfx) && sz == 2) {
14137 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14138 "paddusw", Iop_QAdd16Ux8, False );
14139 goto decode_success;
14140 }
14141 break;
14142
14143 case 0xDE:
14144 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14145 /* 0F DE = PMAXUB -- 8x8 unsigned max */
14146 if (haveNo66noF2noF3(pfx) && sz == 4) {
14147 do_MMX_preamble();
14148 delta = dis_MMXop_regmem_to_reg (
14149 vbi, pfx, delta, opc, "pmaxub", False );
14150 goto decode_success;
14151 }
14152 /* 66 0F DE = PMAXUB -- 8x16 unsigned max */
14153 if (have66noF2noF3(pfx) && sz == 2) {
14154 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14155 "pmaxub", Iop_Max8Ux16, False );
14156 goto decode_success;
14157 }
14158 break;
14159
14160 case 0xDF:
14161 /* 66 0F DF = PANDN */
14162 if (have66noF2noF3(pfx) && sz == 2) {
14163 delta = dis_SSE_E_to_G_all_invG( vbi, pfx, delta, "pandn", Iop_AndV128 );
14164 goto decode_success;
14165 }
14166 break;
14167
14168 case 0xE0:
14169 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14170 /* 0F E0 = PAVGB -- 8x8 unsigned Packed Average, with rounding */
14171 if (haveNo66noF2noF3(pfx) && sz == 4) {
14172 do_MMX_preamble();
14173 delta = dis_MMXop_regmem_to_reg (
14174 vbi, pfx, delta, opc, "pavgb", False );
14175 goto decode_success;
14176 }
14177 /* 66 0F E0 = PAVGB */
14178 if (have66noF2noF3(pfx) && sz == 2) {
14179 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14180 "pavgb", Iop_Avg8Ux16, False );
14181 goto decode_success;
14182 }
14183 break;
14184
14185 case 0xE1:
14186 /* 66 0F E1 = PSRAW by E */
14187 if (have66noF2noF3(pfx) && sz == 2) {
14188 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psraw", Iop_SarN16x8 );
14189 goto decode_success;
14190 }
14191 break;
14192
14193 case 0xE2:
14194 /* 66 0F E2 = PSRAD by E */
14195 if (have66noF2noF3(pfx) && sz == 2) {
14196 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psrad", Iop_SarN32x4 );
14197 goto decode_success;
14198 }
14199 break;
14200
14201 case 0xE3:
14202 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14203 /* 0F E3 = PAVGW -- 16x4 unsigned Packed Average, with rounding */
14204 if (haveNo66noF2noF3(pfx) && sz == 4) {
14205 do_MMX_preamble();
14206 delta = dis_MMXop_regmem_to_reg (
14207 vbi, pfx, delta, opc, "pavgw", False );
14208 goto decode_success;
14209 }
14210 /* 66 0F E3 = PAVGW */
14211 if (have66noF2noF3(pfx) && sz == 2) {
14212 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14213 "pavgw", Iop_Avg16Ux8, False );
14214 goto decode_success;
14215 }
14216 break;
14217
14218 case 0xE4:
14219 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14220 /* 0F E4 = PMULUH -- 16x4 hi-half of unsigned widening multiply */
14221 if (haveNo66noF2noF3(pfx) && sz == 4) {
14222 do_MMX_preamble();
14223 delta = dis_MMXop_regmem_to_reg (
14224 vbi, pfx, delta, opc, "pmuluh", False );
14225 goto decode_success;
14226 }
14227 /* 66 0F E4 = PMULHUW -- 16x8 hi-half of unsigned widening multiply */
14228 if (have66noF2noF3(pfx) && sz == 2) {
14229 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14230 "pmulhuw", Iop_MulHi16Ux8, False );
14231 goto decode_success;
14232 }
14233 break;
14234
14235 case 0xE5:
14236 /* 66 0F E5 = PMULHW -- 16x8 hi-half of signed widening multiply */
14237 if (have66noF2noF3(pfx) && sz == 2) {
14238 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14239 "pmulhw", Iop_MulHi16Sx8, False );
14240 goto decode_success;
14241 }
14242 break;
14243
14244 case 0xE6:
14245 /* 66 0F E6 = CVTTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14246 lo half xmm(G), and zero upper half, rounding towards zero */
14247 /* F2 0F E6 = CVTPD2DQ -- convert 2 x F64 in mem/xmm to 2 x I32 in
14248 lo half xmm(G), according to prevailing rounding mode, and zero
14249 upper half */
14250 if ( (haveF2no66noF3(pfx) && sz == 4)
14251 || (have66noF2noF3(pfx) && sz == 2) ) {
sewardj66becf32012-06-18 23:15:16 +000014252 delta = dis_CVTxPD2DQ_128( vbi, pfx, delta, False/*!isAvx*/,
14253 toBool(sz == 2)/*r2zero*/);
sewardj80611e32012-01-20 13:07:24 +000014254 goto decode_success;
14255 }
14256 /* F3 0F E6 = CVTDQ2PD -- convert 2 x I32 in mem/lo half xmm to 2 x
14257 F64 in xmm(G) */
14258 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj4b1cc832012-06-13 11:10:20 +000014259 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000014260 goto decode_success;
14261 }
14262 break;
14263
14264 case 0xE7:
14265 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14266 /* 0F E7 = MOVNTQ -- for us, just a plain MMX store. Note, the
14267 Intel manual does not say anything about the usual business of
14268 the FP reg tags getting trashed whenever an MMX insn happens.
14269 So we just leave them alone.
14270 */
14271 if (haveNo66noF2noF3(pfx) && sz == 4) {
14272 modrm = getUChar(delta);
14273 if (!epartIsReg(modrm)) {
14274 /* do_MMX_preamble(); Intel docs don't specify this */
14275 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14276 storeLE( mkexpr(addr), getMMXReg(gregLO3ofRM(modrm)) );
14277 DIP("movntq %s,%s\n", dis_buf,
14278 nameMMXReg(gregLO3ofRM(modrm)));
14279 delta += alen;
14280 goto decode_success;
14281 }
14282 /* else fall through */
14283 }
14284 /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
14285 if (have66noF2noF3(pfx) && sz == 2) {
14286 modrm = getUChar(delta);
14287 if (!epartIsReg(modrm)) {
14288 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14289 gen_SEGV_if_not_16_aligned( addr );
14290 storeLE( mkexpr(addr), getXMMReg(gregOfRexRM(pfx,modrm)) );
14291 DIP("movntdq %s,%s\n", dis_buf,
14292 nameXMMReg(gregOfRexRM(pfx,modrm)));
14293 delta += alen;
14294 goto decode_success;
14295 }
14296 /* else fall through */
14297 }
14298 break;
14299
14300 case 0xE8:
14301 /* 66 0F E8 = PSUBSB */
14302 if (have66noF2noF3(pfx) && sz == 2) {
14303 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14304 "psubsb", Iop_QSub8Sx16, False );
14305 goto decode_success;
14306 }
14307 break;
14308
14309 case 0xE9:
14310 /* 66 0F E9 = PSUBSW */
14311 if (have66noF2noF3(pfx) && sz == 2) {
14312 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14313 "psubsw", Iop_QSub16Sx8, False );
14314 goto decode_success;
14315 }
14316 break;
14317
14318 case 0xEA:
14319 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14320 /* 0F EA = PMINSW -- 16x4 signed min */
14321 if (haveNo66noF2noF3(pfx) && sz == 4) {
14322 do_MMX_preamble();
14323 delta = dis_MMXop_regmem_to_reg (
14324 vbi, pfx, delta, opc, "pminsw", False );
14325 goto decode_success;
14326 }
14327 /* 66 0F EA = PMINSW -- 16x8 signed min */
14328 if (have66noF2noF3(pfx) && sz == 2) {
14329 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14330 "pminsw", Iop_Min16Sx8, False );
14331 goto decode_success;
14332 }
14333 break;
14334
14335 case 0xEB:
14336 /* 66 0F EB = POR */
14337 if (have66noF2noF3(pfx) && sz == 2) {
14338 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "por", Iop_OrV128 );
14339 goto decode_success;
14340 }
14341 break;
14342
14343 case 0xEC:
14344 /* 66 0F EC = PADDSB */
14345 if (have66noF2noF3(pfx) && sz == 2) {
14346 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14347 "paddsb", Iop_QAdd8Sx16, False );
14348 goto decode_success;
14349 }
14350 break;
14351
14352 case 0xED:
14353 /* 66 0F ED = PADDSW */
14354 if (have66noF2noF3(pfx) && sz == 2) {
14355 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14356 "paddsw", Iop_QAdd16Sx8, False );
14357 goto decode_success;
14358 }
14359 break;
14360
14361 case 0xEE:
14362 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14363 /* 0F EE = PMAXSW -- 16x4 signed max */
14364 if (haveNo66noF2noF3(pfx) && sz == 4) {
14365 do_MMX_preamble();
14366 delta = dis_MMXop_regmem_to_reg (
14367 vbi, pfx, delta, opc, "pmaxsw", False );
14368 goto decode_success;
14369 }
14370 /* 66 0F EE = PMAXSW -- 16x8 signed max */
14371 if (have66noF2noF3(pfx) && sz == 2) {
14372 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14373 "pmaxsw", Iop_Max16Sx8, False );
14374 goto decode_success;
14375 }
14376 break;
14377
14378 case 0xEF:
14379 /* 66 0F EF = PXOR */
14380 if (have66noF2noF3(pfx) && sz == 2) {
14381 delta = dis_SSE_E_to_G_all( vbi, pfx, delta, "pxor", Iop_XorV128 );
14382 goto decode_success;
14383 }
14384 break;
14385
14386 case 0xF1:
14387 /* 66 0F F1 = PSLLW by E */
14388 if (have66noF2noF3(pfx) && sz == 2) {
14389 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllw", Iop_ShlN16x8 );
14390 goto decode_success;
14391 }
14392 break;
14393
14394 case 0xF2:
14395 /* 66 0F F2 = PSLLD by E */
14396 if (have66noF2noF3(pfx) && sz == 2) {
14397 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "pslld", Iop_ShlN32x4 );
14398 goto decode_success;
14399 }
14400 break;
14401
14402 case 0xF3:
14403 /* 66 0F F3 = PSLLQ by E */
14404 if (have66noF2noF3(pfx) && sz == 2) {
14405 delta = dis_SSE_shiftG_byE( vbi, pfx, delta, "psllq", Iop_ShlN64x2 );
14406 goto decode_success;
14407 }
14408 break;
14409
14410 case 0xF4:
14411 /* 66 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
14412 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit
14413 half */
sewardj80611e32012-01-20 13:07:24 +000014414 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000014415 IRTemp sV = newTemp(Ity_V128);
14416 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014417 modrm = getUChar(delta);
sewardje8a7eb72012-06-12 14:59:17 +000014418 UInt rG = gregOfRexRM(pfx,modrm);
14419 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000014420 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000014421 UInt rE = eregOfRexRM(pfx,modrm);
14422 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014423 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000014424 DIP("pmuludq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014425 } else {
14426 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14427 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14428 delta += alen;
sewardje8a7eb72012-06-12 14:59:17 +000014429 DIP("pmuludq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014430 }
sewardje8a7eb72012-06-12 14:59:17 +000014431 putXMMReg( rG, mkexpr(math_PMULUDQ_128( sV, dV )) );
sewardj80611e32012-01-20 13:07:24 +000014432 goto decode_success;
14433 }
14434 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14435 /* 0F F4 = PMULUDQ -- unsigned widening multiply of 32-lanes 0 x
14436 0 to form 64-bit result */
14437 if (haveNo66noF2noF3(pfx) && sz == 4) {
14438 IRTemp sV = newTemp(Ity_I64);
14439 IRTemp dV = newTemp(Ity_I64);
14440 t1 = newTemp(Ity_I32);
14441 t0 = newTemp(Ity_I32);
14442 modrm = getUChar(delta);
14443
14444 do_MMX_preamble();
14445 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
14446
14447 if (epartIsReg(modrm)) {
14448 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
14449 delta += 1;
14450 DIP("pmuludq %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
14451 nameMMXReg(gregLO3ofRM(modrm)));
14452 } else {
14453 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14454 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
14455 delta += alen;
14456 DIP("pmuludq %s,%s\n", dis_buf,
14457 nameMMXReg(gregLO3ofRM(modrm)));
14458 }
14459
14460 assign( t0, unop(Iop_64to32, mkexpr(dV)) );
14461 assign( t1, unop(Iop_64to32, mkexpr(sV)) );
14462 putMMXReg( gregLO3ofRM(modrm),
14463 binop( Iop_MullU32, mkexpr(t0), mkexpr(t1) ) );
14464 goto decode_success;
14465 }
14466 break;
14467
14468 case 0xF5:
14469 /* 66 0F F5 = PMADDWD -- Multiply and add packed integers from
14470 E(xmm or mem) to G(xmm) */
14471 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000014472 IRTemp sV = newTemp(Ity_V128);
14473 IRTemp dV = newTemp(Ity_V128);
14474 modrm = getUChar(delta);
14475 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014476 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014477 UInt rE = eregOfRexRM(pfx,modrm);
14478 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014479 delta += 1;
sewardj89378162012-06-24 12:12:20 +000014480 DIP("pmaddwd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014481 } else {
14482 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj89378162012-06-24 12:12:20 +000014483 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000014484 delta += alen;
sewardj89378162012-06-24 12:12:20 +000014485 DIP("pmaddwd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014486 }
sewardj89378162012-06-24 12:12:20 +000014487 assign( dV, getXMMReg(rG) );
14488 putXMMReg( rG, mkexpr(math_PMADDWD_128(dV, sV)) );
sewardj80611e32012-01-20 13:07:24 +000014489 goto decode_success;
14490 }
14491 break;
14492
14493 case 0xF6:
14494 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14495 /* 0F F6 = PSADBW -- sum of 8Ux8 absolute differences */
14496 if (haveNo66noF2noF3(pfx) && sz == 4) {
14497 do_MMX_preamble();
14498 delta = dis_MMXop_regmem_to_reg (
14499 vbi, pfx, delta, opc, "psadbw", False );
14500 goto decode_success;
14501 }
14502 /* 66 0F F6 = PSADBW -- 2 x (8x8 -> 48 zeroes ++ u16) Sum Abs Diffs
14503 from E(xmm or mem) to G(xmm) */
14504 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000014505 IRTemp sV = newTemp(Ity_V128);
14506 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014507 modrm = getUChar(delta);
sewardj82096922012-06-24 14:57:59 +000014508 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014509 if (epartIsReg(modrm)) {
sewardj82096922012-06-24 14:57:59 +000014510 UInt rE = eregOfRexRM(pfx,modrm);
14511 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000014512 delta += 1;
sewardj82096922012-06-24 14:57:59 +000014513 DIP("psadbw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014514 } else {
14515 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj82096922012-06-24 14:57:59 +000014516 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj80611e32012-01-20 13:07:24 +000014517 delta += alen;
sewardj82096922012-06-24 14:57:59 +000014518 DIP("psadbw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014519 }
sewardj82096922012-06-24 14:57:59 +000014520 assign( dV, getXMMReg(rG) );
14521 putXMMReg( rG, mkexpr( math_PSADBW_128 ( dV, sV ) ) );
14522
sewardj80611e32012-01-20 13:07:24 +000014523 goto decode_success;
14524 }
14525 break;
14526
14527 case 0xF7:
14528 /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
14529 /* 0F F7 = MASKMOVQ -- 8x8 masked store */
14530 if (haveNo66noF2noF3(pfx) && sz == 4) {
14531 Bool ok = False;
14532 delta = dis_MMX( &ok, vbi, pfx, sz, delta-1 );
14533 if (ok) goto decode_success;
14534 }
14535 /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
sewardj8eb7ae82012-06-24 14:00:27 +000014536 if (have66noF2noF3(pfx) && sz == 2 && epartIsReg(getUChar(delta))) {
14537 delta = dis_MASKMOVDQU( vbi, pfx, delta, False/*!isAvx*/ );
14538 goto decode_success;
sewardj80611e32012-01-20 13:07:24 +000014539 }
14540 break;
14541
14542 case 0xF8:
14543 /* 66 0F F8 = PSUBB */
14544 if (have66noF2noF3(pfx) && sz == 2) {
14545 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14546 "psubb", Iop_Sub8x16, False );
14547 goto decode_success;
14548 }
14549 break;
14550
14551 case 0xF9:
14552 /* 66 0F F9 = PSUBW */
14553 if (have66noF2noF3(pfx) && sz == 2) {
14554 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14555 "psubw", Iop_Sub16x8, False );
14556 goto decode_success;
14557 }
14558 break;
14559
14560 case 0xFA:
14561 /* 66 0F FA = PSUBD */
14562 if (have66noF2noF3(pfx) && sz == 2) {
14563 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14564 "psubd", Iop_Sub32x4, False );
14565 goto decode_success;
14566 }
14567 break;
14568
14569 case 0xFB:
14570 /* 66 0F FB = PSUBQ */
14571 if (have66noF2noF3(pfx) && sz == 2) {
14572 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14573 "psubq", Iop_Sub64x2, False );
14574 goto decode_success;
14575 }
14576 /* ***--- this is an MMX class insn introduced in SSE2 ---*** */
14577 /* 0F FB = PSUBQ -- sub 64x1 */
14578 if (haveNo66noF2noF3(pfx) && sz == 4) {
14579 do_MMX_preamble();
14580 delta = dis_MMXop_regmem_to_reg (
14581 vbi, pfx, delta, opc, "psubq", False );
14582 goto decode_success;
14583 }
14584 break;
14585
14586 case 0xFC:
14587 /* 66 0F FC = PADDB */
14588 if (have66noF2noF3(pfx) && sz == 2) {
14589 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14590 "paddb", Iop_Add8x16, False );
14591 goto decode_success;
14592 }
14593 break;
14594
14595 case 0xFD:
14596 /* 66 0F FD = PADDW */
14597 if (have66noF2noF3(pfx) && sz == 2) {
14598 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14599 "paddw", Iop_Add16x8, False );
14600 goto decode_success;
14601 }
14602 break;
14603
14604 case 0xFE:
14605 /* 66 0F FE = PADDD */
14606 if (have66noF2noF3(pfx) && sz == 2) {
14607 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
14608 "paddd", Iop_Add32x4, False );
14609 goto decode_success;
14610 }
14611 break;
14612
14613 default:
14614 goto decode_failure;
14615
14616 }
14617
14618 decode_failure:
14619 *decode_OK = False;
14620 return deltaIN;
14621
14622 decode_success:
14623 *decode_OK = True;
14624 return delta;
14625}
14626
14627
/*------------------------------------------------------------*/
/*---                                                      ---*/
/*--- Top-level SSE3 (not SupSSE3): dis_ESC_0F__SSE3       ---*/
/*---                                                      ---*/
/*------------------------------------------------------------*/
14633
sewardjc4530ae2012-05-21 10:18:49 +000014634static Long dis_MOVDDUP_128 ( VexAbiInfo* vbi, Prefix pfx,
14635 Long delta, Bool isAvx )
14636{
14637 IRTemp addr = IRTemp_INVALID;
14638 Int alen = 0;
14639 HChar dis_buf[50];
14640 IRTemp sV = newTemp(Ity_V128);
14641 IRTemp d0 = newTemp(Ity_I64);
14642 UChar modrm = getUChar(delta);
14643 UInt rG = gregOfRexRM(pfx,modrm);
14644 if (epartIsReg(modrm)) {
14645 UInt rE = eregOfRexRM(pfx,modrm);
14646 assign( sV, getXMMReg(rE) );
14647 DIP("%smovddup %s,%s\n",
14648 isAvx ? "v" : "", nameXMMReg(rE), nameXMMReg(rG));
14649 delta += 1;
14650 assign ( d0, unop(Iop_V128to64, mkexpr(sV)) );
14651 } else {
14652 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14653 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14654 DIP("%smovddup %s,%s\n",
14655 isAvx ? "v" : "", dis_buf, nameXMMReg(rG));
14656 delta += alen;
14657 }
14658 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14659 ( rG, binop(Iop_64HLtoV128,mkexpr(d0),mkexpr(d0)) );
14660 return delta;
14661}
14662
14663
sewardj82096922012-06-24 14:57:59 +000014664static Long dis_MOVDDUP_256 ( VexAbiInfo* vbi, Prefix pfx,
14665 Long delta )
14666{
14667 IRTemp addr = IRTemp_INVALID;
14668 Int alen = 0;
14669 HChar dis_buf[50];
14670 IRTemp d0 = newTemp(Ity_I64);
14671 IRTemp d1 = newTemp(Ity_I64);
14672 UChar modrm = getUChar(delta);
14673 UInt rG = gregOfRexRM(pfx,modrm);
14674 if (epartIsReg(modrm)) {
14675 UInt rE = eregOfRexRM(pfx,modrm);
14676 DIP("vmovddup %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
14677 delta += 1;
14678 assign ( d0, getYMMRegLane64(rE, 0) );
14679 assign ( d1, getYMMRegLane64(rE, 2) );
14680 } else {
14681 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14682 assign( d0, loadLE(Ity_I64, mkexpr(addr)) );
14683 assign( d1, loadLE(Ity_I64, binop(Iop_Add64,
14684 mkexpr(addr), mkU64(16))) );
14685 DIP("vmovddup %s,%s\n", dis_buf, nameYMMReg(rG));
14686 delta += alen;
14687 }
14688 putYMMRegLane64( rG, 0, mkexpr(d0) );
14689 putYMMRegLane64( rG, 1, mkexpr(d0) );
14690 putYMMRegLane64( rG, 2, mkexpr(d1) );
14691 putYMMRegLane64( rG, 3, mkexpr(d1) );
14692 return delta;
14693}
14694
14695
sewardj15ad1942012-06-20 10:21:05 +000014696static Long dis_MOVSxDUP_128 ( VexAbiInfo* vbi, Prefix pfx,
14697 Long delta, Bool isAvx, Bool isL )
14698{
14699 IRTemp addr = IRTemp_INVALID;
14700 Int alen = 0;
14701 HChar dis_buf[50];
14702 IRTemp sV = newTemp(Ity_V128);
14703 UChar modrm = getUChar(delta);
14704 UInt rG = gregOfRexRM(pfx,modrm);
14705 IRTemp s3, s2, s1, s0;
14706 s3 = s2 = s1 = s0 = IRTemp_INVALID;
14707 if (epartIsReg(modrm)) {
14708 UInt rE = eregOfRexRM(pfx,modrm);
14709 assign( sV, getXMMReg(rE) );
14710 DIP("%smovs%cdup %s,%s\n",
14711 isAvx ? "v" : "", isL ? 'l' : 'h', nameXMMReg(rE), nameXMMReg(rG));
14712 delta += 1;
14713 } else {
14714 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14715 if (!isAvx)
14716 gen_SEGV_if_not_16_aligned( addr );
14717 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
14718 DIP("%smovs%cdup %s,%s\n",
14719 isAvx ? "v" : "", isL ? 'l' : 'h', dis_buf, nameXMMReg(rG));
14720 delta += alen;
14721 }
14722 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14723 (isAvx ? putYMMRegLoAndZU : putXMMReg)
14724 ( rG, isL ? mkV128from32s( s2, s2, s0, s0 )
14725 : mkV128from32s( s3, s3, s1, s1 ) );
14726 return delta;
14727}
14728
14729
14730static Long dis_MOVSxDUP_256 ( VexAbiInfo* vbi, Prefix pfx,
14731 Long delta, Bool isL )
14732{
14733 IRTemp addr = IRTemp_INVALID;
14734 Int alen = 0;
14735 HChar dis_buf[50];
14736 IRTemp sV = newTemp(Ity_V256);
sewardj15ad1942012-06-20 10:21:05 +000014737 UChar modrm = getUChar(delta);
14738 UInt rG = gregOfRexRM(pfx,modrm);
14739 IRTemp s7, s6, s5, s4, s3, s2, s1, s0;
14740 s7 = s6 = s5 = s4 = s3 = s2 = s1 = s0 = IRTemp_INVALID;
14741 if (epartIsReg(modrm)) {
14742 UInt rE = eregOfRexRM(pfx,modrm);
14743 assign( sV, getYMMReg(rE) );
14744 DIP("vmovs%cdup %s,%s\n",
14745 isL ? 'l' : 'h', nameYMMReg(rE), nameYMMReg(rG));
14746 delta += 1;
14747 } else {
14748 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14749 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
14750 DIP("vmovs%cdup %s,%s\n",
14751 isL ? 'l' : 'h', dis_buf, nameYMMReg(rG));
14752 delta += alen;
14753 }
sewardj4f228902012-06-21 09:17:58 +000014754 breakupV256to32s( sV, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
sewardj15ad1942012-06-20 10:21:05 +000014755 putYMMRegLane128( rG, 1, isL ? mkV128from32s( s6, s6, s4, s4 )
14756 : mkV128from32s( s7, s7, s5, s5 ) );
14757 putYMMRegLane128( rG, 0, isL ? mkV128from32s( s2, s2, s0, s0 )
14758 : mkV128from32s( s3, s3, s1, s1 ) );
14759 return delta;
14760}
14761
14762
sewardjadf357c2012-06-24 13:44:17 +000014763static IRTemp math_HADDPS_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14764{
14765 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
14766 IRTemp leftV = newTemp(Ity_V128);
14767 IRTemp rightV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014768 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000014769 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
14770
14771 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
14772 breakupV128to32s( dV, &d3, &d2, &d1, &d0 );
14773
14774 assign( leftV, mkV128from32s( s2, s0, d2, d0 ) );
14775 assign( rightV, mkV128from32s( s3, s1, d3, d1 ) );
14776
14777 IRTemp res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014778 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
14779 assign( res, triop(isAdd ? Iop_Add32Fx4 : Iop_Sub32Fx4,
14780 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
sewardjadf357c2012-06-24 13:44:17 +000014781 return res;
14782}
14783
14784
14785static IRTemp math_HADDPD_128 ( IRTemp dV, IRTemp sV, Bool isAdd )
14786{
14787 IRTemp s1, s0, d1, d0;
14788 IRTemp leftV = newTemp(Ity_V128);
14789 IRTemp rightV = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014790 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000014791 s1 = s0 = d1 = d0 = IRTemp_INVALID;
14792
14793 breakupV128to64s( sV, &s1, &s0 );
14794 breakupV128to64s( dV, &d1, &d0 );
14795
14796 assign( leftV, binop(Iop_64HLtoV128, mkexpr(s0), mkexpr(d0)) );
14797 assign( rightV, binop(Iop_64HLtoV128, mkexpr(s1), mkexpr(d1)) );
14798
14799 IRTemp res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000014800 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
14801 assign( res, triop(isAdd ? Iop_Add64Fx2 : Iop_Sub64Fx2,
14802 mkexpr(rm), mkexpr(leftV), mkexpr(rightV) ) );
sewardjadf357c2012-06-24 13:44:17 +000014803 return res;
14804}
14805
14806
sewardj80611e32012-01-20 13:07:24 +000014807__attribute__((noinline))
14808static
14809Long dis_ESC_0F__SSE3 ( Bool* decode_OK,
14810 VexAbiInfo* vbi,
14811 Prefix pfx, Int sz, Long deltaIN )
14812{
14813 IRTemp addr = IRTemp_INVALID;
14814 UChar modrm = 0;
14815 Int alen = 0;
14816 HChar dis_buf[50];
14817
14818 *decode_OK = False;
14819
14820 Long delta = deltaIN;
14821 UChar opc = getUChar(delta);
14822 delta++;
14823 switch (opc) {
14824
14825 case 0x12:
14826 /* F3 0F 12 = MOVSLDUP -- move from E (mem or xmm) to G (xmm),
14827 duplicating some lanes (2:2:0:0). */
14828 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014829 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14830 True/*isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014831 goto decode_success;
14832 }
14833 /* F2 0F 12 = MOVDDUP -- move from E (mem or xmm) to G (xmm),
14834 duplicating some lanes (0:1:0:1). */
14835 if (haveF2no66noF3(pfx)
14836 && (sz == 4 || /* ignore redundant REX.W */ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000014837 delta = dis_MOVDDUP_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000014838 goto decode_success;
14839 }
14840 break;
14841
14842 case 0x16:
14843 /* F3 0F 16 = MOVSHDUP -- move from E (mem or xmm) to G (xmm),
14844 duplicating some lanes (3:3:1:1). */
14845 if (haveF3no66noF2(pfx) && sz == 4) {
sewardj15ad1942012-06-20 10:21:05 +000014846 delta = dis_MOVSxDUP_128( vbi, pfx, delta, False/*!isAvx*/,
14847 False/*!isL*/ );
sewardj80611e32012-01-20 13:07:24 +000014848 goto decode_success;
14849 }
14850 break;
14851
14852 case 0x7C:
14853 case 0x7D:
14854 /* F2 0F 7C = HADDPS -- 32x4 add across from E (mem or xmm) to G (xmm). */
14855 /* F2 0F 7D = HSUBPS -- 32x4 sub across from E (mem or xmm) to G (xmm). */
14856 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014857 IRTemp eV = newTemp(Ity_V128);
14858 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014859 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000014860 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014861 modrm = getUChar(delta);
14862 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014863 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014864 UInt rE = eregOfRexRM(pfx,modrm);
14865 assign( eV, getXMMReg(rE) );
14866 DIP("h%sps %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014867 delta += 1;
14868 } else {
14869 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14870 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014871 DIP("h%sps %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014872 delta += alen;
14873 }
14874
sewardjadf357c2012-06-24 13:44:17 +000014875 assign( gV, getXMMReg(rG) );
14876 putXMMReg( rG, mkexpr( math_HADDPS_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014877 goto decode_success;
14878 }
14879 /* 66 0F 7C = HADDPD -- 64x2 add across from E (mem or xmm) to G (xmm). */
14880 /* 66 0F 7D = HSUBPD -- 64x2 sub across from E (mem or xmm) to G (xmm). */
14881 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000014882 IRTemp eV = newTemp(Ity_V128);
14883 IRTemp gV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000014884 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000014885 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000014886 modrm = getUChar(delta);
14887 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014888 if (epartIsReg(modrm)) {
sewardjadf357c2012-06-24 13:44:17 +000014889 UInt rE = eregOfRexRM(pfx,modrm);
14890 assign( eV, getXMMReg(rE) );
14891 DIP("h%spd %s,%s\n", str, nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014892 delta += 1;
14893 } else {
14894 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14895 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjadf357c2012-06-24 13:44:17 +000014896 DIP("h%spd %s,%s\n", str, dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014897 delta += alen;
14898 }
14899
sewardjadf357c2012-06-24 13:44:17 +000014900 assign( gV, getXMMReg(rG) );
14901 putXMMReg( rG, mkexpr( math_HADDPD_128 ( gV, eV, isAdd ) ) );
sewardj80611e32012-01-20 13:07:24 +000014902 goto decode_success;
14903 }
14904 break;
14905
14906 case 0xD0:
14907 /* 66 0F D0 = ADDSUBPD -- 64x4 +/- from E (mem or xmm) to G (xmm). */
14908 if (have66noF2noF3(pfx) && sz == 2) {
14909 IRTemp eV = newTemp(Ity_V128);
14910 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014911 modrm = getUChar(delta);
14912 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014913 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014914 UInt rE = eregOfRexRM(pfx,modrm);
14915 assign( eV, getXMMReg(rE) );
14916 DIP("addsubpd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014917 delta += 1;
14918 } else {
14919 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14920 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014921 DIP("addsubpd %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014922 delta += alen;
14923 }
14924
sewardj89378162012-06-24 12:12:20 +000014925 assign( gV, getXMMReg(rG) );
14926 putXMMReg( rG, mkexpr( math_ADDSUBPD_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014927 goto decode_success;
14928 }
14929 /* F2 0F D0 = ADDSUBPS -- 32x4 +/-/+/- from E (mem or xmm) to G (xmm). */
14930 if (haveF2no66noF3(pfx) && sz == 4) {
sewardj80611e32012-01-20 13:07:24 +000014931 IRTemp eV = newTemp(Ity_V128);
14932 IRTemp gV = newTemp(Ity_V128);
sewardj89378162012-06-24 12:12:20 +000014933 modrm = getUChar(delta);
14934 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000014935
14936 modrm = getUChar(delta);
14937 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000014938 UInt rE = eregOfRexRM(pfx,modrm);
14939 assign( eV, getXMMReg(rE) );
14940 DIP("addsubps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014941 delta += 1;
14942 } else {
14943 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14944 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj89378162012-06-24 12:12:20 +000014945 DIP("addsubps %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000014946 delta += alen;
14947 }
14948
sewardj89378162012-06-24 12:12:20 +000014949 assign( gV, getXMMReg(rG) );
14950 putXMMReg( rG, mkexpr( math_ADDSUBPS_128 ( gV, eV ) ) );
sewardj80611e32012-01-20 13:07:24 +000014951 goto decode_success;
14952 }
14953 break;
14954
14955 case 0xF0:
14956 /* F2 0F F0 = LDDQU -- move from E (mem or xmm) to G (xmm). */
14957 if (haveF2no66noF3(pfx) && sz == 4) {
14958 modrm = getUChar(delta);
14959 if (epartIsReg(modrm)) {
14960 goto decode_failure;
14961 } else {
14962 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
14963 putXMMReg( gregOfRexRM(pfx,modrm),
14964 loadLE(Ity_V128, mkexpr(addr)) );
14965 DIP("lddqu %s,%s\n", dis_buf,
14966 nameXMMReg(gregOfRexRM(pfx,modrm)));
14967 delta += alen;
14968 }
14969 goto decode_success;
14970 }
14971 break;
14972
14973 default:
14974 goto decode_failure;
14975
14976 }
14977
14978 decode_failure:
14979 *decode_OK = False;
14980 return deltaIN;
14981
14982 decode_success:
14983 *decode_OK = True;
14984 return delta;
14985}
14986
14987
14988/*------------------------------------------------------------*/
14989/*--- ---*/
14990/*--- Top-level SSSE3: dis_ESC_0F38__SupSSE3 ---*/
14991/*--- ---*/
14992/*------------------------------------------------------------*/
14993
sewardjc4530ae2012-05-21 10:18:49 +000014994static
14995IRTemp math_PSHUFB_XMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
14996{
14997 IRTemp sHi = newTemp(Ity_I64);
14998 IRTemp sLo = newTemp(Ity_I64);
14999 IRTemp dHi = newTemp(Ity_I64);
15000 IRTemp dLo = newTemp(Ity_I64);
15001 IRTemp rHi = newTemp(Ity_I64);
15002 IRTemp rLo = newTemp(Ity_I64);
15003 IRTemp sevens = newTemp(Ity_I64);
15004 IRTemp mask0x80hi = newTemp(Ity_I64);
15005 IRTemp mask0x80lo = newTemp(Ity_I64);
15006 IRTemp maskBit3hi = newTemp(Ity_I64);
15007 IRTemp maskBit3lo = newTemp(Ity_I64);
15008 IRTemp sAnd7hi = newTemp(Ity_I64);
15009 IRTemp sAnd7lo = newTemp(Ity_I64);
15010 IRTemp permdHi = newTemp(Ity_I64);
15011 IRTemp permdLo = newTemp(Ity_I64);
15012 IRTemp res = newTemp(Ity_V128);
15013
15014 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15015 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15016 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15017 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15018
15019 assign( sevens, mkU64(0x0707070707070707ULL) );
15020
15021 /* mask0x80hi = Not(SarN8x8(sHi,7))
15022 maskBit3hi = SarN8x8(ShlN8x8(sHi,4),7)
15023 sAnd7hi = And(sHi,sevens)
15024 permdHi = Or( And(Perm8x8(dHi,sAnd7hi),maskBit3hi),
15025 And(Perm8x8(dLo,sAnd7hi),Not(maskBit3hi)) )
15026 rHi = And(permdHi,mask0x80hi)
15027 */
15028 assign(
15029 mask0x80hi,
15030 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sHi),mkU8(7))));
15031
15032 assign(
15033 maskBit3hi,
15034 binop(Iop_SarN8x8,
15035 binop(Iop_ShlN8x8,mkexpr(sHi),mkU8(4)),
15036 mkU8(7)));
15037
15038 assign(sAnd7hi, binop(Iop_And64,mkexpr(sHi),mkexpr(sevens)));
15039
15040 assign(
15041 permdHi,
15042 binop(
15043 Iop_Or64,
15044 binop(Iop_And64,
15045 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7hi)),
15046 mkexpr(maskBit3hi)),
15047 binop(Iop_And64,
15048 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7hi)),
15049 unop(Iop_Not64,mkexpr(maskBit3hi))) ));
15050
15051 assign(rHi, binop(Iop_And64,mkexpr(permdHi),mkexpr(mask0x80hi)) );
15052
15053 /* And the same for the lower half of the result. What fun. */
15054
15055 assign(
15056 mask0x80lo,
15057 unop(Iop_Not64, binop(Iop_SarN8x8,mkexpr(sLo),mkU8(7))));
15058
15059 assign(
15060 maskBit3lo,
15061 binop(Iop_SarN8x8,
15062 binop(Iop_ShlN8x8,mkexpr(sLo),mkU8(4)),
15063 mkU8(7)));
15064
15065 assign(sAnd7lo, binop(Iop_And64,mkexpr(sLo),mkexpr(sevens)));
15066
15067 assign(
15068 permdLo,
15069 binop(
15070 Iop_Or64,
15071 binop(Iop_And64,
15072 binop(Iop_Perm8x8,mkexpr(dHi),mkexpr(sAnd7lo)),
15073 mkexpr(maskBit3lo)),
15074 binop(Iop_And64,
15075 binop(Iop_Perm8x8,mkexpr(dLo),mkexpr(sAnd7lo)),
15076 unop(Iop_Not64,mkexpr(maskBit3lo))) ));
15077
15078 assign(rLo, binop(Iop_And64,mkexpr(permdLo),mkexpr(mask0x80lo)) );
15079
15080 assign(res, binop(Iop_64HLtoV128, mkexpr(rHi), mkexpr(rLo)));
15081 return res;
15082}
15083
15084
sewardjcc3d2192013-03-27 11:37:33 +000015085static
15086IRTemp math_PSHUFB_YMM ( IRTemp dV/*data to perm*/, IRTemp sV/*perm*/ )
15087{
15088 IRTemp sHi, sLo, dHi, dLo;
15089 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15090 breakupV256toV128s( dV, &dHi, &dLo);
15091 breakupV256toV128s( sV, &sHi, &sLo);
15092 IRTemp res = newTemp(Ity_V256);
15093 assign(res, binop(Iop_V128HLtoV256,
15094 mkexpr(math_PSHUFB_XMM(dHi, sHi)),
15095 mkexpr(math_PSHUFB_XMM(dLo, sLo))));
15096 return res;
15097}
15098
15099
sewardj8516a1f2012-06-24 14:26:30 +000015100static Long dis_PHADD_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
15101 Bool isAvx, UChar opc )
15102{
15103 IRTemp addr = IRTemp_INVALID;
15104 Int alen = 0;
15105 HChar dis_buf[50];
florian55085f82012-11-21 00:36:55 +000015106 const HChar* str = "???";
sewardj8516a1f2012-06-24 14:26:30 +000015107 IROp opV64 = Iop_INVALID;
15108 IROp opCatO = Iop_CatOddLanes16x4;
15109 IROp opCatE = Iop_CatEvenLanes16x4;
15110 IRTemp sV = newTemp(Ity_V128);
15111 IRTemp dV = newTemp(Ity_V128);
15112 IRTemp sHi = newTemp(Ity_I64);
15113 IRTemp sLo = newTemp(Ity_I64);
15114 IRTemp dHi = newTemp(Ity_I64);
15115 IRTemp dLo = newTemp(Ity_I64);
15116 UChar modrm = getUChar(delta);
15117 UInt rG = gregOfRexRM(pfx,modrm);
15118 UInt rV = isAvx ? getVexNvvvv(pfx) : rG;
15119
15120 switch (opc) {
15121 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15122 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15123 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15124 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15125 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15126 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15127 default: vassert(0);
15128 }
15129 if (opc == 0x02 || opc == 0x06) {
15130 opCatO = Iop_InterleaveHI32x2;
15131 opCatE = Iop_InterleaveLO32x2;
15132 }
15133
15134 assign( dV, getXMMReg(rV) );
15135
15136 if (epartIsReg(modrm)) {
15137 UInt rE = eregOfRexRM(pfx,modrm);
15138 assign( sV, getXMMReg(rE) );
sewardjcc3d2192013-03-27 11:37:33 +000015139 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15140 nameXMMReg(rE), nameXMMReg(rG));
sewardj8516a1f2012-06-24 14:26:30 +000015141 delta += 1;
15142 } else {
15143 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15144 if (!isAvx)
15145 gen_SEGV_if_not_16_aligned( addr );
15146 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
sewardjcc3d2192013-03-27 11:37:33 +000015147 DIP("%sph%s %s,%s\n", isAvx ? "v" : "", str,
15148 dis_buf, nameXMMReg(rG));
sewardj8516a1f2012-06-24 14:26:30 +000015149 delta += alen;
15150 }
15151
15152 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15153 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15154 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15155 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15156
15157 /* This isn't a particularly efficient way to compute the
15158 result, but at least it avoids a proliferation of IROps,
15159 hence avoids complication all the backends. */
15160
15161 (isAvx ? putYMMRegLoAndZU : putXMMReg)
15162 ( rG,
15163 binop(Iop_64HLtoV128,
15164 binop(opV64,
15165 binop(opCatE,mkexpr(sHi),mkexpr(sLo)),
15166 binop(opCatO,mkexpr(sHi),mkexpr(sLo)) ),
15167 binop(opV64,
15168 binop(opCatE,mkexpr(dHi),mkexpr(dLo)),
15169 binop(opCatO,mkexpr(dHi),mkexpr(dLo)) ) ) );
15170 return delta;
15171}
15172
15173
sewardjcc3d2192013-03-27 11:37:33 +000015174static Long dis_PHADD_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
15175{
15176 IRTemp addr = IRTemp_INVALID;
15177 Int alen = 0;
15178 HChar dis_buf[50];
15179 const HChar* str = "???";
15180 IROp opV64 = Iop_INVALID;
15181 IROp opCatO = Iop_CatOddLanes16x4;
15182 IROp opCatE = Iop_CatEvenLanes16x4;
15183 IRTemp sV = newTemp(Ity_V256);
15184 IRTemp dV = newTemp(Ity_V256);
15185 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
15186 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
15187 UChar modrm = getUChar(delta);
15188 UInt rG = gregOfRexRM(pfx,modrm);
15189 UInt rV = getVexNvvvv(pfx);
15190
15191 switch (opc) {
15192 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15193 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15194 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15195 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15196 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15197 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15198 default: vassert(0);
15199 }
15200 if (opc == 0x02 || opc == 0x06) {
15201 opCatO = Iop_InterleaveHI32x2;
15202 opCatE = Iop_InterleaveLO32x2;
15203 }
15204
15205 assign( dV, getYMMReg(rV) );
15206
15207 if (epartIsReg(modrm)) {
15208 UInt rE = eregOfRexRM(pfx,modrm);
15209 assign( sV, getYMMReg(rE) );
15210 DIP("vph%s %s,%s\n", str, nameYMMReg(rE), nameYMMReg(rG));
15211 delta += 1;
15212 } else {
15213 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15214 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
15215 DIP("vph%s %s,%s\n", str, dis_buf, nameYMMReg(rG));
15216 delta += alen;
15217 }
15218
15219 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
15220 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
15221
15222 /* This isn't a particularly efficient way to compute the
15223 result, but at least it avoids a proliferation of IROps,
15224 hence avoids complication all the backends. */
15225
15226 putYMMReg( rG,
15227 binop(Iop_V128HLtoV256,
15228 binop(Iop_64HLtoV128,
15229 binop(opV64,
15230 binop(opCatE,mkexpr(s3),mkexpr(s2)),
15231 binop(opCatO,mkexpr(s3),mkexpr(s2)) ),
15232 binop(opV64,
15233 binop(opCatE,mkexpr(d3),mkexpr(d2)),
15234 binop(opCatO,mkexpr(d3),mkexpr(d2)) ) ),
15235 binop(Iop_64HLtoV128,
15236 binop(opV64,
15237 binop(opCatE,mkexpr(s1),mkexpr(s0)),
15238 binop(opCatO,mkexpr(s1),mkexpr(s0)) ),
15239 binop(opV64,
15240 binop(opCatE,mkexpr(d1),mkexpr(d0)),
15241 binop(opCatO,mkexpr(d1),mkexpr(d0)) ) ) ) );
15242 return delta;
15243}
15244
15245
sewardj8516a1f2012-06-24 14:26:30 +000015246static IRTemp math_PMADDUBSW_128 ( IRTemp dV, IRTemp sV )
15247{
15248 IRTemp sVoddsSX = newTemp(Ity_V128);
15249 IRTemp sVevensSX = newTemp(Ity_V128);
15250 IRTemp dVoddsZX = newTemp(Ity_V128);
15251 IRTemp dVevensZX = newTemp(Ity_V128);
15252 /* compute dV unsigned x sV signed */
15253 assign( sVoddsSX, binop(Iop_SarN16x8, mkexpr(sV), mkU8(8)) );
15254 assign( sVevensSX, binop(Iop_SarN16x8,
15255 binop(Iop_ShlN16x8, mkexpr(sV), mkU8(8)),
15256 mkU8(8)) );
15257 assign( dVoddsZX, binop(Iop_ShrN16x8, mkexpr(dV), mkU8(8)) );
15258 assign( dVevensZX, binop(Iop_ShrN16x8,
15259 binop(Iop_ShlN16x8, mkexpr(dV), mkU8(8)),
15260 mkU8(8)) );
15261
15262 IRTemp res = newTemp(Ity_V128);
15263 assign( res, binop(Iop_QAdd16Sx8,
15264 binop(Iop_Mul16x8, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15265 binop(Iop_Mul16x8, mkexpr(sVevensSX), mkexpr(dVevensZX))
15266 )
15267 );
15268 return res;
15269}
15270
15271
sewardjcc3d2192013-03-27 11:37:33 +000015272static
15273IRTemp math_PMADDUBSW_256 ( IRTemp dV, IRTemp sV )
15274{
15275 IRTemp sHi, sLo, dHi, dLo;
15276 sHi = sLo = dHi = dLo = IRTemp_INVALID;
15277 breakupV256toV128s( dV, &dHi, &dLo);
15278 breakupV256toV128s( sV, &sHi, &sLo);
15279 IRTemp res = newTemp(Ity_V256);
15280 assign(res, binop(Iop_V128HLtoV256,
15281 mkexpr(math_PMADDUBSW_128(dHi, sHi)),
15282 mkexpr(math_PMADDUBSW_128(dLo, sLo))));
15283 return res;
15284}
15285
15286
sewardj80611e32012-01-20 13:07:24 +000015287__attribute__((noinline))
15288static
15289Long dis_ESC_0F38__SupSSE3 ( Bool* decode_OK,
15290 VexAbiInfo* vbi,
15291 Prefix pfx, Int sz, Long deltaIN )
15292{
15293 IRTemp addr = IRTemp_INVALID;
15294 UChar modrm = 0;
15295 Int alen = 0;
15296 HChar dis_buf[50];
15297
15298 *decode_OK = False;
15299
15300 Long delta = deltaIN;
15301 UChar opc = getUChar(delta);
15302 delta++;
15303 switch (opc) {
15304
15305 case 0x00:
15306 /* 66 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x16 (XMM) */
15307 if (have66noF2noF3(pfx)
15308 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardjc4530ae2012-05-21 10:18:49 +000015309 IRTemp sV = newTemp(Ity_V128);
15310 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000015311
15312 modrm = getUChar(delta);
15313 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15314
15315 if (epartIsReg(modrm)) {
15316 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15317 delta += 1;
15318 DIP("pshufb %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15319 nameXMMReg(gregOfRexRM(pfx,modrm)));
15320 } else {
15321 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15322 gen_SEGV_if_not_16_aligned( addr );
15323 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15324 delta += alen;
15325 DIP("pshufb %s,%s\n", dis_buf,
15326 nameXMMReg(gregOfRexRM(pfx,modrm)));
15327 }
15328
sewardjc4530ae2012-05-21 10:18:49 +000015329 IRTemp res = math_PSHUFB_XMM( dV, sV );
15330 putXMMReg(gregOfRexRM(pfx,modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000015331 goto decode_success;
15332 }
15333 /* 0F 38 00 = PSHUFB -- Packed Shuffle Bytes 8x8 (MMX) */
15334 if (haveNo66noF2noF3(pfx) && sz == 4) {
15335 IRTemp sV = newTemp(Ity_I64);
15336 IRTemp dV = newTemp(Ity_I64);
15337
15338 modrm = getUChar(delta);
15339 do_MMX_preamble();
15340 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15341
15342 if (epartIsReg(modrm)) {
15343 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15344 delta += 1;
15345 DIP("pshufb %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15346 nameMMXReg(gregLO3ofRM(modrm)));
15347 } else {
15348 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15349 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15350 delta += alen;
15351 DIP("pshufb %s,%s\n", dis_buf,
15352 nameMMXReg(gregLO3ofRM(modrm)));
15353 }
15354
15355 putMMXReg(
15356 gregLO3ofRM(modrm),
15357 binop(
15358 Iop_And64,
15359 /* permute the lanes */
15360 binop(
15361 Iop_Perm8x8,
15362 mkexpr(dV),
15363 binop(Iop_And64, mkexpr(sV), mkU64(0x0707070707070707ULL))
15364 ),
15365 /* mask off lanes which have (index & 0x80) == 0x80 */
15366 unop(Iop_Not64, binop(Iop_SarN8x8, mkexpr(sV), mkU8(7)))
15367 )
15368 );
15369 goto decode_success;
15370 }
15371 break;
15372
15373 case 0x01:
15374 case 0x02:
15375 case 0x03:
15376 case 0x05:
15377 case 0x06:
15378 case 0x07:
15379 /* 66 0F 38 01 = PHADDW -- 16x8 add across from E (mem or xmm) and
15380 G to G (xmm). */
15381 /* 66 0F 38 02 = PHADDD -- 32x4 add across from E (mem or xmm) and
15382 G to G (xmm). */
15383 /* 66 0F 38 03 = PHADDSW -- 16x8 signed qadd across from E (mem or
15384 xmm) and G to G (xmm). */
15385 /* 66 0F 38 05 = PHSUBW -- 16x8 sub across from E (mem or xmm) and
15386 G to G (xmm). */
15387 /* 66 0F 38 06 = PHSUBD -- 32x4 sub across from E (mem or xmm) and
15388 G to G (xmm). */
15389 /* 66 0F 38 07 = PHSUBSW -- 16x8 signed qsub across from E (mem or
15390 xmm) and G to G (xmm). */
15391 if (have66noF2noF3(pfx)
15392 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000015393 delta = dis_PHADD_128( vbi, pfx, delta, False/*isAvx*/, opc );
sewardj80611e32012-01-20 13:07:24 +000015394 goto decode_success;
15395 }
15396 /* ***--- these are MMX class insns introduced in SSSE3 ---*** */
15397 /* 0F 38 01 = PHADDW -- 16x4 add across from E (mem or mmx) and G
15398 to G (mmx). */
15399 /* 0F 38 02 = PHADDD -- 32x2 add across from E (mem or mmx) and G
15400 to G (mmx). */
15401 /* 0F 38 03 = PHADDSW -- 16x4 signed qadd across from E (mem or
15402 mmx) and G to G (mmx). */
15403 /* 0F 38 05 = PHSUBW -- 16x4 sub across from E (mem or mmx) and G
15404 to G (mmx). */
15405 /* 0F 38 06 = PHSUBD -- 32x2 sub across from E (mem or mmx) and G
15406 to G (mmx). */
15407 /* 0F 38 07 = PHSUBSW -- 16x4 signed qsub across from E (mem or
15408 mmx) and G to G (mmx). */
15409 if (haveNo66noF2noF3(pfx) && sz == 4) {
florian55085f82012-11-21 00:36:55 +000015410 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015411 IROp opV64 = Iop_INVALID;
15412 IROp opCatO = Iop_CatOddLanes16x4;
15413 IROp opCatE = Iop_CatEvenLanes16x4;
15414 IRTemp sV = newTemp(Ity_I64);
15415 IRTemp dV = newTemp(Ity_I64);
15416
15417 modrm = getUChar(delta);
15418
15419 switch (opc) {
15420 case 0x01: opV64 = Iop_Add16x4; str = "addw"; break;
15421 case 0x02: opV64 = Iop_Add32x2; str = "addd"; break;
15422 case 0x03: opV64 = Iop_QAdd16Sx4; str = "addsw"; break;
15423 case 0x05: opV64 = Iop_Sub16x4; str = "subw"; break;
15424 case 0x06: opV64 = Iop_Sub32x2; str = "subd"; break;
15425 case 0x07: opV64 = Iop_QSub16Sx4; str = "subsw"; break;
15426 default: vassert(0);
15427 }
15428 if (opc == 0x02 || opc == 0x06) {
15429 opCatO = Iop_InterleaveHI32x2;
15430 opCatE = Iop_InterleaveLO32x2;
15431 }
15432
15433 do_MMX_preamble();
15434 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15435
15436 if (epartIsReg(modrm)) {
15437 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15438 delta += 1;
15439 DIP("ph%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15440 nameMMXReg(gregLO3ofRM(modrm)));
15441 } else {
15442 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15443 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15444 delta += alen;
15445 DIP("ph%s %s,%s\n", str, dis_buf,
15446 nameMMXReg(gregLO3ofRM(modrm)));
15447 }
15448
15449 putMMXReg(
15450 gregLO3ofRM(modrm),
15451 binop(opV64,
15452 binop(opCatE,mkexpr(sV),mkexpr(dV)),
15453 binop(opCatO,mkexpr(sV),mkexpr(dV))
15454 )
15455 );
15456 goto decode_success;
15457 }
15458 break;
15459
15460 case 0x04:
15461 /* 66 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15462 Unsigned Bytes (XMM) */
15463 if (have66noF2noF3(pfx)
15464 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj8516a1f2012-06-24 14:26:30 +000015465 IRTemp sV = newTemp(Ity_V128);
15466 IRTemp dV = newTemp(Ity_V128);
15467 modrm = getUChar(delta);
15468 UInt rG = gregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000015469
sewardj8516a1f2012-06-24 14:26:30 +000015470 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000015471
15472 if (epartIsReg(modrm)) {
sewardj8516a1f2012-06-24 14:26:30 +000015473 UInt rE = eregOfRexRM(pfx,modrm);
15474 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000015475 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000015476 DIP("pmaddubsw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000015477 } else {
15478 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15479 gen_SEGV_if_not_16_aligned( addr );
15480 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15481 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000015482 DIP("pmaddubsw %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000015483 }
15484
sewardj8516a1f2012-06-24 14:26:30 +000015485 putXMMReg( rG, mkexpr( math_PMADDUBSW_128( dV, sV ) ) );
sewardj80611e32012-01-20 13:07:24 +000015486 goto decode_success;
15487 }
15488 /* 0F 38 04 = PMADDUBSW -- Multiply and Add Packed Signed and
15489 Unsigned Bytes (MMX) */
15490 if (haveNo66noF2noF3(pfx) && sz == 4) {
15491 IRTemp sV = newTemp(Ity_I64);
15492 IRTemp dV = newTemp(Ity_I64);
15493 IRTemp sVoddsSX = newTemp(Ity_I64);
15494 IRTemp sVevensSX = newTemp(Ity_I64);
15495 IRTemp dVoddsZX = newTemp(Ity_I64);
15496 IRTemp dVevensZX = newTemp(Ity_I64);
15497
15498 modrm = getUChar(delta);
15499 do_MMX_preamble();
15500 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15501
15502 if (epartIsReg(modrm)) {
15503 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15504 delta += 1;
15505 DIP("pmaddubsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15506 nameMMXReg(gregLO3ofRM(modrm)));
15507 } else {
15508 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15509 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15510 delta += alen;
15511 DIP("pmaddubsw %s,%s\n", dis_buf,
15512 nameMMXReg(gregLO3ofRM(modrm)));
15513 }
15514
15515 /* compute dV unsigned x sV signed */
15516 assign( sVoddsSX,
15517 binop(Iop_SarN16x4, mkexpr(sV), mkU8(8)) );
15518 assign( sVevensSX,
15519 binop(Iop_SarN16x4,
15520 binop(Iop_ShlN16x4, mkexpr(sV), mkU8(8)),
15521 mkU8(8)) );
15522 assign( dVoddsZX,
15523 binop(Iop_ShrN16x4, mkexpr(dV), mkU8(8)) );
15524 assign( dVevensZX,
15525 binop(Iop_ShrN16x4,
15526 binop(Iop_ShlN16x4, mkexpr(dV), mkU8(8)),
15527 mkU8(8)) );
15528
15529 putMMXReg(
15530 gregLO3ofRM(modrm),
15531 binop(Iop_QAdd16Sx4,
15532 binop(Iop_Mul16x4, mkexpr(sVoddsSX), mkexpr(dVoddsZX)),
15533 binop(Iop_Mul16x4, mkexpr(sVevensSX), mkexpr(dVevensZX))
15534 )
15535 );
15536 goto decode_success;
15537 }
15538 break;
15539
15540 case 0x08:
15541 case 0x09:
15542 case 0x0A:
15543 /* 66 0F 38 08 = PSIGNB -- Packed Sign 8x16 (XMM) */
15544 /* 66 0F 38 09 = PSIGNW -- Packed Sign 16x8 (XMM) */
15545 /* 66 0F 38 0A = PSIGND -- Packed Sign 32x4 (XMM) */
15546 if (have66noF2noF3(pfx)
15547 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15548 IRTemp sV = newTemp(Ity_V128);
15549 IRTemp dV = newTemp(Ity_V128);
15550 IRTemp sHi = newTemp(Ity_I64);
15551 IRTemp sLo = newTemp(Ity_I64);
15552 IRTemp dHi = newTemp(Ity_I64);
15553 IRTemp dLo = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015554 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015555 Int laneszB = 0;
15556
15557 switch (opc) {
15558 case 0x08: laneszB = 1; str = "b"; break;
15559 case 0x09: laneszB = 2; str = "w"; break;
15560 case 0x0A: laneszB = 4; str = "d"; break;
15561 default: vassert(0);
15562 }
15563
15564 modrm = getUChar(delta);
15565 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15566
15567 if (epartIsReg(modrm)) {
15568 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15569 delta += 1;
15570 DIP("psign%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
15571 nameXMMReg(gregOfRexRM(pfx,modrm)));
15572 } else {
15573 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15574 gen_SEGV_if_not_16_aligned( addr );
15575 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15576 delta += alen;
15577 DIP("psign%s %s,%s\n", str, dis_buf,
15578 nameXMMReg(gregOfRexRM(pfx,modrm)));
15579 }
15580
15581 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15582 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15583 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15584 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15585
15586 putXMMReg(
15587 gregOfRexRM(pfx,modrm),
15588 binop(Iop_64HLtoV128,
15589 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
15590 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
15591 )
15592 );
15593 goto decode_success;
15594 }
15595 /* 0F 38 08 = PSIGNB -- Packed Sign 8x8 (MMX) */
15596 /* 0F 38 09 = PSIGNW -- Packed Sign 16x4 (MMX) */
15597 /* 0F 38 0A = PSIGND -- Packed Sign 32x2 (MMX) */
15598 if (haveNo66noF2noF3(pfx) && sz == 4) {
15599 IRTemp sV = newTemp(Ity_I64);
15600 IRTemp dV = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015601 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015602 Int laneszB = 0;
15603
15604 switch (opc) {
15605 case 0x08: laneszB = 1; str = "b"; break;
15606 case 0x09: laneszB = 2; str = "w"; break;
15607 case 0x0A: laneszB = 4; str = "d"; break;
15608 default: vassert(0);
15609 }
15610
15611 modrm = getUChar(delta);
15612 do_MMX_preamble();
15613 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15614
15615 if (epartIsReg(modrm)) {
15616 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15617 delta += 1;
15618 DIP("psign%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15619 nameMMXReg(gregLO3ofRM(modrm)));
15620 } else {
15621 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15622 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15623 delta += alen;
15624 DIP("psign%s %s,%s\n", str, dis_buf,
15625 nameMMXReg(gregLO3ofRM(modrm)));
15626 }
15627
15628 putMMXReg(
15629 gregLO3ofRM(modrm),
15630 dis_PSIGN_helper( mkexpr(sV), mkexpr(dV), laneszB )
15631 );
15632 goto decode_success;
15633 }
15634 break;
15635
15636 case 0x0B:
15637 /* 66 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and
15638 Scale (XMM) */
15639 if (have66noF2noF3(pfx)
15640 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15641 IRTemp sV = newTemp(Ity_V128);
15642 IRTemp dV = newTemp(Ity_V128);
15643 IRTemp sHi = newTemp(Ity_I64);
15644 IRTemp sLo = newTemp(Ity_I64);
15645 IRTemp dHi = newTemp(Ity_I64);
15646 IRTemp dLo = newTemp(Ity_I64);
15647
15648 modrm = getUChar(delta);
15649 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15650
15651 if (epartIsReg(modrm)) {
15652 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15653 delta += 1;
15654 DIP("pmulhrsw %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)),
15655 nameXMMReg(gregOfRexRM(pfx,modrm)));
15656 } else {
15657 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15658 gen_SEGV_if_not_16_aligned( addr );
15659 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15660 delta += alen;
15661 DIP("pmulhrsw %s,%s\n", dis_buf,
15662 nameXMMReg(gregOfRexRM(pfx,modrm)));
15663 }
15664
15665 assign( dHi, unop(Iop_V128HIto64, mkexpr(dV)) );
15666 assign( dLo, unop(Iop_V128to64, mkexpr(dV)) );
15667 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
15668 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
15669
15670 putXMMReg(
15671 gregOfRexRM(pfx,modrm),
15672 binop(Iop_64HLtoV128,
15673 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
15674 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
15675 )
15676 );
15677 goto decode_success;
15678 }
15679 /* 0F 38 0B = PMULHRSW -- Packed Multiply High with Round and Scale
15680 (MMX) */
15681 if (haveNo66noF2noF3(pfx) && sz == 4) {
15682 IRTemp sV = newTemp(Ity_I64);
15683 IRTemp dV = newTemp(Ity_I64);
15684
15685 modrm = getUChar(delta);
15686 do_MMX_preamble();
15687 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15688
15689 if (epartIsReg(modrm)) {
15690 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15691 delta += 1;
15692 DIP("pmulhrsw %s,%s\n", nameMMXReg(eregLO3ofRM(modrm)),
15693 nameMMXReg(gregLO3ofRM(modrm)));
15694 } else {
15695 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15696 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15697 delta += alen;
15698 DIP("pmulhrsw %s,%s\n", dis_buf,
15699 nameMMXReg(gregLO3ofRM(modrm)));
15700 }
15701
15702 putMMXReg(
15703 gregLO3ofRM(modrm),
15704 dis_PMULHRSW_helper( mkexpr(sV), mkexpr(dV) )
15705 );
15706 goto decode_success;
15707 }
15708 break;
15709
15710 case 0x1C:
15711 case 0x1D:
15712 case 0x1E:
15713 /* 66 0F 38 1C = PABSB -- Packed Absolute Value 8x16 (XMM) */
15714 /* 66 0F 38 1D = PABSW -- Packed Absolute Value 16x8 (XMM) */
15715 /* 66 0F 38 1E = PABSD -- Packed Absolute Value 32x4 (XMM) */
15716 if (have66noF2noF3(pfx)
15717 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
sewardj97f72452012-05-23 05:56:53 +000015718 IRTemp sV = newTemp(Ity_V128);
florian55085f82012-11-21 00:36:55 +000015719 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015720 Int laneszB = 0;
15721
15722 switch (opc) {
15723 case 0x1C: laneszB = 1; str = "b"; break;
15724 case 0x1D: laneszB = 2; str = "w"; break;
15725 case 0x1E: laneszB = 4; str = "d"; break;
15726 default: vassert(0);
15727 }
15728
15729 modrm = getUChar(delta);
sewardj80611e32012-01-20 13:07:24 +000015730 if (epartIsReg(modrm)) {
15731 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15732 delta += 1;
15733 DIP("pabs%s %s,%s\n", str, nameXMMReg(eregOfRexRM(pfx,modrm)),
15734 nameXMMReg(gregOfRexRM(pfx,modrm)));
15735 } else {
15736 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15737 gen_SEGV_if_not_16_aligned( addr );
15738 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15739 delta += alen;
15740 DIP("pabs%s %s,%s\n", str, dis_buf,
15741 nameXMMReg(gregOfRexRM(pfx,modrm)));
15742 }
15743
sewardj97f72452012-05-23 05:56:53 +000015744 putXMMReg( gregOfRexRM(pfx,modrm),
15745 mkexpr(math_PABS_XMM(sV, laneszB)) );
sewardj80611e32012-01-20 13:07:24 +000015746 goto decode_success;
15747 }
15748 /* 0F 38 1C = PABSB -- Packed Absolute Value 8x8 (MMX) */
15749 /* 0F 38 1D = PABSW -- Packed Absolute Value 16x4 (MMX) */
15750 /* 0F 38 1E = PABSD -- Packed Absolute Value 32x2 (MMX) */
15751 if (haveNo66noF2noF3(pfx) && sz == 4) {
15752 IRTemp sV = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000015753 const HChar* str = "???";
sewardj80611e32012-01-20 13:07:24 +000015754 Int laneszB = 0;
15755
15756 switch (opc) {
15757 case 0x1C: laneszB = 1; str = "b"; break;
15758 case 0x1D: laneszB = 2; str = "w"; break;
15759 case 0x1E: laneszB = 4; str = "d"; break;
15760 default: vassert(0);
15761 }
15762
15763 modrm = getUChar(delta);
15764 do_MMX_preamble();
15765
15766 if (epartIsReg(modrm)) {
15767 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15768 delta += 1;
15769 DIP("pabs%s %s,%s\n", str, nameMMXReg(eregLO3ofRM(modrm)),
15770 nameMMXReg(gregLO3ofRM(modrm)));
15771 } else {
15772 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
15773 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15774 delta += alen;
15775 DIP("pabs%s %s,%s\n", str, dis_buf,
15776 nameMMXReg(gregLO3ofRM(modrm)));
15777 }
15778
sewardj97f72452012-05-23 05:56:53 +000015779 putMMXReg( gregLO3ofRM(modrm),
15780 mkexpr(math_PABS_MMX( sV, laneszB )) );
sewardj80611e32012-01-20 13:07:24 +000015781 goto decode_success;
15782 }
15783 break;
15784
15785 default:
15786 break;
15787
15788 }
15789
15790 //decode_failure:
15791 *decode_OK = False;
15792 return deltaIN;
15793
15794 decode_success:
15795 *decode_OK = True;
15796 return delta;
15797}
15798
15799
15800/*------------------------------------------------------------*/
15801/*--- ---*/
15802/*--- Top-level SSSE3: dis_ESC_0F3A__SupSSE3 ---*/
15803/*--- ---*/
15804/*------------------------------------------------------------*/
15805
15806__attribute__((noinline))
15807static
15808Long dis_ESC_0F3A__SupSSE3 ( Bool* decode_OK,
15809 VexAbiInfo* vbi,
15810 Prefix pfx, Int sz, Long deltaIN )
15811{
15812 Long d64 = 0;
15813 IRTemp addr = IRTemp_INVALID;
15814 UChar modrm = 0;
15815 Int alen = 0;
15816 HChar dis_buf[50];
15817
15818 *decode_OK = False;
15819
15820 Long delta = deltaIN;
15821 UChar opc = getUChar(delta);
15822 delta++;
15823 switch (opc) {
15824
15825 case 0x0F:
15826 /* 66 0F 3A 0F = PALIGNR -- Packed Align Right (XMM) */
15827 if (have66noF2noF3(pfx)
15828 && (sz == 2 || /*redundant REX.W*/ sz == 8)) {
15829 IRTemp sV = newTemp(Ity_V128);
15830 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000015831
15832 modrm = getUChar(delta);
15833 assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) );
15834
15835 if (epartIsReg(modrm)) {
15836 assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) );
15837 d64 = (Long)getUChar(delta+1);
15838 delta += 1+1;
15839 DIP("palignr $%d,%s,%s\n", (Int)d64,
15840 nameXMMReg(eregOfRexRM(pfx,modrm)),
15841 nameXMMReg(gregOfRexRM(pfx,modrm)));
15842 } else {
15843 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15844 gen_SEGV_if_not_16_aligned( addr );
15845 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
15846 d64 = (Long)getUChar(delta+alen);
15847 delta += alen+1;
15848 DIP("palignr $%d,%s,%s\n", (Int)d64,
15849 dis_buf,
15850 nameXMMReg(gregOfRexRM(pfx,modrm)));
15851 }
15852
sewardj151cd3e2012-06-18 13:56:55 +000015853 IRTemp res = math_PALIGNR_XMM( sV, dV, d64 );
15854 putXMMReg( gregOfRexRM(pfx,modrm), mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000015855 goto decode_success;
15856 }
15857 /* 0F 3A 0F = PALIGNR -- Packed Align Right (MMX) */
15858 if (haveNo66noF2noF3(pfx) && sz == 4) {
15859 IRTemp sV = newTemp(Ity_I64);
15860 IRTemp dV = newTemp(Ity_I64);
15861 IRTemp res = newTemp(Ity_I64);
15862
15863 modrm = getUChar(delta);
15864 do_MMX_preamble();
15865 assign( dV, getMMXReg(gregLO3ofRM(modrm)) );
15866
15867 if (epartIsReg(modrm)) {
15868 assign( sV, getMMXReg(eregLO3ofRM(modrm)) );
15869 d64 = (Long)getUChar(delta+1);
15870 delta += 1+1;
15871 DIP("palignr $%d,%s,%s\n", (Int)d64,
15872 nameMMXReg(eregLO3ofRM(modrm)),
15873 nameMMXReg(gregLO3ofRM(modrm)));
15874 } else {
15875 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
15876 assign( sV, loadLE(Ity_I64, mkexpr(addr)) );
15877 d64 = (Long)getUChar(delta+alen);
15878 delta += alen+1;
15879 DIP("palignr $%d%s,%s\n", (Int)d64,
15880 dis_buf,
15881 nameMMXReg(gregLO3ofRM(modrm)));
15882 }
15883
15884 if (d64 == 0) {
15885 assign( res, mkexpr(sV) );
15886 }
15887 else if (d64 >= 1 && d64 <= 7) {
15888 assign(res,
15889 binop(Iop_Or64,
15890 binop(Iop_Shr64, mkexpr(sV), mkU8(8*d64)),
15891 binop(Iop_Shl64, mkexpr(dV), mkU8(8*(8-d64))
15892 )));
15893 }
15894 else if (d64 == 8) {
15895 assign( res, mkexpr(dV) );
15896 }
15897 else if (d64 >= 9 && d64 <= 15) {
15898 assign( res, binop(Iop_Shr64, mkexpr(dV), mkU8(8*(d64-8))) );
15899 }
15900 else if (d64 >= 16 && d64 <= 255) {
15901 assign( res, mkU64(0) );
15902 }
15903 else
15904 vassert(0);
15905
15906 putMMXReg( gregLO3ofRM(modrm), mkexpr(res) );
15907 goto decode_success;
15908 }
15909 break;
15910
15911 default:
15912 break;
15913
15914 }
15915
15916 //decode_failure:
15917 *decode_OK = False;
15918 return deltaIN;
15919
15920 decode_success:
15921 *decode_OK = True;
15922 return delta;
15923}
15924
15925
15926/*------------------------------------------------------------*/
15927/*--- ---*/
15928/*--- Top-level SSE4: dis_ESC_0F__SSE4 ---*/
15929/*--- ---*/
15930/*------------------------------------------------------------*/
15931
15932__attribute__((noinline))
15933static
15934Long dis_ESC_0F__SSE4 ( Bool* decode_OK,
15935 VexArchInfo* archinfo,
15936 VexAbiInfo* vbi,
15937 Prefix pfx, Int sz, Long deltaIN )
15938{
15939 IRTemp addr = IRTemp_INVALID;
15940 IRType ty = Ity_INVALID;
15941 UChar modrm = 0;
15942 Int alen = 0;
15943 HChar dis_buf[50];
15944
15945 *decode_OK = False;
15946
15947 Long delta = deltaIN;
15948 UChar opc = getUChar(delta);
15949 delta++;
15950 switch (opc) {
15951
15952 case 0xB8:
15953 /* F3 0F B8 = POPCNT{W,L,Q}
15954 Count the number of 1 bits in a register
15955 */
15956 if (haveF3noF2(pfx) /* so both 66 and REX.W are possibilities */
15957 && (sz == 2 || sz == 4 || sz == 8)) {
15958 /*IRType*/ ty = szToITy(sz);
15959 IRTemp src = newTemp(ty);
15960 modrm = getUChar(delta);
15961 if (epartIsReg(modrm)) {
15962 assign(src, getIRegE(sz, pfx, modrm));
15963 delta += 1;
15964 DIP("popcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
15965 nameIRegG(sz, pfx, modrm));
15966 } else {
15967 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
15968 assign(src, loadLE(ty, mkexpr(addr)));
15969 delta += alen;
15970 DIP("popcnt%c %s, %s\n", nameISize(sz), dis_buf,
15971 nameIRegG(sz, pfx, modrm));
15972 }
15973
15974 IRTemp result = gen_POPCOUNT(ty, src);
15975 putIRegG(sz, pfx, modrm, mkexpr(result));
15976
15977 // Update flags. This is pretty lame .. perhaps can do better
15978 // if this turns out to be performance critical.
15979 // O S A C P are cleared. Z is set if SRC == 0.
15980 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
15981 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
15982 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
15983 stmt( IRStmt_Put( OFFB_CC_DEP1,
15984 binop(Iop_Shl64,
15985 unop(Iop_1Uto64,
15986 binop(Iop_CmpEQ64,
15987 widenUto64(mkexpr(src)),
15988 mkU64(0))),
15989 mkU8(AMD64G_CC_SHIFT_Z))));
15990
15991 goto decode_success;
15992 }
15993 break;
15994
sewardjcc3d2192013-03-27 11:37:33 +000015995 case 0xBC:
15996 /* F3 0F BC -- TZCNT (count trailing zeroes. A BMI extension,
15997 which we can only decode if we're sure this is a BMI1 capable cpu
15998 that supports TZCNT, since otherwise it's BSF, which behaves
15999 differently on zero source. */
16000 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16001 && (sz == 2 || sz == 4 || sz == 8)
16002 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI)) {
16003 /*IRType*/ ty = szToITy(sz);
16004 IRTemp src = newTemp(ty);
16005 modrm = getUChar(delta);
16006 if (epartIsReg(modrm)) {
16007 assign(src, getIRegE(sz, pfx, modrm));
16008 delta += 1;
16009 DIP("tzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16010 nameIRegG(sz, pfx, modrm));
16011 } else {
16012 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16013 assign(src, loadLE(ty, mkexpr(addr)));
16014 delta += alen;
16015 DIP("tzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16016 nameIRegG(sz, pfx, modrm));
16017 }
16018
16019 IRTemp res = gen_TZCNT(ty, src);
16020 putIRegG(sz, pfx, modrm, mkexpr(res));
16021
16022 // Update flags. This is pretty lame .. perhaps can do better
16023 // if this turns out to be performance critical.
16024 // O S A P are cleared. Z is set if RESULT == 0.
16025 // C is set if SRC is zero.
16026 IRTemp src64 = newTemp(Ity_I64);
16027 IRTemp res64 = newTemp(Ity_I64);
16028 assign(src64, widenUto64(mkexpr(src)));
16029 assign(res64, widenUto64(mkexpr(res)));
16030
16031 IRTemp oszacp = newTemp(Ity_I64);
16032 assign(
16033 oszacp,
16034 binop(Iop_Or64,
16035 binop(Iop_Shl64,
16036 unop(Iop_1Uto64,
16037 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16038 mkU8(AMD64G_CC_SHIFT_Z)),
16039 binop(Iop_Shl64,
16040 unop(Iop_1Uto64,
16041 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16042 mkU8(AMD64G_CC_SHIFT_C))
16043 )
16044 );
16045
16046 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16047 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16048 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16049 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16050
16051 goto decode_success;
16052 }
16053 break;
16054
sewardj80611e32012-01-20 13:07:24 +000016055 case 0xBD:
16056 /* F3 0F BD -- LZCNT (count leading zeroes. An AMD extension,
16057 which we can only decode if we're sure this is an AMD cpu
16058 that supports LZCNT, since otherwise it's BSR, which behaves
16059 differently. Bizarrely, my Sandy Bridge also accepts these
16060 instructions but produces different results. */
16061 if (haveF3noF2(pfx) /* so both 66 and 48 are possibilities */
16062 && (sz == 2 || sz == 4 || sz == 8)
16063 && 0 != (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT)) {
16064 /*IRType*/ ty = szToITy(sz);
16065 IRTemp src = newTemp(ty);
16066 modrm = getUChar(delta);
16067 if (epartIsReg(modrm)) {
16068 assign(src, getIRegE(sz, pfx, modrm));
16069 delta += 1;
16070 DIP("lzcnt%c %s, %s\n", nameISize(sz), nameIRegE(sz, pfx, modrm),
16071 nameIRegG(sz, pfx, modrm));
16072 } else {
16073 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0);
16074 assign(src, loadLE(ty, mkexpr(addr)));
16075 delta += alen;
16076 DIP("lzcnt%c %s, %s\n", nameISize(sz), dis_buf,
16077 nameIRegG(sz, pfx, modrm));
16078 }
16079
16080 IRTemp res = gen_LZCNT(ty, src);
16081 putIRegG(sz, pfx, modrm, mkexpr(res));
16082
16083 // Update flags. This is pretty lame .. perhaps can do better
16084 // if this turns out to be performance critical.
16085 // O S A P are cleared. Z is set if RESULT == 0.
16086 // C is set if SRC is zero.
16087 IRTemp src64 = newTemp(Ity_I64);
16088 IRTemp res64 = newTemp(Ity_I64);
16089 assign(src64, widenUto64(mkexpr(src)));
16090 assign(res64, widenUto64(mkexpr(res)));
16091
16092 IRTemp oszacp = newTemp(Ity_I64);
16093 assign(
16094 oszacp,
16095 binop(Iop_Or64,
16096 binop(Iop_Shl64,
16097 unop(Iop_1Uto64,
16098 binop(Iop_CmpEQ64, mkexpr(res64), mkU64(0))),
16099 mkU8(AMD64G_CC_SHIFT_Z)),
16100 binop(Iop_Shl64,
16101 unop(Iop_1Uto64,
16102 binop(Iop_CmpEQ64, mkexpr(src64), mkU64(0))),
16103 mkU8(AMD64G_CC_SHIFT_C))
16104 )
16105 );
16106
16107 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16108 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16109 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16110 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(oszacp) ));
16111
16112 goto decode_success;
16113 }
16114 break;
16115
16116 default:
16117 break;
16118
16119 }
16120
16121 //decode_failure:
16122 *decode_OK = False;
16123 return deltaIN;
16124
16125 decode_success:
16126 *decode_OK = True;
16127 return delta;
16128}
16129
16130
16131/*------------------------------------------------------------*/
16132/*--- ---*/
16133/*--- Top-level SSE4: dis_ESC_0F38__SSE4 ---*/
16134/*--- ---*/
16135/*------------------------------------------------------------*/
16136
sewardje8a7eb72012-06-12 14:59:17 +000016137static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
16138 IRTemp vec0/*controlling mask*/,
16139 UInt gran, IROp opSAR )
sewardjc4530ae2012-05-21 10:18:49 +000016140{
16141 /* The tricky bit is to convert vec0 into a suitable mask, by
16142 copying the most significant bit of each lane into all positions
16143 in the lane. */
16144 IRTemp sh = newTemp(Ity_I8);
16145 assign(sh, mkU8(8 * gran - 1));
16146
16147 IRTemp mask = newTemp(Ity_V128);
16148 assign(mask, binop(opSAR, mkexpr(vec0), mkexpr(sh)));
16149
16150 IRTemp notmask = newTemp(Ity_V128);
16151 assign(notmask, unop(Iop_NotV128, mkexpr(mask)));
16152
16153 IRTemp res = newTemp(Ity_V128);
16154 assign(res, binop(Iop_OrV128,
16155 binop(Iop_AndV128, mkexpr(vecE), mkexpr(mask)),
16156 binop(Iop_AndV128, mkexpr(vecG), mkexpr(notmask))));
16157 return res;
16158}
16159
sewardj4c0a7ac2012-06-21 09:08:19 +000016160static IRTemp math_PBLENDVB_256 ( IRTemp vecE, IRTemp vecG,
16161 IRTemp vec0/*controlling mask*/,
16162 UInt gran, IROp opSAR128 )
16163{
16164 /* The tricky bit is to convert vec0 into a suitable mask, by
16165 copying the most significant bit of each lane into all positions
16166 in the lane. */
16167 IRTemp sh = newTemp(Ity_I8);
16168 assign(sh, mkU8(8 * gran - 1));
16169
16170 IRTemp vec0Hi = IRTemp_INVALID;
16171 IRTemp vec0Lo = IRTemp_INVALID;
16172 breakupV256toV128s( vec0, &vec0Hi, &vec0Lo );
16173
16174 IRTemp mask = newTemp(Ity_V256);
16175 assign(mask, binop(Iop_V128HLtoV256,
16176 binop(opSAR128, mkexpr(vec0Hi), mkexpr(sh)),
16177 binop(opSAR128, mkexpr(vec0Lo), mkexpr(sh))));
16178
16179 IRTemp notmask = newTemp(Ity_V256);
16180 assign(notmask, unop(Iop_NotV256, mkexpr(mask)));
16181
16182 IRTemp res = newTemp(Ity_V256);
16183 assign(res, binop(Iop_OrV256,
16184 binop(Iop_AndV256, mkexpr(vecE), mkexpr(mask)),
16185 binop(Iop_AndV256, mkexpr(vecG), mkexpr(notmask))));
16186 return res;
16187}
16188
16189static Long dis_VBLENDV_128 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
16190 const HChar *name, UInt gran, IROp opSAR )
16191{
16192 IRTemp addr = IRTemp_INVALID;
16193 Int alen = 0;
16194 HChar dis_buf[50];
16195 UChar modrm = getUChar(delta);
16196 UInt rG = gregOfRexRM(pfx, modrm);
16197 UInt rV = getVexNvvvv(pfx);
16198 UInt rIS4 = 0xFF; /* invalid */
16199 IRTemp vecE = newTemp(Ity_V128);
16200 IRTemp vecV = newTemp(Ity_V128);
16201 IRTemp vecIS4 = newTemp(Ity_V128);
16202 if (epartIsReg(modrm)) {
16203 delta++;
16204 UInt rE = eregOfRexRM(pfx, modrm);
16205 assign(vecE, getXMMReg(rE));
16206 UChar ib = getUChar(delta);
16207 rIS4 = (ib >> 4) & 0xF;
16208 DIP("%s %s,%s,%s,%s\n",
16209 name, nameXMMReg(rIS4), nameXMMReg(rE),
16210 nameXMMReg(rV), nameXMMReg(rG));
16211 } else {
16212 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16213 delta += alen;
16214 assign(vecE, loadLE(Ity_V128, mkexpr(addr)));
16215 UChar ib = getUChar(delta);
16216 rIS4 = (ib >> 4) & 0xF;
16217 DIP("%s %s,%s,%s,%s\n",
16218 name, nameXMMReg(rIS4), dis_buf, nameXMMReg(rV), nameXMMReg(rG));
16219 }
16220 delta++;
16221 assign(vecV, getXMMReg(rV));
16222 assign(vecIS4, getXMMReg(rIS4));
16223 IRTemp res = math_PBLENDVB_128( vecE, vecV, vecIS4, gran, opSAR );
16224 putYMMRegLoAndZU( rG, mkexpr(res) );
16225 return delta;
16226}
16227
16228static Long dis_VBLENDV_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta,
16229 const HChar *name, UInt gran, IROp opSAR128 )
16230{
16231 IRTemp addr = IRTemp_INVALID;
16232 Int alen = 0;
16233 HChar dis_buf[50];
16234 UChar modrm = getUChar(delta);
16235 UInt rG = gregOfRexRM(pfx, modrm);
16236 UInt rV = getVexNvvvv(pfx);
16237 UInt rIS4 = 0xFF; /* invalid */
16238 IRTemp vecE = newTemp(Ity_V256);
16239 IRTemp vecV = newTemp(Ity_V256);
16240 IRTemp vecIS4 = newTemp(Ity_V256);
16241 if (epartIsReg(modrm)) {
16242 delta++;
16243 UInt rE = eregOfRexRM(pfx, modrm);
16244 assign(vecE, getYMMReg(rE));
16245 UChar ib = getUChar(delta);
16246 rIS4 = (ib >> 4) & 0xF;
16247 DIP("%s %s,%s,%s,%s\n",
16248 name, nameYMMReg(rIS4), nameYMMReg(rE),
16249 nameYMMReg(rV), nameYMMReg(rG));
16250 } else {
16251 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
16252 delta += alen;
16253 assign(vecE, loadLE(Ity_V256, mkexpr(addr)));
16254 UChar ib = getUChar(delta);
16255 rIS4 = (ib >> 4) & 0xF;
16256 DIP("%s %s,%s,%s,%s\n",
16257 name, nameYMMReg(rIS4), dis_buf, nameYMMReg(rV), nameYMMReg(rG));
16258 }
16259 delta++;
16260 assign(vecV, getYMMReg(rV));
16261 assign(vecIS4, getYMMReg(rIS4));
16262 IRTemp res = math_PBLENDVB_256( vecE, vecV, vecIS4, gran, opSAR128 );
16263 putYMMReg( rG, mkexpr(res) );
16264 return delta;
16265}
16266
sewardjed1884d2012-06-21 08:53:48 +000016267static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
16268{
16269 /* Set Z=1 iff (vecE & vecG) == 0
16270 Set C=1 iff (vecE & not vecG) == 0
16271 */
16272
16273 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16274
16275 /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
16276 and bottom 64-bits together. It relies on this trick:
16277
16278 InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
16279
16280 InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
16281 InterleaveHI64x2([a,b],[a,b]) == [a,a]
16282
16283 and so the OR of the above 2 exprs produces
16284 [a OR b, a OR b], from which we simply take the lower half.
16285 */
16286 IRTemp and64 = newTemp(Ity_I64);
16287 IRTemp andn64 = newTemp(Ity_I64);
16288
16289 assign(and64,
16290 unop(Iop_V128to64,
16291 binop(Iop_OrV128,
16292 binop(Iop_InterleaveLO64x2,
16293 mkexpr(andV), mkexpr(andV)),
16294 binop(Iop_InterleaveHI64x2,
16295 mkexpr(andV), mkexpr(andV)))));
16296
16297 assign(andn64,
16298 unop(Iop_V128to64,
16299 binop(Iop_OrV128,
16300 binop(Iop_InterleaveLO64x2,
16301 mkexpr(andnV), mkexpr(andnV)),
16302 binop(Iop_InterleaveHI64x2,
16303 mkexpr(andnV), mkexpr(andnV)))));
16304
16305 IRTemp z64 = newTemp(Ity_I64);
16306 IRTemp c64 = newTemp(Ity_I64);
16307 if (sign == 64) {
16308 /* When only interested in the most significant bit, just shift
16309 arithmetically right and negate. */
16310 assign(z64,
16311 unop(Iop_Not64,
16312 binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
16313
16314 assign(c64,
16315 unop(Iop_Not64,
16316 binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
16317 } else {
16318 if (sign == 32) {
16319 /* When interested in bit 31 and bit 63, mask those bits and
16320 fallthrough into the PTEST handling. */
16321 IRTemp t0 = newTemp(Ity_I64);
16322 IRTemp t1 = newTemp(Ity_I64);
16323 IRTemp t2 = newTemp(Ity_I64);
16324 assign(t0, mkU64(0x8000000080000000ULL));
16325 assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
16326 assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
16327 and64 = t1;
16328 andn64 = t2;
16329 }
16330 /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
16331 slice out the Z and C bits conveniently. We use the standard
16332 trick all-zeroes -> all-zeroes, anything-else -> all-ones
16333 done by "(x | -x) >>s (word-size - 1)".
16334 */
16335 assign(z64,
16336 unop(Iop_Not64,
16337 binop(Iop_Sar64,
16338 binop(Iop_Or64,
16339 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
16340 mkexpr(and64)), mkU8(63))));
16341
16342 assign(c64,
16343 unop(Iop_Not64,
16344 binop(Iop_Sar64,
16345 binop(Iop_Or64,
16346 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
16347 mkexpr(andn64)), mkU8(63))));
16348 }
16349
16350 /* And finally, slice out the Z and C flags and set the flags
16351 thunk to COPY for them. OSAP are set to zero. */
16352 IRTemp newOSZACP = newTemp(Ity_I64);
16353 assign(newOSZACP,
16354 binop(Iop_Or64,
16355 binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
16356 binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
16357
16358 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
16359 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
16360 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
16361 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
16362}
16363
16364
16365/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
16366 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16367static Long dis_xTESTy_128 ( VexAbiInfo* vbi, Prefix pfx,
16368 Long delta, Bool isAvx, Int sign )
16369{
16370 IRTemp addr = IRTemp_INVALID;
16371 Int alen = 0;
16372 HChar dis_buf[50];
16373 UChar modrm = getUChar(delta);
16374 UInt rG = gregOfRexRM(pfx, modrm);
16375 IRTemp vecE = newTemp(Ity_V128);
16376 IRTemp vecG = newTemp(Ity_V128);
16377
16378 if ( epartIsReg(modrm) ) {
16379 UInt rE = eregOfRexRM(pfx, modrm);
16380 assign(vecE, getXMMReg(rE));
16381 delta += 1;
16382 DIP( "%s%stest%s %s,%s\n",
16383 isAvx ? "v" : "", sign == 0 ? "p" : "",
16384 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16385 nameXMMReg(rE), nameXMMReg(rG) );
16386 } else {
16387 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16388 if (!isAvx)
16389 gen_SEGV_if_not_16_aligned( addr );
16390 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
16391 delta += alen;
16392 DIP( "%s%stest%s %s,%s\n",
16393 isAvx ? "v" : "", sign == 0 ? "p" : "",
16394 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16395 dis_buf, nameXMMReg(rG) );
16396 }
16397
16398 assign(vecG, getXMMReg(rG));
16399
16400 /* Set Z=1 iff (vecE & vecG) == 0
16401 Set C=1 iff (vecE & not vecG) == 0
16402 */
16403
16404 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16405 IRTemp andV = newTemp(Ity_V128);
16406 IRTemp andnV = newTemp(Ity_V128);
16407 assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
16408 assign(andnV, binop(Iop_AndV128,
16409 mkexpr(vecE),
16410 binop(Iop_XorV128, mkexpr(vecG),
16411 mkV128(0xFFFF))));
16412
16413 finish_xTESTy ( andV, andnV, sign );
16414 return delta;
16415}
16416
16417
16418/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
16419 sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
16420static Long dis_xTESTy_256 ( VexAbiInfo* vbi, Prefix pfx,
16421 Long delta, Int sign )
16422{
16423 IRTemp addr = IRTemp_INVALID;
16424 Int alen = 0;
16425 HChar dis_buf[50];
16426 UChar modrm = getUChar(delta);
16427 UInt rG = gregOfRexRM(pfx, modrm);
16428 IRTemp vecE = newTemp(Ity_V256);
16429 IRTemp vecG = newTemp(Ity_V256);
16430
16431 if ( epartIsReg(modrm) ) {
16432 UInt rE = eregOfRexRM(pfx, modrm);
16433 assign(vecE, getYMMReg(rE));
16434 delta += 1;
16435 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16436 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16437 nameYMMReg(rE), nameYMMReg(rG) );
16438 } else {
16439 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16440 assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
16441 delta += alen;
16442 DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
16443 sign == 0 ? "" : sign == 32 ? "ps" : "pd",
16444 dis_buf, nameYMMReg(rG) );
16445 }
16446
16447 assign(vecG, getYMMReg(rG));
16448
16449 /* Set Z=1 iff (vecE & vecG) == 0
16450 Set C=1 iff (vecE & not vecG) == 0
16451 */
16452
16453 /* andV, andnV: vecE & vecG, vecE and not(vecG) */
16454 IRTemp andV = newTemp(Ity_V256);
16455 IRTemp andnV = newTemp(Ity_V256);
16456 assign(andV, binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
16457 assign(andnV, binop(Iop_AndV256,
16458 mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
16459
16460 IRTemp andVhi = IRTemp_INVALID;
16461 IRTemp andVlo = IRTemp_INVALID;
16462 IRTemp andnVhi = IRTemp_INVALID;
16463 IRTemp andnVlo = IRTemp_INVALID;
16464 breakupV256toV128s( andV, &andVhi, &andVlo );
16465 breakupV256toV128s( andnV, &andnVhi, &andnVlo );
16466
16467 IRTemp andV128 = newTemp(Ity_V128);
16468 IRTemp andnV128 = newTemp(Ity_V128);
16469 assign( andV128, binop( Iop_OrV128, mkexpr(andVhi), mkexpr(andVlo) ) );
16470 assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
16471
16472 finish_xTESTy ( andV128, andnV128, sign );
16473 return delta;
16474}
16475
sewardjc4530ae2012-05-21 10:18:49 +000016476
sewardj6fcd43e2012-06-14 08:51:35 +000016477/* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
16478static Long dis_PMOVxXBW_128 ( VexAbiInfo* vbi, Prefix pfx,
16479 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000016480{
16481 IRTemp addr = IRTemp_INVALID;
16482 Int alen = 0;
16483 HChar dis_buf[50];
16484 IRTemp srcVec = newTemp(Ity_V128);
16485 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016486 const HChar* mbV = isAvx ? "v" : "";
16487 const HChar how = xIsZ ? 'z' : 's';
sewardj6fcd43e2012-06-14 08:51:35 +000016488 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000016489 if ( epartIsReg(modrm) ) {
sewardj6fcd43e2012-06-14 08:51:35 +000016490 UInt rE = eregOfRexRM(pfx, modrm);
16491 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000016492 delta += 1;
sewardj6fcd43e2012-06-14 08:51:35 +000016493 DIP( "%spmov%cxbw %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016494 } else {
16495 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16496 assign( srcVec,
16497 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16498 delta += alen;
sewardj6fcd43e2012-06-14 08:51:35 +000016499 DIP( "%spmov%cxbw %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016500 }
16501
16502 IRExpr* res
sewardj6fcd43e2012-06-14 08:51:35 +000016503 = xIsZ /* do math for either zero or sign extend */
16504 ? binop( Iop_InterleaveLO8x16,
16505 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
16506 : binop( Iop_SarN16x8,
16507 binop( Iop_ShlN16x8,
16508 binop( Iop_InterleaveLO8x16,
16509 IRExpr_Const( IRConst_V128(0) ),
16510 mkexpr(srcVec) ),
16511 mkU8(8) ),
16512 mkU8(8) );
sewardjc4530ae2012-05-21 10:18:49 +000016513
sewardj6fcd43e2012-06-14 08:51:35 +000016514 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
sewardjc4530ae2012-05-21 10:18:49 +000016515
16516 return delta;
16517}
16518
16519
sewardjcc3d2192013-03-27 11:37:33 +000016520/* Handles 256 bit versions of PMOVZXBW and PMOVSXBW. */
16521static Long dis_PMOVxXBW_256 ( VexAbiInfo* vbi, Prefix pfx,
16522 Long delta, Bool xIsZ )
16523{
16524 IRTemp addr = IRTemp_INVALID;
16525 Int alen = 0;
16526 HChar dis_buf[50];
16527 IRTemp srcVec = newTemp(Ity_V128);
16528 UChar modrm = getUChar(delta);
16529 UChar how = xIsZ ? 'z' : 's';
16530 UInt rG = gregOfRexRM(pfx, modrm);
16531 if ( epartIsReg(modrm) ) {
16532 UInt rE = eregOfRexRM(pfx, modrm);
16533 assign( srcVec, getXMMReg(rE) );
16534 delta += 1;
16535 DIP( "vpmov%cxbw %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16536 } else {
16537 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16538 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
16539 delta += alen;
16540 DIP( "vpmov%cxbw %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16541 }
16542
16543 /* First do zero extend. */
16544 IRExpr* res
16545 = binop( Iop_V128HLtoV256,
16546 binop( Iop_InterleaveHI8x16,
16547 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16548 binop( Iop_InterleaveLO8x16,
16549 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16550 /* And if needed sign extension as well. */
16551 if (!xIsZ)
16552 res = binop( Iop_SarN16x16,
16553 binop( Iop_ShlN16x16, res, mkU8(8) ), mkU8(8) );
16554
16555 putYMMReg ( rG, res );
16556
16557 return delta;
16558}
16559
16560
sewardj6fcd43e2012-06-14 08:51:35 +000016561static Long dis_PMOVxXWD_128 ( VexAbiInfo* vbi, Prefix pfx,
16562 Long delta, Bool isAvx, Bool xIsZ )
sewardjc4530ae2012-05-21 10:18:49 +000016563{
16564 IRTemp addr = IRTemp_INVALID;
16565 Int alen = 0;
16566 HChar dis_buf[50];
16567 IRTemp srcVec = newTemp(Ity_V128);
16568 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016569 const HChar* mbV = isAvx ? "v" : "";
16570 const HChar how = xIsZ ? 'z' : 's';
sewardj8516a1f2012-06-24 14:26:30 +000016571 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000016572
16573 if ( epartIsReg(modrm) ) {
sewardj8516a1f2012-06-24 14:26:30 +000016574 UInt rE = eregOfRexRM(pfx, modrm);
16575 assign( srcVec, getXMMReg(rE) );
sewardjc4530ae2012-05-21 10:18:49 +000016576 delta += 1;
sewardj8516a1f2012-06-24 14:26:30 +000016577 DIP( "%spmov%cxwd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016578 } else {
16579 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16580 assign( srcVec,
16581 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16582 delta += alen;
sewardj8516a1f2012-06-24 14:26:30 +000016583 DIP( "%spmov%cxwd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
sewardjc4530ae2012-05-21 10:18:49 +000016584 }
16585
16586 IRExpr* res
16587 = binop( Iop_InterleaveLO16x8,
16588 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) );
sewardj6fcd43e2012-06-14 08:51:35 +000016589 if (!xIsZ)
16590 res = binop(Iop_SarN32x4,
16591 binop(Iop_ShlN32x4, res, mkU8(16)), mkU8(16));
sewardjc4530ae2012-05-21 10:18:49 +000016592
sewardje8a7eb72012-06-12 14:59:17 +000016593 (isAvx ? putYMMRegLoAndZU : putXMMReg)
sewardjc4530ae2012-05-21 10:18:49 +000016594 ( gregOfRexRM(pfx, modrm), res );
16595
16596 return delta;
16597}
16598
16599
sewardjcc3d2192013-03-27 11:37:33 +000016600static Long dis_PMOVxXWD_256 ( VexAbiInfo* vbi, Prefix pfx,
16601 Long delta, Bool xIsZ )
16602{
16603 IRTemp addr = IRTemp_INVALID;
16604 Int alen = 0;
16605 HChar dis_buf[50];
16606 IRTemp srcVec = newTemp(Ity_V128);
16607 UChar modrm = getUChar(delta);
16608 UChar how = xIsZ ? 'z' : 's';
16609 UInt rG = gregOfRexRM(pfx, modrm);
16610
16611 if ( epartIsReg(modrm) ) {
16612 UInt rE = eregOfRexRM(pfx, modrm);
16613 assign( srcVec, getXMMReg(rE) );
16614 delta += 1;
16615 DIP( "vpmov%cxwd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16616 } else {
16617 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16618 assign( srcVec, loadLE( Ity_V128, mkexpr(addr) ) );
16619 delta += alen;
16620 DIP( "vpmov%cxwd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16621 }
16622
16623 IRExpr* res
16624 = binop( Iop_V128HLtoV256,
16625 binop( Iop_InterleaveHI16x8,
16626 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16627 binop( Iop_InterleaveLO16x8,
16628 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16629 if (!xIsZ)
16630 res = binop(Iop_SarN32x8,
16631 binop(Iop_ShlN32x8, res, mkU8(16)), mkU8(16));
16632
16633 putYMMReg ( rG, res );
16634
16635 return delta;
16636}
16637
16638
sewardj8516a1f2012-06-24 14:26:30 +000016639static Long dis_PMOVSXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
16640 Long delta, Bool isAvx )
16641{
16642 IRTemp addr = IRTemp_INVALID;
16643 Int alen = 0;
16644 HChar dis_buf[50];
16645 IRTemp srcBytes = newTemp(Ity_I32);
16646 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016647 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016648 UInt rG = gregOfRexRM(pfx, modrm);
16649
16650 if ( epartIsReg( modrm ) ) {
16651 UInt rE = eregOfRexRM(pfx, modrm);
16652 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
16653 delta += 1;
16654 DIP( "%spmovsxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16655 } else {
16656 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16657 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
16658 delta += alen;
16659 DIP( "%spmovsxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16660 }
16661
16662 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16663 ( rG, binop( Iop_64HLtoV128,
16664 unop( Iop_16Sto64,
16665 unop( Iop_32HIto16, mkexpr(srcBytes) ) ),
16666 unop( Iop_16Sto64,
16667 unop( Iop_32to16, mkexpr(srcBytes) ) ) ) );
16668 return delta;
16669}
16670
16671
sewardjcc3d2192013-03-27 11:37:33 +000016672static Long dis_PMOVSXWQ_256 ( VexAbiInfo* vbi, Prefix pfx, Long delta )
16673{
16674 IRTemp addr = IRTemp_INVALID;
16675 Int alen = 0;
16676 HChar dis_buf[50];
16677 IRTemp srcBytes = newTemp(Ity_I64);
16678 UChar modrm = getUChar(delta);
16679 UInt rG = gregOfRexRM(pfx, modrm);
16680 IRTemp s3, s2, s1, s0;
16681 s3 = s2 = s1 = s0 = IRTemp_INVALID;
16682
16683 if ( epartIsReg( modrm ) ) {
16684 UInt rE = eregOfRexRM(pfx, modrm);
16685 assign( srcBytes, getXMMRegLane64( rE, 0 ) );
16686 delta += 1;
16687 DIP( "vpmovsxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
16688 } else {
16689 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16690 assign( srcBytes, loadLE( Ity_I64, mkexpr(addr) ) );
16691 delta += alen;
16692 DIP( "vpmovsxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
16693 }
16694
16695 breakup64to16s( srcBytes, &s3, &s2, &s1, &s0 );
16696 putYMMReg( rG, binop( Iop_V128HLtoV256,
16697 binop( Iop_64HLtoV128,
16698 unop( Iop_16Sto64, mkexpr(s3) ),
16699 unop( Iop_16Sto64, mkexpr(s2) ) ),
16700 binop( Iop_64HLtoV128,
16701 unop( Iop_16Sto64, mkexpr(s1) ),
16702 unop( Iop_16Sto64, mkexpr(s0) ) ) ) );
16703 return delta;
16704}
16705
16706
sewardj8516a1f2012-06-24 14:26:30 +000016707static Long dis_PMOVZXWQ_128 ( VexAbiInfo* vbi, Prefix pfx,
16708 Long delta, Bool isAvx )
16709{
16710 IRTemp addr = IRTemp_INVALID;
16711 Int alen = 0;
16712 HChar dis_buf[50];
16713 IRTemp srcVec = newTemp(Ity_V128);
16714 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016715 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016716 UInt rG = gregOfRexRM(pfx, modrm);
16717
16718 if ( epartIsReg( modrm ) ) {
16719 UInt rE = eregOfRexRM(pfx, modrm);
16720 assign( srcVec, getXMMReg(rE) );
16721 delta += 1;
16722 DIP( "%spmovzxwq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16723 } else {
16724 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16725 assign( srcVec,
16726 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
16727 delta += alen;
16728 DIP( "%spmovzxwq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16729 }
16730
16731 IRTemp zeroVec = newTemp( Ity_V128 );
16732 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16733
16734 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16735 ( rG, binop( Iop_InterleaveLO16x8,
16736 mkexpr(zeroVec),
16737 binop( Iop_InterleaveLO16x8,
16738 mkexpr(zeroVec), mkexpr(srcVec) ) ) );
16739 return delta;
16740}
16741
16742
sewardjcc3d2192013-03-27 11:37:33 +000016743static Long dis_PMOVZXWQ_256 ( VexAbiInfo* vbi, Prefix pfx,
16744 Long delta )
16745{
16746 IRTemp addr = IRTemp_INVALID;
16747 Int alen = 0;
16748 HChar dis_buf[50];
16749 IRTemp srcVec = newTemp(Ity_V128);
16750 UChar modrm = getUChar(delta);
16751 UInt rG = gregOfRexRM(pfx, modrm);
16752
16753 if ( epartIsReg( modrm ) ) {
16754 UInt rE = eregOfRexRM(pfx, modrm);
16755 assign( srcVec, getXMMReg(rE) );
16756 delta += 1;
16757 DIP( "vpmovzxwq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
16758 } else {
16759 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16760 assign( srcVec,
16761 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16762 delta += alen;
16763 DIP( "vpmovzxwq %s,%s\n", dis_buf, nameYMMReg(rG) );
16764 }
16765
16766 IRTemp zeroVec = newTemp( Ity_V128 );
16767 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16768
16769 putYMMReg( rG, binop( Iop_V128HLtoV256,
16770 binop( Iop_InterleaveHI16x8,
16771 mkexpr(zeroVec),
16772 binop( Iop_InterleaveLO16x8,
16773 mkexpr(zeroVec), mkexpr(srcVec) ) ),
16774 binop( Iop_InterleaveLO16x8,
16775 mkexpr(zeroVec),
16776 binop( Iop_InterleaveLO16x8,
16777 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
16778 return delta;
16779}
16780
16781
sewardj6fcd43e2012-06-14 08:51:35 +000016782/* Handles 128 bit versions of PMOVZXDQ and PMOVSXDQ. */
16783static Long dis_PMOVxXDQ_128 ( VexAbiInfo* vbi, Prefix pfx,
16784 Long delta, Bool isAvx, Bool xIsZ )
16785{
16786 IRTemp addr = IRTemp_INVALID;
16787 Int alen = 0;
16788 HChar dis_buf[50];
16789 IRTemp srcI64 = newTemp(Ity_I64);
16790 IRTemp srcVec = newTemp(Ity_V128);
16791 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016792 const HChar* mbV = isAvx ? "v" : "";
16793 const HChar how = xIsZ ? 'z' : 's';
sewardj6fcd43e2012-06-14 08:51:35 +000016794 UInt rG = gregOfRexRM(pfx, modrm);
16795 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
16796 thing in a V128, with arbitrary junk in the top 64 bits. Use
16797 one or both of them and let iropt clean up afterwards (as
16798 usual). */
16799 if ( epartIsReg(modrm) ) {
16800 UInt rE = eregOfRexRM(pfx, modrm);
16801 assign( srcVec, getXMMReg(rE) );
16802 assign( srcI64, unop(Iop_V128to64, mkexpr(srcVec)) );
16803 delta += 1;
16804 DIP( "%spmov%cxdq %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
16805 } else {
16806 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16807 assign( srcI64, loadLE(Ity_I64, mkexpr(addr)) );
16808 assign( srcVec, unop( Iop_64UtoV128, mkexpr(srcI64)) );
16809 delta += alen;
16810 DIP( "%spmov%cxdq %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
16811 }
16812
16813 IRExpr* res
16814 = xIsZ /* do math for either zero or sign extend */
16815 ? binop( Iop_InterleaveLO32x4,
16816 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) )
16817 : binop( Iop_64HLtoV128,
16818 unop( Iop_32Sto64,
16819 unop( Iop_64HIto32, mkexpr(srcI64) ) ),
16820 unop( Iop_32Sto64,
16821 unop( Iop_64to32, mkexpr(srcI64) ) ) );
16822
16823 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
16824
16825 return delta;
16826}
16827
16828
sewardjcc3d2192013-03-27 11:37:33 +000016829/* Handles 256 bit versions of PMOVZXDQ and PMOVSXDQ. */
16830static Long dis_PMOVxXDQ_256 ( VexAbiInfo* vbi, Prefix pfx,
16831 Long delta, Bool xIsZ )
16832{
16833 IRTemp addr = IRTemp_INVALID;
16834 Int alen = 0;
16835 HChar dis_buf[50];
16836 IRTemp srcVec = newTemp(Ity_V128);
16837 UChar modrm = getUChar(delta);
16838 UChar how = xIsZ ? 'z' : 's';
16839 UInt rG = gregOfRexRM(pfx, modrm);
16840 /* Compute both srcI64 -- the value to expand -- and srcVec -- same
16841 thing in a V128, with arbitrary junk in the top 64 bits. Use
16842 one or both of them and let iropt clean up afterwards (as
16843 usual). */
16844 if ( epartIsReg(modrm) ) {
16845 UInt rE = eregOfRexRM(pfx, modrm);
16846 assign( srcVec, getXMMReg(rE) );
16847 delta += 1;
16848 DIP( "vpmov%cxdq %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16849 } else {
16850 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16851 assign( srcVec, loadLE(Ity_V128, mkexpr(addr)) );
16852 delta += alen;
16853 DIP( "vpmov%cxdq %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16854 }
16855
16856 IRExpr* res;
16857 if (xIsZ)
16858 res = binop( Iop_V128HLtoV256,
16859 binop( Iop_InterleaveHI32x4,
16860 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ),
16861 binop( Iop_InterleaveLO32x4,
16862 IRExpr_Const( IRConst_V128(0) ), mkexpr(srcVec) ) );
16863 else {
16864 IRTemp s3, s2, s1, s0;
16865 s3 = s2 = s1 = s0 = IRTemp_INVALID;
16866 breakupV128to32s( srcVec, &s3, &s2, &s1, &s0 );
16867 res = binop( Iop_V128HLtoV256,
16868 binop( Iop_64HLtoV128,
16869 unop( Iop_32Sto64, mkexpr(s3) ),
16870 unop( Iop_32Sto64, mkexpr(s2) ) ),
16871 binop( Iop_64HLtoV128,
16872 unop( Iop_32Sto64, mkexpr(s1) ),
16873 unop( Iop_32Sto64, mkexpr(s0) ) ) );
16874 }
16875
16876 putYMMReg ( rG, res );
16877
16878 return delta;
16879}
16880
16881
sewardj4ed05e02012-06-18 15:01:30 +000016882/* Handles 128 bit versions of PMOVZXBD and PMOVSXBD. */
16883static Long dis_PMOVxXBD_128 ( VexAbiInfo* vbi, Prefix pfx,
16884 Long delta, Bool isAvx, Bool xIsZ )
16885{
16886 IRTemp addr = IRTemp_INVALID;
16887 Int alen = 0;
16888 HChar dis_buf[50];
16889 IRTemp srcVec = newTemp(Ity_V128);
16890 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016891 const HChar* mbV = isAvx ? "v" : "";
16892 const HChar how = xIsZ ? 'z' : 's';
sewardj4ed05e02012-06-18 15:01:30 +000016893 UInt rG = gregOfRexRM(pfx, modrm);
16894 if ( epartIsReg(modrm) ) {
16895 UInt rE = eregOfRexRM(pfx, modrm);
16896 assign( srcVec, getXMMReg(rE) );
16897 delta += 1;
16898 DIP( "%spmov%cxbd %s,%s\n", mbV, how, nameXMMReg(rE), nameXMMReg(rG) );
16899 } else {
16900 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16901 assign( srcVec,
16902 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) ) ) );
16903 delta += alen;
16904 DIP( "%spmov%cxbd %s,%s\n", mbV, how, dis_buf, nameXMMReg(rG) );
16905 }
16906
16907 IRTemp zeroVec = newTemp(Ity_V128);
16908 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16909
16910 IRExpr* res
16911 = binop(Iop_InterleaveLO8x16,
16912 mkexpr(zeroVec),
16913 binop(Iop_InterleaveLO8x16,
16914 mkexpr(zeroVec), mkexpr(srcVec)));
16915 if (!xIsZ)
16916 res = binop(Iop_SarN32x4,
16917 binop(Iop_ShlN32x4, res, mkU8(24)), mkU8(24));
16918
16919 (isAvx ? putYMMRegLoAndZU : putXMMReg) ( rG, res );
16920
16921 return delta;
16922}
16923
16924
sewardjcc3d2192013-03-27 11:37:33 +000016925/* Handles 256 bit versions of PMOVZXBD and PMOVSXBD. */
16926static Long dis_PMOVxXBD_256 ( VexAbiInfo* vbi, Prefix pfx,
16927 Long delta, Bool xIsZ )
16928{
16929 IRTemp addr = IRTemp_INVALID;
16930 Int alen = 0;
16931 HChar dis_buf[50];
16932 IRTemp srcVec = newTemp(Ity_V128);
16933 UChar modrm = getUChar(delta);
16934 UChar how = xIsZ ? 'z' : 's';
16935 UInt rG = gregOfRexRM(pfx, modrm);
16936 if ( epartIsReg(modrm) ) {
16937 UInt rE = eregOfRexRM(pfx, modrm);
16938 assign( srcVec, getXMMReg(rE) );
16939 delta += 1;
16940 DIP( "vpmov%cxbd %s,%s\n", how, nameXMMReg(rE), nameYMMReg(rG) );
16941 } else {
16942 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16943 assign( srcVec,
16944 unop( Iop_64UtoV128, loadLE( Ity_I64, mkexpr(addr) ) ) );
16945 delta += alen;
16946 DIP( "vpmov%cxbd %s,%s\n", how, dis_buf, nameYMMReg(rG) );
16947 }
16948
16949 IRTemp zeroVec = newTemp(Ity_V128);
16950 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
16951
16952 IRExpr* res
16953 = binop( Iop_V128HLtoV256,
16954 binop(Iop_InterleaveHI8x16,
16955 mkexpr(zeroVec),
16956 binop(Iop_InterleaveLO8x16,
16957 mkexpr(zeroVec), mkexpr(srcVec)) ),
16958 binop(Iop_InterleaveLO8x16,
16959 mkexpr(zeroVec),
16960 binop(Iop_InterleaveLO8x16,
16961 mkexpr(zeroVec), mkexpr(srcVec)) ) );
16962 if (!xIsZ)
16963 res = binop(Iop_SarN32x8,
16964 binop(Iop_ShlN32x8, res, mkU8(24)), mkU8(24));
16965
16966 putYMMReg ( rG, res );
16967
16968 return delta;
16969}
16970
16971
sewardj8516a1f2012-06-24 14:26:30 +000016972/* Handles 128 bit versions of PMOVSXBQ. */
16973static Long dis_PMOVSXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
16974 Long delta, Bool isAvx )
16975{
16976 IRTemp addr = IRTemp_INVALID;
16977 Int alen = 0;
16978 HChar dis_buf[50];
16979 IRTemp srcBytes = newTemp(Ity_I16);
16980 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000016981 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000016982 UInt rG = gregOfRexRM(pfx, modrm);
16983 if ( epartIsReg(modrm) ) {
16984 UInt rE = eregOfRexRM(pfx, modrm);
16985 assign( srcBytes, getXMMRegLane16( rE, 0 ) );
16986 delta += 1;
16987 DIP( "%spmovsxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
16988 } else {
16989 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
16990 assign( srcBytes, loadLE( Ity_I16, mkexpr(addr) ) );
16991 delta += alen;
16992 DIP( "%spmovsxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
16993 }
16994
16995 (isAvx ? putYMMRegLoAndZU : putXMMReg)
16996 ( rG, binop( Iop_64HLtoV128,
16997 unop( Iop_8Sto64,
16998 unop( Iop_16HIto8, mkexpr(srcBytes) ) ),
16999 unop( Iop_8Sto64,
17000 unop( Iop_16to8, mkexpr(srcBytes) ) ) ) );
17001 return delta;
17002}
17003
17004
sewardjcc3d2192013-03-27 11:37:33 +000017005/* Handles 256 bit versions of PMOVSXBQ. */
17006static Long dis_PMOVSXBQ_256 ( VexAbiInfo* vbi, Prefix pfx,
17007 Long delta )
17008{
17009 IRTemp addr = IRTemp_INVALID;
17010 Int alen = 0;
17011 HChar dis_buf[50];
17012 IRTemp srcBytes = newTemp(Ity_I32);
17013 UChar modrm = getUChar(delta);
17014 UInt rG = gregOfRexRM(pfx, modrm);
17015 if ( epartIsReg(modrm) ) {
17016 UInt rE = eregOfRexRM(pfx, modrm);
17017 assign( srcBytes, getXMMRegLane32( rE, 0 ) );
17018 delta += 1;
17019 DIP( "vpmovsxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17020 } else {
17021 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17022 assign( srcBytes, loadLE( Ity_I32, mkexpr(addr) ) );
17023 delta += alen;
17024 DIP( "vpmovsxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17025 }
17026
17027 putYMMReg
17028 ( rG, binop( Iop_V128HLtoV256,
17029 binop( Iop_64HLtoV128,
17030 unop( Iop_8Sto64,
17031 unop( Iop_16HIto8,
17032 unop( Iop_32HIto16,
17033 mkexpr(srcBytes) ) ) ),
17034 unop( Iop_8Sto64,
17035 unop( Iop_16to8,
17036 unop( Iop_32HIto16,
17037 mkexpr(srcBytes) ) ) ) ),
17038 binop( Iop_64HLtoV128,
17039 unop( Iop_8Sto64,
17040 unop( Iop_16HIto8,
17041 unop( Iop_32to16,
17042 mkexpr(srcBytes) ) ) ),
17043 unop( Iop_8Sto64,
17044 unop( Iop_16to8,
17045 unop( Iop_32to16,
17046 mkexpr(srcBytes) ) ) ) ) ) );
17047 return delta;
17048}
17049
17050
sewardj8516a1f2012-06-24 14:26:30 +000017051/* Handles 128 bit versions of PMOVZXBQ. */
17052static Long dis_PMOVZXBQ_128 ( VexAbiInfo* vbi, Prefix pfx,
17053 Long delta, Bool isAvx )
17054{
17055 IRTemp addr = IRTemp_INVALID;
17056 Int alen = 0;
17057 HChar dis_buf[50];
17058 IRTemp srcVec = newTemp(Ity_V128);
17059 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000017060 const HChar* mbV = isAvx ? "v" : "";
sewardj8516a1f2012-06-24 14:26:30 +000017061 UInt rG = gregOfRexRM(pfx, modrm);
17062 if ( epartIsReg(modrm) ) {
17063 UInt rE = eregOfRexRM(pfx, modrm);
17064 assign( srcVec, getXMMReg(rE) );
17065 delta += 1;
17066 DIP( "%spmovzxbq %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG) );
17067 } else {
17068 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17069 assign( srcVec,
17070 unop( Iop_32UtoV128,
17071 unop( Iop_16Uto32, loadLE( Ity_I16, mkexpr(addr) ))));
17072 delta += alen;
17073 DIP( "%spmovzxbq %s,%s\n", mbV, dis_buf, nameXMMReg(rG) );
17074 }
17075
17076 IRTemp zeroVec = newTemp(Ity_V128);
17077 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17078
17079 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17080 ( rG, binop( Iop_InterleaveLO8x16,
17081 mkexpr(zeroVec),
17082 binop( Iop_InterleaveLO8x16,
17083 mkexpr(zeroVec),
17084 binop( Iop_InterleaveLO8x16,
17085 mkexpr(zeroVec), mkexpr(srcVec) ) ) ) );
17086 return delta;
17087}
17088
17089
sewardjcc3d2192013-03-27 11:37:33 +000017090/* Handles 256 bit versions of PMOVZXBQ. */
17091static Long dis_PMOVZXBQ_256 ( VexAbiInfo* vbi, Prefix pfx,
17092 Long delta )
17093{
17094 IRTemp addr = IRTemp_INVALID;
17095 Int alen = 0;
17096 HChar dis_buf[50];
17097 IRTemp srcVec = newTemp(Ity_V128);
17098 UChar modrm = getUChar(delta);
17099 UInt rG = gregOfRexRM(pfx, modrm);
17100 if ( epartIsReg(modrm) ) {
17101 UInt rE = eregOfRexRM(pfx, modrm);
17102 assign( srcVec, getXMMReg(rE) );
17103 delta += 1;
17104 DIP( "vpmovzxbq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG) );
17105 } else {
17106 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17107 assign( srcVec,
17108 unop( Iop_32UtoV128, loadLE( Ity_I32, mkexpr(addr) )));
17109 delta += alen;
17110 DIP( "vpmovzxbq %s,%s\n", dis_buf, nameYMMReg(rG) );
17111 }
17112
17113 IRTemp zeroVec = newTemp(Ity_V128);
17114 assign( zeroVec, IRExpr_Const( IRConst_V128(0) ) );
17115
17116 putYMMReg
17117 ( rG, binop( Iop_V128HLtoV256,
17118 binop( Iop_InterleaveHI8x16,
17119 mkexpr(zeroVec),
17120 binop( Iop_InterleaveLO8x16,
17121 mkexpr(zeroVec),
17122 binop( Iop_InterleaveLO8x16,
17123 mkexpr(zeroVec), mkexpr(srcVec) ) ) ),
17124 binop( Iop_InterleaveLO8x16,
17125 mkexpr(zeroVec),
17126 binop( Iop_InterleaveLO8x16,
17127 mkexpr(zeroVec),
17128 binop( Iop_InterleaveLO8x16,
17129 mkexpr(zeroVec), mkexpr(srcVec) ) ) )
17130 ) );
17131 return delta;
17132}
17133
17134
sewardje8a7eb72012-06-12 14:59:17 +000017135static Long dis_PHMINPOSUW_128 ( VexAbiInfo* vbi, Prefix pfx,
17136 Long delta, Bool isAvx )
17137{
17138 IRTemp addr = IRTemp_INVALID;
17139 Int alen = 0;
17140 HChar dis_buf[50];
17141 UChar modrm = getUChar(delta);
florian55085f82012-11-21 00:36:55 +000017142 const HChar* mbV = isAvx ? "v" : "";
sewardje8a7eb72012-06-12 14:59:17 +000017143 IRTemp sV = newTemp(Ity_V128);
17144 IRTemp sHi = newTemp(Ity_I64);
17145 IRTemp sLo = newTemp(Ity_I64);
17146 IRTemp dLo = newTemp(Ity_I64);
17147 UInt rG = gregOfRexRM(pfx,modrm);
17148 if (epartIsReg(modrm)) {
17149 UInt rE = eregOfRexRM(pfx,modrm);
17150 assign( sV, getXMMReg(rE) );
17151 delta += 1;
17152 DIP("%sphminposuw %s,%s\n", mbV, nameXMMReg(rE), nameXMMReg(rG));
17153 } else {
17154 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj4955c742012-09-02 12:13:34 +000017155 if (!isAvx)
17156 gen_SEGV_if_not_16_aligned(addr);
sewardje8a7eb72012-06-12 14:59:17 +000017157 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17158 delta += alen;
17159 DIP("%sphminposuw %s,%s\n", mbV, dis_buf, nameXMMReg(rG));
17160 }
17161 assign( sHi, unop(Iop_V128HIto64, mkexpr(sV)) );
17162 assign( sLo, unop(Iop_V128to64, mkexpr(sV)) );
17163 assign( dLo, mkIRExprCCall(
17164 Ity_I64, 0/*regparms*/,
17165 "amd64g_calculate_sse_phminposuw",
17166 &amd64g_calculate_sse_phminposuw,
17167 mkIRExprVec_2( mkexpr(sLo), mkexpr(sHi) )
17168 ));
17169 (isAvx ? putYMMRegLoAndZU : putXMMReg)
17170 (rG, unop(Iop_64UtoV128, mkexpr(dLo)));
17171 return delta;
17172}
17173
17174
sewardj1407a362012-06-24 15:11:38 +000017175static Long dis_AESx ( VexAbiInfo* vbi, Prefix pfx,
17176 Long delta, Bool isAvx, UChar opc )
17177{
17178 IRTemp addr = IRTemp_INVALID;
17179 Int alen = 0;
17180 HChar dis_buf[50];
17181 UChar modrm = getUChar(delta);
17182 UInt rG = gregOfRexRM(pfx, modrm);
17183 UInt regNoL = 0;
17184 UInt regNoR = (isAvx && opc != 0xDB) ? getVexNvvvv(pfx) : rG;
17185
17186 /* This is a nasty kludge. We need to pass 2 x V128 to the
17187 helper. Since we can't do that, use a dirty
17188 helper to compute the results directly from the XMM regs in
17189 the guest state. That means for the memory case, we need to
17190 move the left operand into a pseudo-register (XMM16, let's
17191 call it). */
17192 if (epartIsReg(modrm)) {
17193 regNoL = eregOfRexRM(pfx, modrm);
17194 delta += 1;
17195 } else {
17196 regNoL = 16; /* use XMM16 as an intermediary */
17197 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17198 /* alignment check needed ???? */
17199 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17200 delta += alen;
17201 }
17202
17203 void* fn = &amd64g_dirtyhelper_AES;
florian55085f82012-11-21 00:36:55 +000017204 const HChar* nm = "amd64g_dirtyhelper_AES";
sewardj1407a362012-06-24 15:11:38 +000017205
17206 /* Round up the arguments. Note that this is a kludge -- the
17207 use of mkU64 rather than mkIRExpr_HWord implies the
17208 assumption that the host's word size is 64-bit. */
17209 UInt gstOffD = ymmGuestRegOffset(rG);
17210 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17211 UInt gstOffR = ymmGuestRegOffset(regNoR);
17212 IRExpr* opc4 = mkU64(opc);
17213 IRExpr* gstOffDe = mkU64(gstOffD);
17214 IRExpr* gstOffLe = mkU64(gstOffL);
17215 IRExpr* gstOffRe = mkU64(gstOffR);
17216 IRExpr** args
florian90419562013-08-15 20:54:52 +000017217 = mkIRExprVec_5( IRExpr_BBPTR(), opc4, gstOffDe, gstOffLe, gstOffRe );
sewardj1407a362012-06-24 15:11:38 +000017218
17219 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
sewardj74142b82013-08-08 10:28:59 +000017220 /* It's not really a dirty call, but we can't use the clean helper
17221 mechanism here for the very lame reason that we can't pass 2 x
17222 V128s by value to a helper. Hence this roundabout scheme. */
sewardj1407a362012-06-24 15:11:38 +000017223 d->nFxState = 2;
17224 vex_bzero(&d->fxState, sizeof(d->fxState));
17225 /* AES{ENC,ENCLAST,DEC,DECLAST} read both registers, and writes
17226 the second for !isAvx or the third for isAvx.
17227 AESIMC (0xDB) reads the first register, and writes the second. */
17228 d->fxState[0].fx = Ifx_Read;
17229 d->fxState[0].offset = gstOffL;
17230 d->fxState[0].size = sizeof(U128);
17231 d->fxState[1].offset = gstOffR;
17232 d->fxState[1].size = sizeof(U128);
17233 if (opc == 0xDB)
17234 d->fxState[1].fx = Ifx_Write;
17235 else if (!isAvx || rG == regNoR)
17236 d->fxState[1].fx = Ifx_Modify;
17237 else {
17238 d->fxState[1].fx = Ifx_Read;
17239 d->nFxState++;
17240 d->fxState[2].fx = Ifx_Write;
17241 d->fxState[2].offset = gstOffD;
17242 d->fxState[2].size = sizeof(U128);
17243 }
17244
17245 stmt( IRStmt_Dirty(d) );
17246 {
florian55085f82012-11-21 00:36:55 +000017247 const HChar* opsuf;
sewardj1407a362012-06-24 15:11:38 +000017248 switch (opc) {
17249 case 0xDC: opsuf = "enc"; break;
17250 case 0XDD: opsuf = "enclast"; break;
17251 case 0xDE: opsuf = "dec"; break;
17252 case 0xDF: opsuf = "declast"; break;
17253 case 0xDB: opsuf = "imc"; break;
17254 default: vassert(0);
17255 }
17256 DIP("%saes%s %s,%s%s%s\n", isAvx ? "v" : "", opsuf,
17257 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17258 nameXMMReg(regNoR),
17259 (isAvx && opc != 0xDB) ? "," : "",
17260 (isAvx && opc != 0xDB) ? nameXMMReg(rG) : "");
17261 }
17262 if (isAvx)
17263 putYMMRegLane128( rG, 1, mkV128(0) );
17264 return delta;
17265}
17266
17267static Long dis_AESKEYGENASSIST ( VexAbiInfo* vbi, Prefix pfx,
17268 Long delta, Bool isAvx )
17269{
17270 IRTemp addr = IRTemp_INVALID;
17271 Int alen = 0;
17272 HChar dis_buf[50];
17273 UChar modrm = getUChar(delta);
17274 UInt regNoL = 0;
17275 UInt regNoR = gregOfRexRM(pfx, modrm);
17276 UChar imm = 0;
17277
17278 /* This is a nasty kludge. See AESENC et al. instructions. */
17279 modrm = getUChar(delta);
17280 if (epartIsReg(modrm)) {
17281 regNoL = eregOfRexRM(pfx, modrm);
17282 imm = getUChar(delta+1);
17283 delta += 1+1;
17284 } else {
17285 regNoL = 16; /* use XMM16 as an intermediary */
17286 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17287 /* alignment check ???? . */
17288 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
17289 imm = getUChar(delta+alen);
17290 delta += alen+1;
17291 }
17292
17293 /* Who ya gonna call? Presumably not Ghostbusters. */
17294 void* fn = &amd64g_dirtyhelper_AESKEYGENASSIST;
florian55085f82012-11-21 00:36:55 +000017295 const HChar* nm = "amd64g_dirtyhelper_AESKEYGENASSIST";
sewardj1407a362012-06-24 15:11:38 +000017296
17297 /* Round up the arguments. Note that this is a kludge -- the
17298 use of mkU64 rather than mkIRExpr_HWord implies the
17299 assumption that the host's word size is 64-bit. */
17300 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
17301 UInt gstOffR = ymmGuestRegOffset(regNoR);
17302
17303 IRExpr* imme = mkU64(imm & 0xFF);
17304 IRExpr* gstOffLe = mkU64(gstOffL);
17305 IRExpr* gstOffRe = mkU64(gstOffR);
17306 IRExpr** args
florian90419562013-08-15 20:54:52 +000017307 = mkIRExprVec_4( IRExpr_BBPTR(), imme, gstOffLe, gstOffRe );
sewardj1407a362012-06-24 15:11:38 +000017308
17309 IRDirty* d = unsafeIRDirty_0_N( 0/*regparms*/, nm, fn, args );
sewardj74142b82013-08-08 10:28:59 +000017310 /* It's not really a dirty call, but we can't use the clean helper
17311 mechanism here for the very lame reason that we can't pass 2 x
17312 V128s by value to a helper. Hence this roundabout scheme. */
sewardj1407a362012-06-24 15:11:38 +000017313 d->nFxState = 2;
17314 vex_bzero(&d->fxState, sizeof(d->fxState));
17315 d->fxState[0].fx = Ifx_Read;
17316 d->fxState[0].offset = gstOffL;
17317 d->fxState[0].size = sizeof(U128);
17318 d->fxState[1].fx = Ifx_Write;
17319 d->fxState[1].offset = gstOffR;
17320 d->fxState[1].size = sizeof(U128);
17321 stmt( IRStmt_Dirty(d) );
17322
17323 DIP("%saeskeygenassist $%x,%s,%s\n", isAvx ? "v" : "", (UInt)imm,
17324 (regNoL == 16 ? dis_buf : nameXMMReg(regNoL)),
17325 nameXMMReg(regNoR));
17326 if (isAvx)
17327 putYMMRegLane128( regNoR, 1, mkV128(0) );
17328 return delta;
17329}
17330
17331
sewardj80611e32012-01-20 13:07:24 +000017332__attribute__((noinline))
17333static
17334Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
17335 VexAbiInfo* vbi,
17336 Prefix pfx, Int sz, Long deltaIN )
17337{
17338 IRTemp addr = IRTemp_INVALID;
17339 UChar modrm = 0;
17340 Int alen = 0;
17341 HChar dis_buf[50];
17342
17343 *decode_OK = False;
17344
17345 Long delta = deltaIN;
17346 UChar opc = getUChar(delta);
17347 delta++;
17348 switch (opc) {
17349
17350 case 0x10:
17351 case 0x14:
17352 case 0x15:
17353 /* 66 0F 38 10 /r = PBLENDVB xmm1, xmm2/m128 (byte gran)
17354 66 0F 38 14 /r = BLENDVPS xmm1, xmm2/m128 (float gran)
17355 66 0F 38 15 /r = BLENDVPD xmm1, xmm2/m128 (double gran)
17356 Blend at various granularities, with XMM0 (implicit operand)
17357 providing the controlling mask.
17358 */
17359 if (have66noF2noF3(pfx) && sz == 2) {
17360 modrm = getUChar(delta);
17361
florian55085f82012-11-21 00:36:55 +000017362 const HChar* nm = NULL;
sewardj80611e32012-01-20 13:07:24 +000017363 UInt gran = 0;
17364 IROp opSAR = Iop_INVALID;
17365 switch (opc) {
17366 case 0x10:
17367 nm = "pblendvb"; gran = 1; opSAR = Iop_SarN8x16;
17368 break;
17369 case 0x14:
17370 nm = "blendvps"; gran = 4; opSAR = Iop_SarN32x4;
17371 break;
17372 case 0x15:
17373 nm = "blendvpd"; gran = 8; opSAR = Iop_SarN64x2;
17374 break;
17375 }
17376 vassert(nm);
17377
17378 IRTemp vecE = newTemp(Ity_V128);
17379 IRTemp vecG = newTemp(Ity_V128);
17380 IRTemp vec0 = newTemp(Ity_V128);
17381
17382 if ( epartIsReg(modrm) ) {
17383 assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
17384 delta += 1;
17385 DIP( "%s %s,%s\n", nm,
17386 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17387 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17388 } else {
17389 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17390 gen_SEGV_if_not_16_aligned( addr );
17391 assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
17392 delta += alen;
17393 DIP( "%s %s,%s\n", nm,
17394 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17395 }
17396
17397 assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
17398 assign(vec0, getXMMReg(0));
17399
sewardje8a7eb72012-06-12 14:59:17 +000017400 IRTemp res = math_PBLENDVB_128( vecE, vecG, vec0, gran, opSAR );
sewardjc4530ae2012-05-21 10:18:49 +000017401 putXMMReg(gregOfRexRM(pfx, modrm), mkexpr(res));
sewardj80611e32012-01-20 13:07:24 +000017402
17403 goto decode_success;
17404 }
17405 break;
17406
17407 case 0x17:
17408 /* 66 0F 38 17 /r = PTEST xmm1, xmm2/m128
17409 Logical compare (set ZF and CF from AND/ANDN of the operands) */
17410 if (have66noF2noF3(pfx)
17411 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjed1884d2012-06-21 08:53:48 +000017412 delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
sewardj80611e32012-01-20 13:07:24 +000017413 goto decode_success;
17414 }
17415 break;
17416
17417 case 0x20:
17418 /* 66 0F 38 20 /r = PMOVSXBW xmm1, xmm2/m64
17419 Packed Move with Sign Extend from Byte to Word (XMM) */
sewardj6fcd43e2012-06-14 08:51:35 +000017420 if (have66noF2noF3(pfx) && sz == 2) {
17421 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17422 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017423 goto decode_success;
17424 }
17425 break;
17426
17427 case 0x21:
17428 /* 66 0F 38 21 /r = PMOVSXBD xmm1, xmm2/m32
17429 Packed Move with Sign Extend from Byte to DWord (XMM) */
17430 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000017431 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17432 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017433 goto decode_success;
17434 }
17435 break;
17436
17437 case 0x22:
17438 /* 66 0F 38 22 /r = PMOVSXBQ xmm1, xmm2/m16
17439 Packed Move with Sign Extend from Byte to QWord (XMM) */
17440 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017441 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017442 goto decode_success;
17443 }
17444 break;
17445
17446 case 0x23:
17447 /* 66 0F 38 23 /r = PMOVSXWD xmm1, xmm2/m64
17448 Packed Move with Sign Extend from Word to DWord (XMM) */
17449 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017450 delta = dis_PMOVxXWD_128(vbi, pfx, delta,
17451 False/*!isAvx*/, False/*!xIsZ*/);
sewardj80611e32012-01-20 13:07:24 +000017452 goto decode_success;
17453 }
17454 break;
17455
17456 case 0x24:
17457 /* 66 0F 38 24 /r = PMOVSXWQ xmm1, xmm2/m32
17458 Packed Move with Sign Extend from Word to QWord (XMM) */
17459 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017460 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017461 goto decode_success;
17462 }
17463 break;
17464
17465 case 0x25:
17466 /* 66 0F 38 25 /r = PMOVSXDQ xmm1, xmm2/m64
17467 Packed Move with Sign Extend from Double Word to Quad Word (XMM) */
17468 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017469 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17470 False/*!isAvx*/, False/*!xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017471 goto decode_success;
17472 }
17473 break;
17474
17475 case 0x28:
sewardj89378162012-06-24 12:12:20 +000017476 /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-lanes
sewardj80611e32012-01-20 13:07:24 +000017477 0 x 0 to form lower 64-bit half and lanes 2 x 2 to form upper
17478 64-bit half */
17479 /* This is a really poor translation -- could be improved if
sewardj89378162012-06-24 12:12:20 +000017480 performance critical. It's a copy-paste of PMULUDQ, too. */
sewardj80611e32012-01-20 13:07:24 +000017481 if (have66noF2noF3(pfx) && sz == 2) {
sewardj89378162012-06-24 12:12:20 +000017482 IRTemp sV = newTemp(Ity_V128);
17483 IRTemp dV = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000017484 modrm = getUChar(delta);
sewardj89378162012-06-24 12:12:20 +000017485 UInt rG = gregOfRexRM(pfx,modrm);
17486 assign( dV, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000017487 if (epartIsReg(modrm)) {
sewardj89378162012-06-24 12:12:20 +000017488 UInt rE = eregOfRexRM(pfx,modrm);
17489 assign( sV, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000017490 delta += 1;
sewardj89378162012-06-24 12:12:20 +000017491 DIP("pmuldq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000017492 } else {
17493 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17494 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
17495 delta += alen;
sewardj89378162012-06-24 12:12:20 +000017496 DIP("pmuldq %s,%s\n", dis_buf, nameXMMReg(rG));
sewardj80611e32012-01-20 13:07:24 +000017497 }
17498
sewardj89378162012-06-24 12:12:20 +000017499 putXMMReg( rG, mkexpr(math_PMULDQ_128( dV, sV )) );
sewardj80611e32012-01-20 13:07:24 +000017500 goto decode_success;
17501 }
17502 break;
17503
17504 case 0x29:
17505 /* 66 0F 38 29 = PCMPEQQ
17506 64x2 equality comparison */
17507 if (have66noF2noF3(pfx) && sz == 2) {
17508 /* FIXME: this needs an alignment check */
17509 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
17510 "pcmpeqq", Iop_CmpEQ64x2, False );
17511 goto decode_success;
17512 }
17513 break;
17514
sewardj92eef382013-03-26 10:27:39 +000017515 case 0x2A:
17516 /* 66 0F 38 2A = MOVNTDQA
17517 "non-temporal" "streaming" load
17518 Handle like MOVDQA but only memory operand is allowed */
17519 if (have66noF2noF3(pfx) && sz == 2) {
17520 modrm = getUChar(delta);
17521 if (!epartIsReg(modrm)) {
17522 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
17523 gen_SEGV_if_not_16_aligned( addr );
17524 putXMMReg( gregOfRexRM(pfx,modrm),
17525 loadLE(Ity_V128, mkexpr(addr)) );
17526 DIP("movntdqa %s,%s\n", dis_buf,
17527 nameXMMReg(gregOfRexRM(pfx,modrm)));
17528 delta += alen;
17529 goto decode_success;
17530 }
17531 }
17532 break;
17533
sewardj80611e32012-01-20 13:07:24 +000017534 case 0x2B:
17535 /* 66 0f 38 2B /r = PACKUSDW xmm1, xmm2/m128
17536 2x 32x4 S->U saturating narrow from xmm2/m128 to xmm1 */
17537 if (have66noF2noF3(pfx) && sz == 2) {
17538
17539 modrm = getUChar(delta);
17540
17541 IRTemp argL = newTemp(Ity_V128);
17542 IRTemp argR = newTemp(Ity_V128);
17543
17544 if ( epartIsReg(modrm) ) {
17545 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17546 delta += 1;
17547 DIP( "packusdw %s,%s\n",
17548 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17549 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17550 } else {
17551 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17552 gen_SEGV_if_not_16_aligned( addr );
17553 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
17554 delta += alen;
17555 DIP( "packusdw %s,%s\n",
17556 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17557 }
17558
17559 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
17560
17561 putXMMReg( gregOfRexRM(pfx, modrm),
17562 binop( Iop_QNarrowBin32Sto16Ux8,
17563 mkexpr(argL), mkexpr(argR)) );
17564
17565 goto decode_success;
17566 }
17567 break;
17568
17569 case 0x30:
17570 /* 66 0F 38 30 /r = PMOVZXBW xmm1, xmm2/m64
17571 Packed Move with Zero Extend from Byte to Word (XMM) */
17572 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017573 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
17574 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017575 goto decode_success;
17576 }
17577 break;
17578
17579 case 0x31:
17580 /* 66 0F 38 31 /r = PMOVZXBD xmm1, xmm2/m32
17581 Packed Move with Zero Extend from Byte to DWord (XMM) */
17582 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000017583 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
17584 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017585 goto decode_success;
17586 }
17587 break;
17588
17589 case 0x32:
17590 /* 66 0F 38 32 /r = PMOVZXBQ xmm1, xmm2/m16
17591 Packed Move with Zero Extend from Byte to QWord (XMM) */
17592 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017593 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017594 goto decode_success;
17595 }
17596 break;
17597
17598 case 0x33:
17599 /* 66 0F 38 33 /r = PMOVZXWD xmm1, xmm2/m64
17600 Packed Move with Zero Extend from Word to DWord (XMM) */
17601 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017602 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
17603 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017604 goto decode_success;
17605 }
17606 break;
17607
17608 case 0x34:
17609 /* 66 0F 38 34 /r = PMOVZXWQ xmm1, xmm2/m32
17610 Packed Move with Zero Extend from Word to QWord (XMM) */
17611 if (have66noF2noF3(pfx) && sz == 2) {
sewardj8516a1f2012-06-24 14:26:30 +000017612 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000017613 goto decode_success;
17614 }
17615 break;
17616
17617 case 0x35:
17618 /* 66 0F 38 35 /r = PMOVZXDQ xmm1, xmm2/m64
17619 Packed Move with Zero Extend from DWord to QWord (XMM) */
17620 if (have66noF2noF3(pfx) && sz == 2) {
sewardj6fcd43e2012-06-14 08:51:35 +000017621 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
17622 False/*!isAvx*/, True/*xIsZ*/ );
sewardj80611e32012-01-20 13:07:24 +000017623 goto decode_success;
17624 }
17625 break;
17626
17627 case 0x37:
17628 /* 66 0F 38 37 = PCMPGTQ
17629 64x2 comparison (signed, presumably; the Intel docs don't say :-)
17630 */
17631 if (have66noF2noF3(pfx) && sz == 2) {
17632 /* FIXME: this needs an alignment check */
17633 delta = dis_SSEint_E_to_G( vbi, pfx, delta,
17634 "pcmpgtq", Iop_CmpGT64Sx2, False );
17635 goto decode_success;
17636 }
17637 break;
17638
17639 case 0x38:
17640 case 0x3C:
17641 /* 66 0F 38 38 /r = PMINSB xmm1, xmm2/m128 8Sx16 (signed) min
17642 66 0F 38 3C /r = PMAXSB xmm1, xmm2/m128 8Sx16 (signed) max
17643 */
17644 if (have66noF2noF3(pfx) && sz == 2) {
17645 /* FIXME: this needs an alignment check */
17646 Bool isMAX = opc == 0x3C;
17647 delta = dis_SSEint_E_to_G(
17648 vbi, pfx, delta,
17649 isMAX ? "pmaxsb" : "pminsb",
17650 isMAX ? Iop_Max8Sx16 : Iop_Min8Sx16,
17651 False
17652 );
17653 goto decode_success;
17654 }
17655 break;
17656
17657 case 0x39:
17658 case 0x3D:
17659 /* 66 0F 38 39 /r = PMINSD xmm1, xmm2/m128
17660 Minimum of Packed Signed Double Word Integers (XMM)
17661 66 0F 38 3D /r = PMAXSD xmm1, xmm2/m128
17662 Maximum of Packed Signed Double Word Integers (XMM)
17663 */
17664 if (have66noF2noF3(pfx) && sz == 2) {
17665 /* FIXME: this needs an alignment check */
17666 Bool isMAX = opc == 0x3D;
17667 delta = dis_SSEint_E_to_G(
17668 vbi, pfx, delta,
17669 isMAX ? "pmaxsd" : "pminsd",
17670 isMAX ? Iop_Max32Sx4 : Iop_Min32Sx4,
17671 False
17672 );
17673 goto decode_success;
17674 }
17675 break;
17676
17677 case 0x3A:
17678 case 0x3E:
17679 /* 66 0F 38 3A /r = PMINUW xmm1, xmm2/m128
17680 Minimum of Packed Unsigned Word Integers (XMM)
17681 66 0F 38 3E /r = PMAXUW xmm1, xmm2/m128
17682 Maximum of Packed Unsigned Word Integers (XMM)
17683 */
17684 if (have66noF2noF3(pfx) && sz == 2) {
17685 /* FIXME: this needs an alignment check */
17686 Bool isMAX = opc == 0x3E;
17687 delta = dis_SSEint_E_to_G(
17688 vbi, pfx, delta,
17689 isMAX ? "pmaxuw" : "pminuw",
17690 isMAX ? Iop_Max16Ux8 : Iop_Min16Ux8,
17691 False
17692 );
17693 goto decode_success;
17694 }
17695 break;
17696
17697 case 0x3B:
17698 case 0x3F:
17699 /* 66 0F 38 3B /r = PMINUD xmm1, xmm2/m128
17700 Minimum of Packed Unsigned Doubleword Integers (XMM)
17701 66 0F 38 3F /r = PMAXUD xmm1, xmm2/m128
17702 Maximum of Packed Unsigned Doubleword Integers (XMM)
17703 */
17704 if (have66noF2noF3(pfx) && sz == 2) {
17705 /* FIXME: this needs an alignment check */
17706 Bool isMAX = opc == 0x3F;
17707 delta = dis_SSEint_E_to_G(
17708 vbi, pfx, delta,
17709 isMAX ? "pmaxud" : "pminud",
17710 isMAX ? Iop_Max32Ux4 : Iop_Min32Ux4,
17711 False
17712 );
17713 goto decode_success;
17714 }
17715 break;
17716
17717 case 0x40:
17718 /* 66 0F 38 40 /r = PMULLD xmm1, xmm2/m128
17719 32x4 integer multiply from xmm2/m128 to xmm1 */
17720 if (have66noF2noF3(pfx) && sz == 2) {
17721
17722 modrm = getUChar(delta);
17723
17724 IRTemp argL = newTemp(Ity_V128);
17725 IRTemp argR = newTemp(Ity_V128);
17726
17727 if ( epartIsReg(modrm) ) {
17728 assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
17729 delta += 1;
17730 DIP( "pmulld %s,%s\n",
17731 nameXMMReg( eregOfRexRM(pfx, modrm) ),
17732 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17733 } else {
17734 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17735 gen_SEGV_if_not_16_aligned( addr );
17736 assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
17737 delta += alen;
17738 DIP( "pmulld %s,%s\n",
17739 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
17740 }
17741
17742 assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
17743
17744 putXMMReg( gregOfRexRM(pfx, modrm),
17745 binop( Iop_Mul32x4, mkexpr(argL), mkexpr(argR)) );
17746
17747 goto decode_success;
17748 }
17749 break;
17750
sewardj8cb931e2012-02-16 22:02:14 +000017751 case 0x41:
17752 /* 66 0F 38 41 /r = PHMINPOSUW xmm1, xmm2/m128
17753 Packed Horizontal Word Minimum from xmm2/m128 to xmm1 */
17754 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000017755 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, False/*!isAvx*/ );
sewardj8cb931e2012-02-16 22:02:14 +000017756 goto decode_success;
17757 }
17758 break;
17759
philippeff4d6be2012-02-14 21:34:56 +000017760 case 0xDC:
17761 case 0xDD:
17762 case 0xDE:
17763 case 0xDF:
17764 case 0xDB:
17765 /* 66 0F 38 DC /r = AESENC xmm1, xmm2/m128
17766 DD /r = AESENCLAST xmm1, xmm2/m128
17767 DE /r = AESDEC xmm1, xmm2/m128
17768 DF /r = AESDECLAST xmm1, xmm2/m128
17769
17770 DB /r = AESIMC xmm1, xmm2/m128 */
17771 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000017772 delta = dis_AESx( vbi, pfx, delta, False/*!isAvx*/, opc );
philippeff4d6be2012-02-14 21:34:56 +000017773 goto decode_success;
17774 }
17775 break;
17776
sewardj80611e32012-01-20 13:07:24 +000017777 case 0xF0:
17778 case 0xF1:
17779 /* F2 0F 38 F0 /r = CRC32 r/m8, r32 (REX.W ok, 66 not ok)
17780 F2 0F 38 F1 /r = CRC32 r/m{16,32,64}, r32
17781 The decoding on this is a bit unusual.
17782 */
17783 if (haveF2noF3(pfx)
17784 && (opc == 0xF1 || (opc == 0xF0 && !have66(pfx)))) {
17785 modrm = getUChar(delta);
17786
17787 if (opc == 0xF0)
17788 sz = 1;
17789 else
17790 vassert(sz == 2 || sz == 4 || sz == 8);
17791
17792 IRType tyE = szToITy(sz);
17793 IRTemp valE = newTemp(tyE);
17794
17795 if (epartIsReg(modrm)) {
17796 assign(valE, getIRegE(sz, pfx, modrm));
17797 delta += 1;
17798 DIP("crc32b %s,%s\n", nameIRegE(sz, pfx, modrm),
17799 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
17800 } else {
17801 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
17802 assign(valE, loadLE(tyE, mkexpr(addr)));
17803 delta += alen;
17804 DIP("crc32b %s,%s\n", dis_buf,
17805 nameIRegG(1==getRexW(pfx) ? 8 : 4, pfx, modrm));
17806 }
17807
17808 /* Somewhat funny getting/putting of the crc32 value, in order
17809 to ensure that it turns into 64-bit gets and puts. However,
17810 mask off the upper 32 bits so as to not get memcheck false
17811 +ves around the helper call. */
17812 IRTemp valG0 = newTemp(Ity_I64);
17813 assign(valG0, binop(Iop_And64, getIRegG(8, pfx, modrm),
17814 mkU64(0xFFFFFFFF)));
17815
florian55085f82012-11-21 00:36:55 +000017816 const HChar* nm = NULL;
sewardj80611e32012-01-20 13:07:24 +000017817 void* fn = NULL;
17818 switch (sz) {
17819 case 1: nm = "amd64g_calc_crc32b";
17820 fn = &amd64g_calc_crc32b; break;
17821 case 2: nm = "amd64g_calc_crc32w";
17822 fn = &amd64g_calc_crc32w; break;
17823 case 4: nm = "amd64g_calc_crc32l";
17824 fn = &amd64g_calc_crc32l; break;
17825 case 8: nm = "amd64g_calc_crc32q";
17826 fn = &amd64g_calc_crc32q; break;
17827 }
17828 vassert(nm && fn);
17829 IRTemp valG1 = newTemp(Ity_I64);
17830 assign(valG1,
17831 mkIRExprCCall(Ity_I64, 0/*regparm*/, nm, fn,
17832 mkIRExprVec_2(mkexpr(valG0),
17833 widenUto64(mkexpr(valE)))));
17834
17835 putIRegG(4, pfx, modrm, unop(Iop_64to32, mkexpr(valG1)));
17836 goto decode_success;
17837 }
17838 break;
17839
17840 default:
17841 break;
17842
17843 }
17844
17845 //decode_failure:
17846 *decode_OK = False;
17847 return deltaIN;
17848
17849 decode_success:
17850 *decode_OK = True;
17851 return delta;
17852}
17853
17854
17855/*------------------------------------------------------------*/
17856/*--- ---*/
17857/*--- Top-level SSE4: dis_ESC_0F3A__SSE4 ---*/
17858/*--- ---*/
17859/*------------------------------------------------------------*/
17860
sewardj82096922012-06-24 14:57:59 +000017861static Long dis_PEXTRW ( VexAbiInfo* vbi, Prefix pfx,
17862 Long delta, Bool isAvx )
17863{
17864 IRTemp addr = IRTemp_INVALID;
17865 IRTemp t0 = IRTemp_INVALID;
17866 IRTemp t1 = IRTemp_INVALID;
17867 IRTemp t2 = IRTemp_INVALID;
17868 IRTemp t3 = IRTemp_INVALID;
17869 UChar modrm = getUChar(delta);
17870 Int alen = 0;
17871 HChar dis_buf[50];
17872 UInt rG = gregOfRexRM(pfx,modrm);
17873 Int imm8_20;
17874 IRTemp xmm_vec = newTemp(Ity_V128);
17875 IRTemp d16 = newTemp(Ity_I16);
florian55085f82012-11-21 00:36:55 +000017876 const HChar* mbV = isAvx ? "v" : "";
sewardj82096922012-06-24 14:57:59 +000017877
17878 vassert(0==getRexW(pfx)); /* ensured by caller */
17879 assign( xmm_vec, getXMMReg(rG) );
17880 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
17881
17882 if ( epartIsReg( modrm ) ) {
17883 imm8_20 = (Int)(getUChar(delta+1) & 7);
17884 } else {
17885 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17886 imm8_20 = (Int)(getUChar(delta+alen) & 7);
17887 }
17888
17889 switch (imm8_20) {
17890 case 0: assign(d16, unop(Iop_32to16, mkexpr(t0))); break;
17891 case 1: assign(d16, unop(Iop_32HIto16, mkexpr(t0))); break;
17892 case 2: assign(d16, unop(Iop_32to16, mkexpr(t1))); break;
17893 case 3: assign(d16, unop(Iop_32HIto16, mkexpr(t1))); break;
17894 case 4: assign(d16, unop(Iop_32to16, mkexpr(t2))); break;
17895 case 5: assign(d16, unop(Iop_32HIto16, mkexpr(t2))); break;
17896 case 6: assign(d16, unop(Iop_32to16, mkexpr(t3))); break;
17897 case 7: assign(d16, unop(Iop_32HIto16, mkexpr(t3))); break;
17898 default: vassert(0);
17899 }
17900
17901 if ( epartIsReg( modrm ) ) {
17902 UInt rE = eregOfRexRM(pfx,modrm);
17903 putIReg32( rE, unop(Iop_16Uto32, mkexpr(d16)) );
17904 delta += 1+1;
17905 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20,
17906 nameXMMReg( rG ), nameIReg32( rE ) );
17907 } else {
17908 storeLE( mkexpr(addr), mkexpr(d16) );
17909 delta += alen+1;
17910 DIP( "%spextrw $%d, %s,%s\n", mbV, imm8_20, nameXMMReg( rG ), dis_buf );
17911 }
17912 return delta;
17913}
17914
17915
sewardjc4530ae2012-05-21 10:18:49 +000017916static Long dis_PEXTRD ( VexAbiInfo* vbi, Prefix pfx,
17917 Long delta, Bool isAvx )
17918{
17919 IRTemp addr = IRTemp_INVALID;
17920 IRTemp t0 = IRTemp_INVALID;
17921 IRTemp t1 = IRTemp_INVALID;
17922 IRTemp t2 = IRTemp_INVALID;
17923 IRTemp t3 = IRTemp_INVALID;
17924 UChar modrm = 0;
17925 Int alen = 0;
17926 HChar dis_buf[50];
17927
17928 Int imm8_10;
17929 IRTemp xmm_vec = newTemp(Ity_V128);
17930 IRTemp src_dword = newTemp(Ity_I32);
florian55085f82012-11-21 00:36:55 +000017931 const HChar* mbV = isAvx ? "v" : "";
sewardjc4530ae2012-05-21 10:18:49 +000017932
17933 vassert(0==getRexW(pfx)); /* ensured by caller */
17934 modrm = getUChar(delta);
17935 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
sewardj4b1cc832012-06-13 11:10:20 +000017936 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardjc4530ae2012-05-21 10:18:49 +000017937
17938 if ( epartIsReg( modrm ) ) {
17939 imm8_10 = (Int)(getUChar(delta+1) & 3);
17940 } else {
17941 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17942 imm8_10 = (Int)(getUChar(delta+alen) & 3);
17943 }
17944
17945 switch ( imm8_10 ) {
17946 case 0: assign( src_dword, mkexpr(t0) ); break;
17947 case 1: assign( src_dword, mkexpr(t1) ); break;
17948 case 2: assign( src_dword, mkexpr(t2) ); break;
17949 case 3: assign( src_dword, mkexpr(t3) ); break;
17950 default: vassert(0);
17951 }
17952
17953 if ( epartIsReg( modrm ) ) {
17954 putIReg32( eregOfRexRM(pfx,modrm), mkexpr(src_dword) );
17955 delta += 1+1;
17956 DIP( "%spextrd $%d, %s,%s\n", mbV, imm8_10,
17957 nameXMMReg( gregOfRexRM(pfx, modrm) ),
17958 nameIReg32( eregOfRexRM(pfx, modrm) ) );
17959 } else {
17960 storeLE( mkexpr(addr), mkexpr(src_dword) );
17961 delta += alen+1;
17962 DIP( "%spextrd $%d, %s,%s\n", mbV,
17963 imm8_10, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
17964 }
17965 return delta;
17966}
17967
17968
sewardj56c30312012-06-12 08:45:39 +000017969static Long dis_PEXTRQ ( VexAbiInfo* vbi, Prefix pfx,
17970 Long delta, Bool isAvx )
17971{
17972 IRTemp addr = IRTemp_INVALID;
17973 UChar modrm = 0;
17974 Int alen = 0;
17975 HChar dis_buf[50];
17976
17977 Int imm8_0;
17978 IRTemp xmm_vec = newTemp(Ity_V128);
17979 IRTemp src_qword = newTemp(Ity_I64);
florian55085f82012-11-21 00:36:55 +000017980 const HChar* mbV = isAvx ? "v" : "";
sewardj56c30312012-06-12 08:45:39 +000017981
17982 vassert(1==getRexW(pfx)); /* ensured by caller */
17983 modrm = getUChar(delta);
17984 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
17985
17986 if ( epartIsReg( modrm ) ) {
17987 imm8_0 = (Int)(getUChar(delta+1) & 1);
17988 } else {
17989 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
17990 imm8_0 = (Int)(getUChar(delta+alen) & 1);
17991 }
17992
17993 switch ( imm8_0 ) {
17994 case 0: assign( src_qword, unop(Iop_V128to64, mkexpr(xmm_vec)) );
17995 break;
17996 case 1: assign( src_qword, unop(Iop_V128HIto64, mkexpr(xmm_vec)) );
17997 break;
17998 default: vassert(0);
17999 }
18000
18001 if ( epartIsReg( modrm ) ) {
18002 putIReg64( eregOfRexRM(pfx,modrm), mkexpr(src_qword) );
18003 delta += 1+1;
18004 DIP( "%spextrq $%d, %s,%s\n", mbV, imm8_0,
18005 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18006 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18007 } else {
18008 storeLE( mkexpr(addr), mkexpr(src_qword) );
18009 delta += alen+1;
18010 DIP( "%spextrq $%d, %s,%s\n", mbV,
18011 imm8_0, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18012 }
18013 return delta;
18014}
18015
sewardj009230b2013-01-26 11:47:55 +000018016static IRExpr* math_CTZ32(IRExpr *exp)
sewardjf833ed42012-11-20 15:24:24 +000018017{
sewardj009230b2013-01-26 11:47:55 +000018018 /* Iop_Ctz32 isn't implemented by the amd64 back end, so use Iop_Ctz64. */
sewardjf833ed42012-11-20 15:24:24 +000018019 return unop(Iop_64to32, unop(Iop_Ctz64, unop(Iop_32Uto64, exp)));
18020}
18021
sewardjf833ed42012-11-20 15:24:24 +000018022static Long dis_PCMPISTRI_3A ( UChar modrm, UInt regNoL, UInt regNoR,
18023 Long delta, UChar opc, UChar imm,
18024 HChar dis_buf[])
18025{
18026 /* We only handle PCMPISTRI for now */
18027 vassert((opc & 0x03) == 0x03);
18028 /* And only an immediate byte of 0x38 or 0x3A */
18029 vassert((imm & ~0x02) == 0x38);
18030
18031 /* FIXME: Is this correct when RegNoL == 16 ? */
18032 IRTemp argL = newTemp(Ity_V128);
18033 assign(argL, getXMMReg(regNoL));
18034 IRTemp argR = newTemp(Ity_V128);
18035 assign(argR, getXMMReg(regNoR));
18036
18037 IRTemp zmaskL = newTemp(Ity_I32);
sewardj009230b2013-01-26 11:47:55 +000018038 assign(zmaskL, unop(Iop_16Uto32,
18039 unop(Iop_GetMSBs8x16,
18040 binop(Iop_CmpEQ8x16, mkexpr(argL), mkV128(0)))));
sewardjf833ed42012-11-20 15:24:24 +000018041 IRTemp zmaskR = newTemp(Ity_I32);
sewardj009230b2013-01-26 11:47:55 +000018042 assign(zmaskR, unop(Iop_16Uto32,
18043 unop(Iop_GetMSBs8x16,
18044 binop(Iop_CmpEQ8x16, mkexpr(argR), mkV128(0)))));
sewardjf833ed42012-11-20 15:24:24 +000018045
18046 /* We want validL = ~(zmaskL | -zmaskL)
18047
18048 But this formulation kills memcheck's validity tracking when any
18049 bits above the first "1" are invalid. So reformulate as:
18050
18051 validL = (zmaskL ? (1 << ctz(zmaskL)) : 0) - 1
18052 */
18053
sewardj009230b2013-01-26 11:47:55 +000018054 IRExpr *ctzL = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskL)));
sewardjf833ed42012-11-20 15:24:24 +000018055
sewardj009230b2013-01-26 11:47:55 +000018056 /* Generate a bool expression which is zero iff the original is
sewardjf833ed42012-11-20 15:24:24 +000018057 zero. Do this carefully so memcheck can propagate validity bits
18058 correctly.
18059 */
sewardj009230b2013-01-26 11:47:55 +000018060 IRTemp zmaskL_zero = newTemp(Ity_I1);
18061 assign(zmaskL_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskL), mkU32(0)));
sewardjf833ed42012-11-20 15:24:24 +000018062
18063 IRTemp validL = newTemp(Ity_I32);
18064 assign(validL, binop(Iop_Sub32,
florian99dd03e2013-01-29 03:56:06 +000018065 IRExpr_ITE(mkexpr(zmaskL_zero),
18066 binop(Iop_Shl32, mkU32(1), ctzL),
18067 mkU32(0)),
sewardjf833ed42012-11-20 15:24:24 +000018068 mkU32(1)));
18069
18070 /* And similarly for validR. */
sewardj009230b2013-01-26 11:47:55 +000018071 IRExpr *ctzR = unop(Iop_32to8, math_CTZ32(mkexpr(zmaskR)));
18072 IRTemp zmaskR_zero = newTemp(Ity_I1);
18073 assign(zmaskR_zero, binop(Iop_ExpCmpNE32, mkexpr(zmaskR), mkU32(0)));
sewardjf833ed42012-11-20 15:24:24 +000018074 IRTemp validR = newTemp(Ity_I32);
18075 assign(validR, binop(Iop_Sub32,
florian99dd03e2013-01-29 03:56:06 +000018076 IRExpr_ITE(mkexpr(zmaskR_zero),
18077 binop(Iop_Shl32, mkU32(1), ctzR),
18078 mkU32(0)),
sewardjf833ed42012-11-20 15:24:24 +000018079 mkU32(1)));
18080
18081 /* Do the actual comparison. */
sewardj009230b2013-01-26 11:47:55 +000018082 IRExpr *boolResII = unop(Iop_16Uto32,
18083 unop(Iop_GetMSBs8x16,
18084 binop(Iop_CmpEQ8x16, mkexpr(argL),
18085 mkexpr(argR))));
sewardjf833ed42012-11-20 15:24:24 +000018086
18087 /* Compute boolresII & validL & validR (i.e., if both valid, use
18088 comparison result) */
18089 IRExpr *intRes1_a = binop(Iop_And32, boolResII,
18090 binop(Iop_And32,
18091 mkexpr(validL), mkexpr(validR)));
18092
18093 /* Compute ~(validL | validR); i.e., if both invalid, force 1. */
18094 IRExpr *intRes1_b = unop(Iop_Not32, binop(Iop_Or32,
18095 mkexpr(validL), mkexpr(validR)));
18096 /* Otherwise, zero. */
18097 IRExpr *intRes1 = binop(Iop_And32, mkU32(0xFFFF),
18098 binop(Iop_Or32, intRes1_a, intRes1_b));
18099
18100 /* The "0x30" in imm=0x3A means "polarity=3" means XOR validL with
18101 result. */
18102 IRTemp intRes2 = newTemp(Ity_I32);
18103 assign(intRes2, binop(Iop_And32, mkU32(0xFFFF),
18104 binop(Iop_Xor32, intRes1, mkexpr(validL))));
18105
18106 /* If the 0x40 bit were set in imm=0x3A, we would return the index
18107 of the msb. Since it is clear, we return the index of the
18108 lsb. */
sewardj009230b2013-01-26 11:47:55 +000018109 IRExpr *newECX = math_CTZ32(binop(Iop_Or32,
18110 mkexpr(intRes2), mkU32(0x10000)));
sewardjf833ed42012-11-20 15:24:24 +000018111
18112 /* And thats our rcx. */
18113 putIReg32(R_RCX, newECX);
18114
18115 /* Now for the condition codes... */
18116
18117 /* C == 0 iff intRes2 == 0 */
florian99dd03e2013-01-29 03:56:06 +000018118 IRExpr *c_bit = IRExpr_ITE( binop(Iop_ExpCmpNE32, mkexpr(intRes2),
18119 mkU32(0)),
18120 mkU32(1 << AMD64G_CC_SHIFT_C),
18121 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018122 /* Z == 1 iff any in argL is 0 */
florian99dd03e2013-01-29 03:56:06 +000018123 IRExpr *z_bit = IRExpr_ITE( mkexpr(zmaskL_zero),
18124 mkU32(1 << AMD64G_CC_SHIFT_Z),
18125 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018126 /* S == 1 iff any in argR is 0 */
florian99dd03e2013-01-29 03:56:06 +000018127 IRExpr *s_bit = IRExpr_ITE( mkexpr(zmaskR_zero),
18128 mkU32(1 << AMD64G_CC_SHIFT_S),
18129 mkU32(0));
sewardjf833ed42012-11-20 15:24:24 +000018130 /* O == IntRes2[0] */
18131 IRExpr *o_bit = binop(Iop_Shl32, binop(Iop_And32, mkexpr(intRes2),
18132 mkU32(0x01)),
18133 mkU8(AMD64G_CC_SHIFT_O));
18134
18135 /* Put them all together */
18136 IRTemp cc = newTemp(Ity_I64);
18137 assign(cc, widenUto64(binop(Iop_Or32,
18138 binop(Iop_Or32, c_bit, z_bit),
18139 binop(Iop_Or32, s_bit, o_bit))));
18140 stmt(IRStmt_Put(OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY)));
18141 stmt(IRStmt_Put(OFFB_CC_DEP1, mkexpr(cc)));
18142 stmt(IRStmt_Put(OFFB_CC_DEP2, mkU64(0)));
18143 stmt(IRStmt_Put(OFFB_CC_NDEP, mkU64(0)));
18144
18145 return delta;
18146}
sewardj56c30312012-06-12 08:45:39 +000018147
sewardjac75d7b2012-05-23 12:42:39 +000018148/* This can fail, in which case it returns the original (unchanged)
18149 delta. */
18150static Long dis_PCMPxSTRx ( VexAbiInfo* vbi, Prefix pfx,
18151 Long delta, Bool isAvx, UChar opc )
18152{
18153 Long delta0 = delta;
18154 UInt isISTRx = opc & 2;
18155 UInt isxSTRM = (opc & 1) ^ 1;
18156 UInt regNoL = 0;
18157 UInt regNoR = 0;
18158 UChar imm = 0;
18159 IRTemp addr = IRTemp_INVALID;
18160 Int alen = 0;
18161 HChar dis_buf[50];
18162
18163 /* This is a nasty kludge. We need to pass 2 x V128 to the helper
18164 (which is clean). Since we can't do that, use a dirty helper to
18165 compute the results directly from the XMM regs in the guest
18166 state. That means for the memory case, we need to move the left
18167 operand into a pseudo-register (XMM16, let's call it). */
18168 UChar modrm = getUChar(delta);
18169 if (epartIsReg(modrm)) {
18170 regNoL = eregOfRexRM(pfx, modrm);
18171 regNoR = gregOfRexRM(pfx, modrm);
18172 imm = getUChar(delta+1);
18173 delta += 1+1;
18174 } else {
18175 regNoL = 16; /* use XMM16 as an intermediary */
18176 regNoR = gregOfRexRM(pfx, modrm);
sewardjd343e622012-05-24 06:17:14 +000018177 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardjac75d7b2012-05-23 12:42:39 +000018178 /* No alignment check; I guess that makes sense, given that
18179 these insns are for dealing with C style strings. */
18180 stmt( IRStmt_Put( OFFB_YMM16, loadLE(Ity_V128, mkexpr(addr)) ));
18181 imm = getUChar(delta+alen);
18182 delta += alen+1;
18183 }
18184
sewardj009230b2013-01-26 11:47:55 +000018185 /* Print the insn here, since dis_PCMPISTRI_3A doesn't do so
18186 itself. */
18187 if (regNoL == 16) {
18188 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18189 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18190 (UInt)imm, dis_buf, nameXMMReg(regNoR));
18191 } else {
18192 DIP("%spcmp%cstr%c $%x,%s,%s\n",
18193 isAvx ? "v" : "", isISTRx ? 'i' : 'e', isxSTRM ? 'm' : 'i',
18194 (UInt)imm, nameXMMReg(regNoL), nameXMMReg(regNoR));
18195 }
18196
18197 /* Handle special case(s). */
sewardjf833ed42012-11-20 15:24:24 +000018198 if (imm == 0x3A && isISTRx && !isxSTRM) {
18199 return dis_PCMPISTRI_3A ( modrm, regNoL, regNoR, delta,
18200 opc, imm, dis_buf);
18201 }
18202
sewardjac75d7b2012-05-23 12:42:39 +000018203 /* Now we know the XMM reg numbers for the operands, and the
18204 immediate byte. Is it one we can actually handle? Throw out any
18205 cases for which the helper function has not been verified. */
18206 switch (imm) {
sewardjeead3192014-05-21 14:42:04 +000018207 case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
18208 case 0x12: case 0x14: case 0x1A:
18209 case 0x30: case 0x34: case 0x38: case 0x3A:
18210 case 0x40: case 0x44: case 0x46: case 0x4A:
sewardjac75d7b2012-05-23 12:42:39 +000018211 break;
sewardjeead3192014-05-21 14:42:04 +000018212 // the 16-bit character versions of the above
18213 case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
18214 case 0x13: case 0x1B:
18215 case 0x39: case 0x3B:
18216 case 0x45: case 0x4B:
sewardjac75d7b2012-05-23 12:42:39 +000018217 break;
18218 default:
18219 return delta0; /*FAIL*/
18220 }
18221
18222 /* Who ya gonna call? Presumably not Ghostbusters. */
18223 void* fn = &amd64g_dirtyhelper_PCMPxSTRx;
florian55085f82012-11-21 00:36:55 +000018224 const HChar* nm = "amd64g_dirtyhelper_PCMPxSTRx";
sewardjac75d7b2012-05-23 12:42:39 +000018225
18226 /* Round up the arguments. Note that this is a kludge -- the use
18227 of mkU64 rather than mkIRExpr_HWord implies the assumption that
18228 the host's word size is 64-bit. */
18229 UInt gstOffL = regNoL == 16 ? OFFB_YMM16 : ymmGuestRegOffset(regNoL);
18230 UInt gstOffR = ymmGuestRegOffset(regNoR);
18231
18232 IRExpr* opc4_and_imm = mkU64((opc << 8) | (imm & 0xFF));
18233 IRExpr* gstOffLe = mkU64(gstOffL);
18234 IRExpr* gstOffRe = mkU64(gstOffR);
18235 IRExpr* edxIN = isISTRx ? mkU64(0) : getIRegRDX(8);
18236 IRExpr* eaxIN = isISTRx ? mkU64(0) : getIRegRAX(8);
18237 IRExpr** args
florian90419562013-08-15 20:54:52 +000018238 = mkIRExprVec_6( IRExpr_BBPTR(),
sewardj74142b82013-08-08 10:28:59 +000018239 opc4_and_imm, gstOffLe, gstOffRe, edxIN, eaxIN );
sewardjac75d7b2012-05-23 12:42:39 +000018240
18241 IRTemp resT = newTemp(Ity_I64);
18242 IRDirty* d = unsafeIRDirty_1_N( resT, 0/*regparms*/, nm, fn, args );
18243 /* It's not really a dirty call, but we can't use the clean helper
18244 mechanism here for the very lame reason that we can't pass 2 x
sewardj74142b82013-08-08 10:28:59 +000018245 V128s by value to a helper. Hence this roundabout scheme. */
sewardjac75d7b2012-05-23 12:42:39 +000018246 d->nFxState = 2;
sewardjc9069f22012-06-01 16:09:50 +000018247 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardjac75d7b2012-05-23 12:42:39 +000018248 d->fxState[0].fx = Ifx_Read;
18249 d->fxState[0].offset = gstOffL;
18250 d->fxState[0].size = sizeof(U128);
18251 d->fxState[1].fx = Ifx_Read;
18252 d->fxState[1].offset = gstOffR;
18253 d->fxState[1].size = sizeof(U128);
18254 if (isxSTRM) {
18255 /* Declare that the helper writes XMM0. */
18256 d->nFxState = 3;
18257 d->fxState[2].fx = Ifx_Write;
18258 d->fxState[2].offset = ymmGuestRegOffset(0);
18259 d->fxState[2].size = sizeof(U128);
18260 }
18261
18262 stmt( IRStmt_Dirty(d) );
18263
18264 /* Now resT[15:0] holds the new OSZACP values, so the condition
18265 codes must be updated. And for a xSTRI case, resT[31:16] holds
18266 the new ECX value, so stash that too. */
18267 if (!isxSTRM) {
18268 putIReg64(R_RCX, binop(Iop_And64,
18269 binop(Iop_Shr64, mkexpr(resT), mkU8(16)),
18270 mkU64(0xFFFF)));
18271 }
18272
18273 /* Zap the upper half of the dest reg as per AVX conventions. */
18274 if (isxSTRM && isAvx)
18275 putYMMRegLane128(/*YMM*/0, 1, mkV128(0));
18276
18277 stmt( IRStmt_Put(
18278 OFFB_CC_DEP1,
18279 binop(Iop_And64, mkexpr(resT), mkU64(0xFFFF))
18280 ));
18281 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
18282 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
18283 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
18284
sewardjac75d7b2012-05-23 12:42:39 +000018285 return delta;
18286}
18287
18288
sewardj21459cb2012-06-18 14:05:52 +000018289static IRTemp math_PINSRB_128 ( IRTemp v128, IRTemp u8, UInt imm8 )
18290{
sewardj4ed05e02012-06-18 15:01:30 +000018291 vassert(imm8 >= 0 && imm8 <= 15);
18292
sewardj21459cb2012-06-18 14:05:52 +000018293 // Create a V128 value which has the selected byte in the
18294 // specified lane, and zeroes everywhere else.
sewardj4ed05e02012-06-18 15:01:30 +000018295 IRTemp tmp128 = newTemp(Ity_V128);
sewardj21459cb2012-06-18 14:05:52 +000018296 IRTemp halfshift = newTemp(Ity_I64);
18297 assign(halfshift, binop(Iop_Shl64,
sewardj4ed05e02012-06-18 15:01:30 +000018298 unop(Iop_8Uto64, mkexpr(u8)),
18299 mkU8(8 * (imm8 & 7))));
sewardj21459cb2012-06-18 14:05:52 +000018300 if (imm8 < 8) {
18301 assign(tmp128, binop(Iop_64HLtoV128, mkU64(0), mkexpr(halfshift)));
18302 } else {
18303 assign(tmp128, binop(Iop_64HLtoV128, mkexpr(halfshift), mkU64(0)));
18304 }
18305
18306 UShort mask = ~(1 << imm8);
sewardj4ed05e02012-06-18 15:01:30 +000018307 IRTemp res = newTemp(Ity_V128);
18308 assign( res, binop(Iop_OrV128,
18309 mkexpr(tmp128),
18310 binop(Iop_AndV128, mkexpr(v128), mkV128(mask))) );
sewardj21459cb2012-06-18 14:05:52 +000018311 return res;
18312}
18313
18314
sewardj6faf7cc2012-05-25 15:53:01 +000018315static IRTemp math_PINSRD_128 ( IRTemp v128, IRTemp u32, UInt imm8 )
18316{
18317 IRTemp z32 = newTemp(Ity_I32);
18318 assign(z32, mkU32(0));
18319
18320 /* Surround u32 with zeroes as per imm, giving us something we can
18321 OR into a suitably masked-out v128.*/
18322 IRTemp withZs = newTemp(Ity_V128);
18323 UShort mask = 0;
18324 switch (imm8) {
18325 case 3: mask = 0x0FFF;
sewardj4b1cc832012-06-13 11:10:20 +000018326 assign(withZs, mkV128from32s(u32, z32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018327 break;
18328 case 2: mask = 0xF0FF;
sewardj4b1cc832012-06-13 11:10:20 +000018329 assign(withZs, mkV128from32s(z32, u32, z32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018330 break;
18331 case 1: mask = 0xFF0F;
sewardj4b1cc832012-06-13 11:10:20 +000018332 assign(withZs, mkV128from32s(z32, z32, u32, z32));
sewardj6faf7cc2012-05-25 15:53:01 +000018333 break;
18334 case 0: mask = 0xFFF0;
sewardj4b1cc832012-06-13 11:10:20 +000018335 assign(withZs, mkV128from32s(z32, z32, z32, u32));
sewardj6faf7cc2012-05-25 15:53:01 +000018336 break;
18337 default: vassert(0);
18338 }
18339
18340 IRTemp res = newTemp(Ity_V128);
18341 assign(res, binop( Iop_OrV128,
18342 mkexpr(withZs),
18343 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18344 return res;
18345}
18346
18347
sewardj98d02cc2012-06-02 11:55:25 +000018348static IRTemp math_PINSRQ_128 ( IRTemp v128, IRTemp u64, UInt imm8 )
18349{
18350 /* Surround u64 with zeroes as per imm, giving us something we can
18351 OR into a suitably masked-out v128.*/
18352 IRTemp withZs = newTemp(Ity_V128);
18353 UShort mask = 0;
18354 if (imm8 == 0) {
18355 mask = 0xFF00;
18356 assign(withZs, binop(Iop_64HLtoV128, mkU64(0), mkexpr(u64)));
18357 } else {
18358 vassert(imm8 == 1);
18359 mask = 0x00FF;
18360 assign( withZs, binop(Iop_64HLtoV128, mkexpr(u64), mkU64(0)));
18361 }
18362
18363 IRTemp res = newTemp(Ity_V128);
18364 assign( res, binop( Iop_OrV128,
18365 mkexpr(withZs),
18366 binop( Iop_AndV128, mkexpr(v128), mkV128(mask) ) ) );
18367 return res;
18368}
18369
18370
sewardjcfca8cd2012-05-27 08:25:42 +000018371static IRTemp math_INSERTPS ( IRTemp dstV, IRTemp toInsertD, UInt imm8 )
18372{
18373 const IRTemp inval = IRTemp_INVALID;
18374 IRTemp dstDs[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000018375 breakupV128to32s( dstV, &dstDs[3], &dstDs[2], &dstDs[1], &dstDs[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000018376
18377 vassert(imm8 <= 255);
18378 dstDs[(imm8 >> 4) & 3] = toInsertD; /* "imm8_count_d" */
18379
18380 UInt imm8_zmask = (imm8 & 15);
18381 IRTemp zero_32 = newTemp(Ity_I32);
18382 assign( zero_32, mkU32(0) );
18383 IRTemp resV = newTemp(Ity_V128);
sewardj4b1cc832012-06-13 11:10:20 +000018384 assign( resV, mkV128from32s(
sewardjcfca8cd2012-05-27 08:25:42 +000018385 ((imm8_zmask & 8) == 8) ? zero_32 : dstDs[3],
18386 ((imm8_zmask & 4) == 4) ? zero_32 : dstDs[2],
18387 ((imm8_zmask & 2) == 2) ? zero_32 : dstDs[1],
18388 ((imm8_zmask & 1) == 1) ? zero_32 : dstDs[0]) );
18389 return resV;
18390}
18391
18392
sewardje8a7eb72012-06-12 14:59:17 +000018393static Long dis_PEXTRB_128_GtoE ( VexAbiInfo* vbi, Prefix pfx,
18394 Long delta, Bool isAvx )
18395{
18396 IRTemp addr = IRTemp_INVALID;
18397 Int alen = 0;
18398 HChar dis_buf[50];
18399 IRTemp xmm_vec = newTemp(Ity_V128);
18400 IRTemp sel_lane = newTemp(Ity_I32);
18401 IRTemp shr_lane = newTemp(Ity_I32);
florian55085f82012-11-21 00:36:55 +000018402 const HChar* mbV = isAvx ? "v" : "";
sewardje8a7eb72012-06-12 14:59:17 +000018403 UChar modrm = getUChar(delta);
18404 IRTemp t3, t2, t1, t0;
18405 Int imm8;
18406 assign( xmm_vec, getXMMReg( gregOfRexRM(pfx,modrm) ) );
18407 t3 = t2 = t1 = t0 = IRTemp_INVALID;
sewardj4b1cc832012-06-13 11:10:20 +000018408 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
sewardje8a7eb72012-06-12 14:59:17 +000018409
18410 if ( epartIsReg( modrm ) ) {
18411 imm8 = (Int)getUChar(delta+1);
18412 } else {
18413 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18414 imm8 = (Int)getUChar(delta+alen);
18415 }
18416 switch ( (imm8 >> 2) & 3 ) {
18417 case 0: assign( sel_lane, mkexpr(t0) ); break;
18418 case 1: assign( sel_lane, mkexpr(t1) ); break;
18419 case 2: assign( sel_lane, mkexpr(t2) ); break;
18420 case 3: assign( sel_lane, mkexpr(t3) ); break;
18421 default: vassert(0);
18422 }
18423 assign( shr_lane,
18424 binop( Iop_Shr32, mkexpr(sel_lane), mkU8(((imm8 & 3)*8)) ) );
18425
18426 if ( epartIsReg( modrm ) ) {
18427 putIReg64( eregOfRexRM(pfx,modrm),
18428 unop( Iop_32Uto64,
18429 binop(Iop_And32, mkexpr(shr_lane), mkU32(255)) ) );
18430 delta += 1+1;
18431 DIP( "%spextrb $%d, %s,%s\n", mbV, imm8,
18432 nameXMMReg( gregOfRexRM(pfx, modrm) ),
18433 nameIReg64( eregOfRexRM(pfx, modrm) ) );
18434 } else {
18435 storeLE( mkexpr(addr), unop(Iop_32to8, mkexpr(shr_lane) ) );
18436 delta += alen+1;
18437 DIP( "%spextrb $%d,%s,%s\n", mbV,
18438 imm8, nameXMMReg( gregOfRexRM(pfx, modrm) ), dis_buf );
18439 }
18440
18441 return delta;
18442}
18443
18444
sewardj4ed05e02012-06-18 15:01:30 +000018445static IRTemp math_DPPD_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18446{
18447 vassert(imm8 < 256);
18448 UShort imm8_perms[4] = { 0x0000, 0x00FF, 0xFF00, 0xFFFF };
18449 IRTemp and_vec = newTemp(Ity_V128);
18450 IRTemp sum_vec = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000018451 IRTemp rm = newTemp(Ity_I32);
18452 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
sewardj4ed05e02012-06-18 15:01:30 +000018453 assign( and_vec, binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018454 triop( Iop_Mul64Fx2,
18455 mkexpr(rm),
sewardj4ed05e02012-06-18 15:01:30 +000018456 mkexpr(dst_vec), mkexpr(src_vec) ),
18457 mkV128( imm8_perms[ ((imm8 >> 4) & 3) ] ) ) );
18458
18459 assign( sum_vec, binop( Iop_Add64F0x2,
18460 binop( Iop_InterleaveHI64x2,
18461 mkexpr(and_vec), mkexpr(and_vec) ),
18462 binop( Iop_InterleaveLO64x2,
18463 mkexpr(and_vec), mkexpr(and_vec) ) ) );
18464 IRTemp res = newTemp(Ity_V128);
18465 assign(res, binop( Iop_AndV128,
18466 binop( Iop_InterleaveLO64x2,
18467 mkexpr(sum_vec), mkexpr(sum_vec) ),
18468 mkV128( imm8_perms[ (imm8 & 3) ] ) ) );
18469 return res;
18470}
18471
18472
sewardjadf357c2012-06-24 13:44:17 +000018473static IRTemp math_DPPS_128 ( IRTemp src_vec, IRTemp dst_vec, UInt imm8 )
18474{
18475 vassert(imm8 < 256);
18476 IRTemp tmp_prod_vec = newTemp(Ity_V128);
18477 IRTemp prod_vec = newTemp(Ity_V128);
18478 IRTemp sum_vec = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000018479 IRTemp rm = newTemp(Ity_I32);
sewardjadf357c2012-06-24 13:44:17 +000018480 IRTemp v3, v2, v1, v0;
18481 v3 = v2 = v1 = v0 = IRTemp_INVALID;
18482 UShort imm8_perms[16] = { 0x0000, 0x000F, 0x00F0, 0x00FF, 0x0F00,
18483 0x0F0F, 0x0FF0, 0x0FFF, 0xF000, 0xF00F,
18484 0xF0F0, 0xF0FF, 0xFF00, 0xFF0F, 0xFFF0,
18485 0xFFFF };
18486
sewardj9571dc02014-01-26 18:34:23 +000018487 assign( rm, get_FAKE_roundingmode() ); /* XXXROUNDINGFIXME */
sewardjadf357c2012-06-24 13:44:17 +000018488 assign( tmp_prod_vec,
18489 binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018490 triop( Iop_Mul32Fx4,
18491 mkexpr(rm), mkexpr(dst_vec), mkexpr(src_vec) ),
sewardjadf357c2012-06-24 13:44:17 +000018492 mkV128( imm8_perms[((imm8 >> 4)& 15)] ) ) );
18493 breakupV128to32s( tmp_prod_vec, &v3, &v2, &v1, &v0 );
18494 assign( prod_vec, mkV128from32s( v3, v1, v2, v0 ) );
18495
sewardj9571dc02014-01-26 18:34:23 +000018496 assign( sum_vec, triop( Iop_Add32Fx4,
18497 mkexpr(rm),
sewardjadf357c2012-06-24 13:44:17 +000018498 binop( Iop_InterleaveHI32x4,
18499 mkexpr(prod_vec), mkexpr(prod_vec) ),
18500 binop( Iop_InterleaveLO32x4,
18501 mkexpr(prod_vec), mkexpr(prod_vec) ) ) );
18502
18503 IRTemp res = newTemp(Ity_V128);
18504 assign( res, binop( Iop_AndV128,
sewardj9571dc02014-01-26 18:34:23 +000018505 triop( Iop_Add32Fx4,
18506 mkexpr(rm),
sewardjadf357c2012-06-24 13:44:17 +000018507 binop( Iop_InterleaveHI32x4,
18508 mkexpr(sum_vec), mkexpr(sum_vec) ),
18509 binop( Iop_InterleaveLO32x4,
18510 mkexpr(sum_vec), mkexpr(sum_vec) ) ),
18511 mkV128( imm8_perms[ (imm8 & 15) ] ) ) );
18512 return res;
18513}
18514
18515
sewardj8516a1f2012-06-24 14:26:30 +000018516static IRTemp math_MPSADBW_128 ( IRTemp dst_vec, IRTemp src_vec, UInt imm8 )
18517{
18518 /* Mask out bits of the operands we don't need. This isn't
18519 strictly necessary, but it does ensure Memcheck doesn't
18520 give us any false uninitialised value errors as a
18521 result. */
18522 UShort src_mask[4] = { 0x000F, 0x00F0, 0x0F00, 0xF000 };
18523 UShort dst_mask[2] = { 0x07FF, 0x7FF0 };
18524
18525 IRTemp src_maskV = newTemp(Ity_V128);
18526 IRTemp dst_maskV = newTemp(Ity_V128);
18527 assign(src_maskV, mkV128( src_mask[ imm8 & 3 ] ));
18528 assign(dst_maskV, mkV128( dst_mask[ (imm8 >> 2) & 1 ] ));
18529
18530 IRTemp src_masked = newTemp(Ity_V128);
18531 IRTemp dst_masked = newTemp(Ity_V128);
18532 assign(src_masked, binop(Iop_AndV128, mkexpr(src_vec), mkexpr(src_maskV)));
18533 assign(dst_masked, binop(Iop_AndV128, mkexpr(dst_vec), mkexpr(dst_maskV)));
18534
18535 /* Generate 4 64 bit values that we can hand to a clean helper */
18536 IRTemp sHi = newTemp(Ity_I64);
18537 IRTemp sLo = newTemp(Ity_I64);
18538 assign( sHi, unop(Iop_V128HIto64, mkexpr(src_masked)) );
18539 assign( sLo, unop(Iop_V128to64, mkexpr(src_masked)) );
18540
18541 IRTemp dHi = newTemp(Ity_I64);
18542 IRTemp dLo = newTemp(Ity_I64);
18543 assign( dHi, unop(Iop_V128HIto64, mkexpr(dst_masked)) );
18544 assign( dLo, unop(Iop_V128to64, mkexpr(dst_masked)) );
18545
18546 /* Compute halves of the result separately */
18547 IRTemp resHi = newTemp(Ity_I64);
18548 IRTemp resLo = newTemp(Ity_I64);
18549
18550 IRExpr** argsHi
18551 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
18552 mkU64( 0x80 | (imm8 & 7) ));
18553 IRExpr** argsLo
18554 = mkIRExprVec_5( mkexpr(sHi), mkexpr(sLo), mkexpr(dHi), mkexpr(dLo),
18555 mkU64( 0x00 | (imm8 & 7) ));
18556
18557 assign(resHi, mkIRExprCCall( Ity_I64, 0/*regparm*/,
18558 "amd64g_calc_mpsadbw",
18559 &amd64g_calc_mpsadbw, argsHi ));
18560 assign(resLo, mkIRExprCCall( Ity_I64, 0/*regparm*/,
18561 "amd64g_calc_mpsadbw",
18562 &amd64g_calc_mpsadbw, argsLo ));
18563
18564 IRTemp res = newTemp(Ity_V128);
18565 assign(res, binop(Iop_64HLtoV128, mkexpr(resHi), mkexpr(resLo)));
18566 return res;
18567}
18568
sewardjadf357c2012-06-24 13:44:17 +000018569static Long dis_EXTRACTPS ( VexAbiInfo* vbi, Prefix pfx,
18570 Long delta, Bool isAvx )
18571{
18572 IRTemp addr = IRTemp_INVALID;
18573 Int alen = 0;
18574 HChar dis_buf[50];
18575 UChar modrm = getUChar(delta);
18576 Int imm8_10;
18577 IRTemp xmm_vec = newTemp(Ity_V128);
18578 IRTemp src_dword = newTemp(Ity_I32);
18579 UInt rG = gregOfRexRM(pfx,modrm);
18580 IRTemp t3, t2, t1, t0;
18581 t3 = t2 = t1 = t0 = IRTemp_INVALID;
18582
18583 assign( xmm_vec, getXMMReg( rG ) );
18584 breakupV128to32s( xmm_vec, &t3, &t2, &t1, &t0 );
18585
18586 if ( epartIsReg( modrm ) ) {
18587 imm8_10 = (Int)(getUChar(delta+1) & 3);
18588 } else {
18589 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
18590 imm8_10 = (Int)(getUChar(delta+alen) & 3);
18591 }
18592
18593 switch ( imm8_10 ) {
18594 case 0: assign( src_dword, mkexpr(t0) ); break;
18595 case 1: assign( src_dword, mkexpr(t1) ); break;
18596 case 2: assign( src_dword, mkexpr(t2) ); break;
18597 case 3: assign( src_dword, mkexpr(t3) ); break;
18598 default: vassert(0);
18599 }
18600
18601 if ( epartIsReg( modrm ) ) {
18602 UInt rE = eregOfRexRM(pfx,modrm);
18603 putIReg32( rE, mkexpr(src_dword) );
18604 delta += 1+1;
18605 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
18606 nameXMMReg( rG ), nameIReg32( rE ) );
18607 } else {
18608 storeLE( mkexpr(addr), mkexpr(src_dword) );
18609 delta += alen+1;
18610 DIP( "%sextractps $%d, %s,%s\n", isAvx ? "v" : "", imm8_10,
18611 nameXMMReg( rG ), dis_buf );
18612 }
18613
18614 return delta;
18615}
18616
18617
sewardj1407a362012-06-24 15:11:38 +000018618static IRTemp math_PCLMULQDQ( IRTemp dV, IRTemp sV, UInt imm8 )
18619{
18620 IRTemp t0 = newTemp(Ity_I64);
18621 IRTemp t1 = newTemp(Ity_I64);
18622 assign(t0, unop((imm8&1)? Iop_V128HIto64 : Iop_V128to64,
18623 mkexpr(dV)));
18624 assign(t1, unop((imm8&16) ? Iop_V128HIto64 : Iop_V128to64,
18625 mkexpr(sV)));
18626
18627 IRTemp t2 = newTemp(Ity_I64);
18628 IRTemp t3 = newTemp(Ity_I64);
18629
18630 IRExpr** args;
18631
18632 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(0));
18633 assign(t2, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
18634 &amd64g_calculate_pclmul, args));
18635 args = mkIRExprVec_3(mkexpr(t0), mkexpr(t1), mkU64(1));
18636 assign(t3, mkIRExprCCall(Ity_I64,0, "amd64g_calculate_pclmul",
18637 &amd64g_calculate_pclmul, args));
18638
18639 IRTemp res = newTemp(Ity_V128);
18640 assign(res, binop(Iop_64HLtoV128, mkexpr(t3), mkexpr(t2)));
18641 return res;
18642}
18643
18644
sewardj80611e32012-01-20 13:07:24 +000018645__attribute__((noinline))
18646static
18647Long dis_ESC_0F3A__SSE4 ( Bool* decode_OK,
18648 VexAbiInfo* vbi,
18649 Prefix pfx, Int sz, Long deltaIN )
18650{
18651 IRTemp addr = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000018652 UChar modrm = 0;
18653 Int alen = 0;
18654 HChar dis_buf[50];
18655
18656 *decode_OK = False;
18657
18658 Long delta = deltaIN;
18659 UChar opc = getUChar(delta);
18660 delta++;
18661 switch (opc) {
18662
18663 case 0x08:
18664 /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1 */
18665 if (have66noF2noF3(pfx) && sz == 2) {
18666
18667 IRTemp src0 = newTemp(Ity_F32);
18668 IRTemp src1 = newTemp(Ity_F32);
18669 IRTemp src2 = newTemp(Ity_F32);
18670 IRTemp src3 = newTemp(Ity_F32);
18671 IRTemp res0 = newTemp(Ity_F32);
18672 IRTemp res1 = newTemp(Ity_F32);
18673 IRTemp res2 = newTemp(Ity_F32);
18674 IRTemp res3 = newTemp(Ity_F32);
18675 IRTemp rm = newTemp(Ity_I32);
18676 Int imm = 0;
18677
18678 modrm = getUChar(delta);
18679
18680 if (epartIsReg(modrm)) {
18681 assign( src0,
18682 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
18683 assign( src1,
18684 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
18685 assign( src2,
18686 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
18687 assign( src3,
18688 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
18689 imm = getUChar(delta+1);
18690 if (imm & ~15) goto decode_failure;
18691 delta += 1+1;
18692 DIP( "roundps $%d,%s,%s\n",
18693 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18694 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18695 } else {
sewardjd343e622012-05-24 06:17:14 +000018696 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018697 gen_SEGV_if_not_16_aligned(addr);
18698 assign( src0, loadLE(Ity_F32,
18699 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
18700 assign( src1, loadLE(Ity_F32,
18701 binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
18702 assign( src2, loadLE(Ity_F32,
18703 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
18704 assign( src3, loadLE(Ity_F32,
18705 binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
18706 imm = getUChar(delta+alen);
18707 if (imm & ~15) goto decode_failure;
18708 delta += alen+1;
18709 DIP( "roundps $%d,%s,%s\n",
18710 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18711 }
18712
18713 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18714 that encoding is the same as the encoding for IRRoundingMode,
18715 we can use that value directly in the IR as a rounding
18716 mode. */
18717 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
18718
18719 assign(res0, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src0)) );
18720 assign(res1, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src1)) );
18721 assign(res2, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src2)) );
18722 assign(res3, binop(Iop_RoundF32toInt, mkexpr(rm), mkexpr(src3)) );
18723
18724 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
18725 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
18726 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
18727 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
18728
18729 goto decode_success;
18730 }
18731 break;
18732
18733 case 0x09:
18734 /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1 */
18735 if (have66noF2noF3(pfx) && sz == 2) {
18736
18737 IRTemp src0 = newTemp(Ity_F64);
18738 IRTemp src1 = newTemp(Ity_F64);
18739 IRTemp res0 = newTemp(Ity_F64);
18740 IRTemp res1 = newTemp(Ity_F64);
18741 IRTemp rm = newTemp(Ity_I32);
18742 Int imm = 0;
18743
18744 modrm = getUChar(delta);
18745
18746 if (epartIsReg(modrm)) {
18747 assign( src0,
18748 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
18749 assign( src1,
18750 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
18751 imm = getUChar(delta+1);
18752 if (imm & ~15) goto decode_failure;
18753 delta += 1+1;
18754 DIP( "roundpd $%d,%s,%s\n",
18755 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18756 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18757 } else {
sewardjd343e622012-05-24 06:17:14 +000018758 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018759 gen_SEGV_if_not_16_aligned(addr);
18760 assign( src0, loadLE(Ity_F64,
18761 binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
18762 assign( src1, loadLE(Ity_F64,
18763 binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
18764 imm = getUChar(delta+alen);
18765 if (imm & ~15) goto decode_failure;
18766 delta += alen+1;
18767 DIP( "roundpd $%d,%s,%s\n",
18768 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18769 }
18770
18771 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18772 that encoding is the same as the encoding for IRRoundingMode,
18773 we can use that value directly in the IR as a rounding
18774 mode. */
18775 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
18776
18777 assign(res0, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src0)) );
18778 assign(res1, binop(Iop_RoundF64toInt, mkexpr(rm), mkexpr(src1)) );
18779
18780 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
18781 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
18782
18783 goto decode_success;
18784 }
18785 break;
18786
18787 case 0x0A:
18788 case 0x0B:
18789 /* 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
18790 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
18791 */
18792 if (have66noF2noF3(pfx) && sz == 2) {
18793
18794 Bool isD = opc == 0x0B;
18795 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
18796 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
18797 Int imm = 0;
18798
18799 modrm = getUChar(delta);
18800
18801 if (epartIsReg(modrm)) {
18802 assign( src,
18803 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
18804 : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
18805 imm = getUChar(delta+1);
18806 if (imm & ~15) goto decode_failure;
18807 delta += 1+1;
18808 DIP( "rounds%c $%d,%s,%s\n",
18809 isD ? 'd' : 's',
18810 imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
18811 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18812 } else {
sewardjd343e622012-05-24 06:17:14 +000018813 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj80611e32012-01-20 13:07:24 +000018814 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
18815 imm = getUChar(delta+alen);
18816 if (imm & ~15) goto decode_failure;
18817 delta += alen+1;
18818 DIP( "rounds%c $%d,%s,%s\n",
18819 isD ? 'd' : 's',
18820 imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18821 }
18822
18823 /* (imm & 3) contains an Intel-encoded rounding mode. Because
18824 that encoding is the same as the encoding for IRRoundingMode,
18825 we can use that value directly in the IR as a rounding
18826 mode. */
18827 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
18828 (imm & 4) ? get_sse_roundingmode()
18829 : mkU32(imm & 3),
18830 mkexpr(src)) );
18831
18832 if (isD)
18833 putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
18834 else
18835 putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
18836
18837 goto decode_success;
18838 }
18839 break;
18840
18841 case 0x0C:
18842 /* 66 0F 3A 0C /r ib = BLENDPS xmm1, xmm2/m128, imm8
18843 Blend Packed Single Precision Floating-Point Values (XMM) */
18844 if (have66noF2noF3(pfx) && sz == 2) {
18845
18846 Int imm8;
18847 IRTemp dst_vec = newTemp(Ity_V128);
18848 IRTemp src_vec = newTemp(Ity_V128);
18849
18850 modrm = getUChar(delta);
18851
18852 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18853
18854 if ( epartIsReg( modrm ) ) {
18855 imm8 = (Int)getUChar(delta+1);
18856 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18857 delta += 1+1;
18858 DIP( "blendps $%d, %s,%s\n", imm8,
18859 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18860 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18861 } else {
18862 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18863 1/* imm8 is 1 byte after the amode */ );
18864 gen_SEGV_if_not_16_aligned( addr );
18865 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18866 imm8 = (Int)getUChar(delta+alen);
18867 delta += alen+1;
18868 DIP( "blendpd $%d, %s,%s\n",
18869 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18870 }
18871
sewardj80611e32012-01-20 13:07:24 +000018872 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018873 mkexpr( math_BLENDPS_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018874 goto decode_success;
18875 }
18876 break;
18877
18878 case 0x0D:
18879 /* 66 0F 3A 0D /r ib = BLENDPD xmm1, xmm2/m128, imm8
18880 Blend Packed Double Precision Floating-Point Values (XMM) */
18881 if (have66noF2noF3(pfx) && sz == 2) {
18882
18883 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000018884 IRTemp dst_vec = newTemp(Ity_V128);
18885 IRTemp src_vec = newTemp(Ity_V128);
sewardj80611e32012-01-20 13:07:24 +000018886
18887 modrm = getUChar(delta);
18888 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18889
18890 if ( epartIsReg( modrm ) ) {
18891 imm8 = (Int)getUChar(delta+1);
18892 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18893 delta += 1+1;
18894 DIP( "blendpd $%d, %s,%s\n", imm8,
18895 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18896 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18897 } else {
18898 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18899 1/* imm8 is 1 byte after the amode */ );
18900 gen_SEGV_if_not_16_aligned( addr );
18901 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18902 imm8 = (Int)getUChar(delta+alen);
18903 delta += alen+1;
18904 DIP( "blendpd $%d, %s,%s\n",
18905 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18906 }
18907
sewardj80611e32012-01-20 13:07:24 +000018908 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018909 mkexpr( math_BLENDPD_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018910 goto decode_success;
18911 }
18912 break;
18913
18914 case 0x0E:
18915 /* 66 0F 3A 0E /r ib = PBLENDW xmm1, xmm2/m128, imm8
18916 Blend Packed Words (XMM) */
18917 if (have66noF2noF3(pfx) && sz == 2) {
18918
18919 Int imm8;
18920 IRTemp dst_vec = newTemp(Ity_V128);
18921 IRTemp src_vec = newTemp(Ity_V128);
18922
18923 modrm = getUChar(delta);
18924
18925 assign( dst_vec, getXMMReg( gregOfRexRM(pfx, modrm) ) );
18926
18927 if ( epartIsReg( modrm ) ) {
18928 imm8 = (Int)getUChar(delta+1);
18929 assign( src_vec, getXMMReg( eregOfRexRM(pfx, modrm) ) );
18930 delta += 1+1;
18931 DIP( "pblendw $%d, %s,%s\n", imm8,
18932 nameXMMReg( eregOfRexRM(pfx, modrm) ),
18933 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18934 } else {
18935 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
18936 1/* imm8 is 1 byte after the amode */ );
18937 gen_SEGV_if_not_16_aligned( addr );
18938 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
18939 imm8 = (Int)getUChar(delta+alen);
18940 delta += alen+1;
18941 DIP( "pblendw $%d, %s,%s\n",
18942 imm8, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
18943 }
18944
sewardj80611e32012-01-20 13:07:24 +000018945 putXMMReg( gregOfRexRM(pfx, modrm),
sewardj21459cb2012-06-18 14:05:52 +000018946 mkexpr( math_PBLENDW_128( src_vec, dst_vec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000018947 goto decode_success;
18948 }
18949 break;
18950
18951 case 0x14:
18952 /* 66 0F 3A 14 /r ib = PEXTRB r/m16, xmm, imm8
18953 Extract Byte from xmm, store in mem or zero-extend + store in gen.reg.
18954 (XMM) */
18955 if (have66noF2noF3(pfx) && sz == 2) {
sewardje8a7eb72012-06-12 14:59:17 +000018956 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018957 goto decode_success;
18958 }
18959 break;
18960
18961 case 0x15:
18962 /* 66 0F 3A 15 /r ib = PEXTRW r/m16, xmm, imm8
18963 Extract Word from xmm, store in mem or zero-extend + store in gen.reg.
18964 (XMM) */
18965 if (have66noF2noF3(pfx) && sz == 2) {
sewardj82096922012-06-24 14:57:59 +000018966 delta = dis_PEXTRW( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018967 goto decode_success;
18968 }
18969 break;
18970
18971 case 0x16:
18972 /* 66 no-REX.W 0F 3A 16 /r ib = PEXTRD reg/mem32, xmm2, imm8
18973 Extract Doubleword int from xmm reg and store in gen.reg or mem. (XMM)
18974 Note that this insn has the same opcodes as PEXTRQ, but
18975 here the REX.W bit is _not_ present */
18976 if (have66noF2noF3(pfx)
18977 && sz == 2 /* REX.W is _not_ present */) {
sewardjc4530ae2012-05-21 10:18:49 +000018978 delta = dis_PEXTRD( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000018979 goto decode_success;
18980 }
18981 /* 66 REX.W 0F 3A 16 /r ib = PEXTRQ reg/mem64, xmm2, imm8
18982 Extract Quadword int from xmm reg and store in gen.reg or mem. (XMM)
18983 Note that this insn has the same opcodes as PEXTRD, but
18984 here the REX.W bit is present */
18985 if (have66noF2noF3(pfx)
18986 && sz == 8 /* REX.W is present */) {
sewardj56c30312012-06-12 08:45:39 +000018987 delta = dis_PEXTRQ( vbi, pfx, delta, False/*!isAvx*/);
sewardj80611e32012-01-20 13:07:24 +000018988 goto decode_success;
18989 }
18990 break;
18991
18992 case 0x17:
18993 /* 66 0F 3A 17 /r ib = EXTRACTPS reg/mem32, xmm2, imm8 Extract
18994 float from xmm reg and store in gen.reg or mem. This is
18995 identical to PEXTRD, except that REX.W appears to be ignored.
18996 */
18997 if (have66noF2noF3(pfx)
18998 && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
sewardjadf357c2012-06-24 13:44:17 +000018999 delta = dis_EXTRACTPS( vbi, pfx, delta, False/*!isAvx*/ );
sewardj80611e32012-01-20 13:07:24 +000019000 goto decode_success;
19001 }
19002 break;
19003
19004 case 0x20:
19005 /* 66 0F 3A 20 /r ib = PINSRB xmm1, r32/m8, imm8
19006 Extract byte from r32/m8 and insert into xmm1 */
19007 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000019008 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000019009 IRTemp new8 = newTemp(Ity_I8);
sewardj80611e32012-01-20 13:07:24 +000019010 modrm = getUChar(delta);
sewardj21459cb2012-06-18 14:05:52 +000019011 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019012 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000019013 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019014 imm8 = (Int)(getUChar(delta+1) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000019015 assign( new8, unop(Iop_32to8, getIReg32(rE)) );
sewardj80611e32012-01-20 13:07:24 +000019016 delta += 1+1;
19017 DIP( "pinsrb $%d,%s,%s\n", imm8,
sewardj4ed05e02012-06-18 15:01:30 +000019018 nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019019 } else {
19020 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19021 imm8 = (Int)(getUChar(delta+alen) & 0xF);
sewardj4ed05e02012-06-18 15:01:30 +000019022 assign( new8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019023 delta += alen+1;
19024 DIP( "pinsrb $%d,%s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000019025 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019026 }
sewardj21459cb2012-06-18 14:05:52 +000019027 IRTemp src_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000019028 assign(src_vec, getXMMReg( gregOfRexRM(pfx, modrm) ));
19029 IRTemp res = math_PINSRB_128( src_vec, new8, imm8 );
19030 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019031 goto decode_success;
19032 }
19033 break;
19034
19035 case 0x21:
sewardjcfca8cd2012-05-27 08:25:42 +000019036 /* 66 0F 3A 21 /r ib = INSERTPS imm8, xmm2/m32, xmm1
sewardj80611e32012-01-20 13:07:24 +000019037 Insert Packed Single Precision Floating-Point Value (XMM) */
19038 if (have66noF2noF3(pfx) && sz == 2) {
sewardjcfca8cd2012-05-27 08:25:42 +000019039 UInt imm8;
19040 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
19041 const IRTemp inval = IRTemp_INVALID;
sewardj80611e32012-01-20 13:07:24 +000019042
19043 modrm = getUChar(delta);
sewardjcfca8cd2012-05-27 08:25:42 +000019044 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019045
19046 if ( epartIsReg( modrm ) ) {
sewardjcfca8cd2012-05-27 08:25:42 +000019047 UInt rE = eregOfRexRM(pfx, modrm);
19048 IRTemp vE = newTemp(Ity_V128);
19049 assign( vE, getXMMReg(rE) );
19050 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000019051 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000019052 imm8 = getUChar(delta+1);
19053 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
sewardj80611e32012-01-20 13:07:24 +000019054 delta += 1+1;
sewardjcfca8cd2012-05-27 08:25:42 +000019055 DIP( "insertps $%u, %s,%s\n",
19056 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019057 } else {
sewardjcfca8cd2012-05-27 08:25:42 +000019058 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19059 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
19060 imm8 = getUChar(delta+alen);
sewardj80611e32012-01-20 13:07:24 +000019061 delta += alen+1;
sewardjcfca8cd2012-05-27 08:25:42 +000019062 DIP( "insertps $%u, %s,%s\n",
19063 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019064 }
19065
sewardjcfca8cd2012-05-27 08:25:42 +000019066 IRTemp vG = newTemp(Ity_V128);
19067 assign( vG, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019068
sewardjcfca8cd2012-05-27 08:25:42 +000019069 putXMMReg( rG, mkexpr(math_INSERTPS( vG, d2ins, imm8 )) );
sewardj80611e32012-01-20 13:07:24 +000019070 goto decode_success;
19071 }
19072 break;
19073
19074 case 0x22:
19075 /* 66 no-REX.W 0F 3A 22 /r ib = PINSRD xmm1, r/m32, imm8
19076 Extract Doubleword int from gen.reg/mem32 and insert into xmm1 */
19077 if (have66noF2noF3(pfx)
19078 && sz == 2 /* REX.W is NOT present */) {
sewardj6faf7cc2012-05-25 15:53:01 +000019079 Int imm8_10;
19080 IRTemp src_u32 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000019081 modrm = getUChar(delta);
sewardj6faf7cc2012-05-25 15:53:01 +000019082 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019083
19084 if ( epartIsReg( modrm ) ) {
sewardj6faf7cc2012-05-25 15:53:01 +000019085 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019086 imm8_10 = (Int)(getUChar(delta+1) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000019087 assign( src_u32, getIReg32( rE ) );
sewardj80611e32012-01-20 13:07:24 +000019088 delta += 1+1;
sewardj6faf7cc2012-05-25 15:53:01 +000019089 DIP( "pinsrd $%d, %s,%s\n",
19090 imm8_10, nameIReg32(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019091 } else {
19092 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19093 imm8_10 = (Int)(getUChar(delta+alen) & 3);
sewardj6faf7cc2012-05-25 15:53:01 +000019094 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019095 delta += alen+1;
19096 DIP( "pinsrd $%d, %s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000019097 imm8_10, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019098 }
19099
sewardj6faf7cc2012-05-25 15:53:01 +000019100 IRTemp src_vec = newTemp(Ity_V128);
19101 assign(src_vec, getXMMReg( rG ));
19102 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
19103 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000019104 goto decode_success;
19105 }
19106 /* 66 REX.W 0F 3A 22 /r ib = PINSRQ xmm1, r/m64, imm8
19107 Extract Quadword int from gen.reg/mem64 and insert into xmm1 */
19108 if (have66noF2noF3(pfx)
19109 && sz == 8 /* REX.W is present */) {
sewardj80611e32012-01-20 13:07:24 +000019110 Int imm8_0;
sewardj98d02cc2012-06-02 11:55:25 +000019111 IRTemp src_u64 = newTemp(Ity_I64);
sewardj80611e32012-01-20 13:07:24 +000019112 modrm = getUChar(delta);
sewardj98d02cc2012-06-02 11:55:25 +000019113 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019114
19115 if ( epartIsReg( modrm ) ) {
sewardj98d02cc2012-06-02 11:55:25 +000019116 UInt rE = eregOfRexRM(pfx,modrm);
sewardj80611e32012-01-20 13:07:24 +000019117 imm8_0 = (Int)(getUChar(delta+1) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000019118 assign( src_u64, getIReg64( rE ) );
sewardj80611e32012-01-20 13:07:24 +000019119 delta += 1+1;
sewardj98d02cc2012-06-02 11:55:25 +000019120 DIP( "pinsrq $%d, %s,%s\n",
19121 imm8_0, nameIReg64(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019122 } else {
19123 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
19124 imm8_0 = (Int)(getUChar(delta+alen) & 1);
sewardj98d02cc2012-06-02 11:55:25 +000019125 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019126 delta += alen+1;
19127 DIP( "pinsrq $%d, %s,%s\n",
sewardj98d02cc2012-06-02 11:55:25 +000019128 imm8_0, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019129 }
19130
sewardj98d02cc2012-06-02 11:55:25 +000019131 IRTemp src_vec = newTemp(Ity_V128);
19132 assign(src_vec, getXMMReg( rG ));
19133 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
19134 putXMMReg( rG, mkexpr(res_vec) );
sewardj80611e32012-01-20 13:07:24 +000019135 goto decode_success;
19136 }
19137 break;
19138
19139 case 0x40:
19140 /* 66 0F 3A 40 /r ib = DPPS xmm1, xmm2/m128, imm8
19141 Dot Product of Packed Single Precision Floating-Point Values (XMM) */
19142 if (have66noF2noF3(pfx) && sz == 2) {
sewardj80611e32012-01-20 13:07:24 +000019143 modrm = getUChar(delta);
sewardjadf357c2012-06-24 13:44:17 +000019144 Int imm8;
19145 IRTemp src_vec = newTemp(Ity_V128);
19146 IRTemp dst_vec = newTemp(Ity_V128);
19147 UInt rG = gregOfRexRM(pfx, modrm);
19148 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000019149 if ( epartIsReg( modrm ) ) {
sewardjadf357c2012-06-24 13:44:17 +000019150 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019151 imm8 = (Int)getUChar(delta+1);
sewardjadf357c2012-06-24 13:44:17 +000019152 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019153 delta += 1+1;
sewardjadf357c2012-06-24 13:44:17 +000019154 DIP( "dpps $%d, %s,%s\n",
19155 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019156 } else {
19157 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19158 1/* imm8 is 1 byte after the amode */ );
19159 gen_SEGV_if_not_16_aligned( addr );
sewardjadf357c2012-06-24 13:44:17 +000019160 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
sewardj80611e32012-01-20 13:07:24 +000019161 imm8 = (Int)getUChar(delta+alen);
19162 delta += alen+1;
19163 DIP( "dpps $%d, %s,%s\n",
sewardjadf357c2012-06-24 13:44:17 +000019164 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019165 }
sewardjadf357c2012-06-24 13:44:17 +000019166 IRTemp res = math_DPPS_128( src_vec, dst_vec, imm8 );
19167 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019168 goto decode_success;
19169 }
19170 break;
19171
19172 case 0x41:
19173 /* 66 0F 3A 41 /r ib = DPPD xmm1, xmm2/m128, imm8
19174 Dot Product of Packed Double Precision Floating-Point Values (XMM) */
19175 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4ed05e02012-06-18 15:01:30 +000019176 modrm = getUChar(delta);
19177 Int imm8;
sewardj80611e32012-01-20 13:07:24 +000019178 IRTemp src_vec = newTemp(Ity_V128);
19179 IRTemp dst_vec = newTemp(Ity_V128);
sewardj4ed05e02012-06-18 15:01:30 +000019180 UInt rG = gregOfRexRM(pfx, modrm);
19181 assign( dst_vec, getXMMReg( rG ) );
sewardj80611e32012-01-20 13:07:24 +000019182 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000019183 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019184 imm8 = (Int)getUChar(delta+1);
sewardj4ed05e02012-06-18 15:01:30 +000019185 assign( src_vec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019186 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000019187 DIP( "dppd $%d, %s,%s\n",
19188 imm8, nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019189 } else {
19190 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19191 1/* imm8 is 1 byte after the amode */ );
19192 gen_SEGV_if_not_16_aligned( addr );
19193 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19194 imm8 = (Int)getUChar(delta+alen);
19195 delta += alen+1;
19196 DIP( "dppd $%d, %s,%s\n",
sewardj4ed05e02012-06-18 15:01:30 +000019197 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019198 }
sewardj4ed05e02012-06-18 15:01:30 +000019199 IRTemp res = math_DPPD_128( src_vec, dst_vec, imm8 );
19200 putXMMReg( rG, mkexpr(res) );
sewardj80611e32012-01-20 13:07:24 +000019201 goto decode_success;
19202 }
19203 break;
19204
sewardj4d5bce22012-02-21 11:02:44 +000019205 case 0x42:
19206 /* 66 0F 3A 42 /r ib = MPSADBW xmm1, xmm2/m128, imm8
19207 Multiple Packed Sums of Absolute Difference (XMM) */
19208 if (have66noF2noF3(pfx) && sz == 2) {
sewardj4d5bce22012-02-21 11:02:44 +000019209 Int imm8;
19210 IRTemp src_vec = newTemp(Ity_V128);
19211 IRTemp dst_vec = newTemp(Ity_V128);
sewardj8516a1f2012-06-24 14:26:30 +000019212 modrm = getUChar(delta);
19213 UInt rG = gregOfRexRM(pfx, modrm);
sewardj4d5bce22012-02-21 11:02:44 +000019214
sewardj8516a1f2012-06-24 14:26:30 +000019215 assign( dst_vec, getXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019216
19217 if ( epartIsReg( modrm ) ) {
sewardj8516a1f2012-06-24 14:26:30 +000019218 UInt rE = eregOfRexRM(pfx, modrm);
19219
sewardj4d5bce22012-02-21 11:02:44 +000019220 imm8 = (Int)getUChar(delta+1);
sewardj8516a1f2012-06-24 14:26:30 +000019221 assign( src_vec, getXMMReg(rE) );
sewardj4d5bce22012-02-21 11:02:44 +000019222 delta += 1+1;
19223 DIP( "mpsadbw $%d, %s,%s\n", imm8,
sewardj8516a1f2012-06-24 14:26:30 +000019224 nameXMMReg(rE), nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019225 } else {
19226 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19227 1/* imm8 is 1 byte after the amode */ );
19228 gen_SEGV_if_not_16_aligned( addr );
19229 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
19230 imm8 = (Int)getUChar(delta+alen);
19231 delta += alen+1;
sewardj8516a1f2012-06-24 14:26:30 +000019232 DIP( "mpsadbw $%d, %s,%s\n", imm8, dis_buf, nameXMMReg(rG) );
sewardj4d5bce22012-02-21 11:02:44 +000019233 }
19234
sewardj8516a1f2012-06-24 14:26:30 +000019235 putXMMReg( rG, mkexpr( math_MPSADBW_128(dst_vec, src_vec, imm8) ) );
sewardj4d5bce22012-02-21 11:02:44 +000019236 goto decode_success;
19237 }
19238 break;
19239
sewardj80611e32012-01-20 13:07:24 +000019240 case 0x44:
19241 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
19242 * Carry-less multiplication of selected XMM quadwords into XMM
19243 * registers (a.k.a multiplication of polynomials over GF(2))
19244 */
19245 if (have66noF2noF3(pfx) && sz == 2) {
19246
19247 Int imm8;
19248 IRTemp svec = newTemp(Ity_V128);
19249 IRTemp dvec = newTemp(Ity_V128);
sewardj1407a362012-06-24 15:11:38 +000019250 modrm = getUChar(delta);
19251 UInt rG = gregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019252
sewardj1407a362012-06-24 15:11:38 +000019253 assign( dvec, getXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019254
19255 if ( epartIsReg( modrm ) ) {
sewardj1407a362012-06-24 15:11:38 +000019256 UInt rE = eregOfRexRM(pfx, modrm);
sewardj80611e32012-01-20 13:07:24 +000019257 imm8 = (Int)getUChar(delta+1);
sewardj1407a362012-06-24 15:11:38 +000019258 assign( svec, getXMMReg(rE) );
sewardj80611e32012-01-20 13:07:24 +000019259 delta += 1+1;
19260 DIP( "pclmulqdq $%d, %s,%s\n", imm8,
sewardj1407a362012-06-24 15:11:38 +000019261 nameXMMReg(rE), nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019262 } else {
19263 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
19264 1/* imm8 is 1 byte after the amode */ );
19265 gen_SEGV_if_not_16_aligned( addr );
19266 assign( svec, loadLE( Ity_V128, mkexpr(addr) ) );
19267 imm8 = (Int)getUChar(delta+alen);
19268 delta += alen+1;
19269 DIP( "pclmulqdq $%d, %s,%s\n",
sewardj1407a362012-06-24 15:11:38 +000019270 imm8, dis_buf, nameXMMReg(rG) );
sewardj80611e32012-01-20 13:07:24 +000019271 }
19272
sewardj1407a362012-06-24 15:11:38 +000019273 putXMMReg( rG, mkexpr( math_PCLMULQDQ(dvec, svec, imm8) ) );
sewardj80611e32012-01-20 13:07:24 +000019274 goto decode_success;
19275 }
19276 break;
19277
19278 case 0x60:
19279 case 0x61:
19280 case 0x62:
19281 case 0x63:
19282 /* 66 0F 3A 63 /r ib = PCMPISTRI imm8, xmm2/m128, xmm1
19283 66 0F 3A 62 /r ib = PCMPISTRM imm8, xmm2/m128, xmm1
19284 66 0F 3A 61 /r ib = PCMPESTRI imm8, xmm2/m128, xmm1
19285 66 0F 3A 60 /r ib = PCMPESTRM imm8, xmm2/m128, xmm1
19286 (selected special cases that actually occur in glibc,
19287 not by any means a complete implementation.)
19288 */
19289 if (have66noF2noF3(pfx) && sz == 2) {
sewardjac75d7b2012-05-23 12:42:39 +000019290 Long delta0 = delta;
19291 delta = dis_PCMPxSTRx( vbi, pfx, delta, False/*!isAvx*/, opc );
19292 if (delta > delta0) goto decode_success;
19293 /* else fall through; dis_PCMPxSTRx failed to decode it */
sewardj80611e32012-01-20 13:07:24 +000019294 }
19295 break;
19296
sewardjac75d7b2012-05-23 12:42:39 +000019297 case 0xDF:
philippeff4d6be2012-02-14 21:34:56 +000019298 /* 66 0F 3A DF /r ib = AESKEYGENASSIST imm8, xmm2/m128, xmm1 */
19299 if (have66noF2noF3(pfx) && sz == 2) {
sewardj1407a362012-06-24 15:11:38 +000019300 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, False/*!isAvx*/ );
philippeff4d6be2012-02-14 21:34:56 +000019301 goto decode_success;
19302 }
19303 break;
19304
sewardj80611e32012-01-20 13:07:24 +000019305 default:
19306 break;
19307
19308 }
19309
19310 decode_failure:
19311 *decode_OK = False;
19312 return deltaIN;
19313
19314 decode_success:
19315 *decode_OK = True;
19316 return delta;
19317}
19318
19319
19320/*------------------------------------------------------------*/
19321/*--- ---*/
19322/*--- Top-level post-escape decoders: dis_ESC_NONE ---*/
19323/*--- ---*/
19324/*------------------------------------------------------------*/
19325
19326__attribute__((noinline))
19327static
19328Long dis_ESC_NONE (
19329 /*MB_OUT*/DisResult* dres,
19330 /*MB_OUT*/Bool* expect_CAS,
19331 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
19332 Bool resteerCisOk,
19333 void* callback_opaque,
19334 VexArchInfo* archinfo,
19335 VexAbiInfo* vbi,
19336 Prefix pfx, Int sz, Long deltaIN
19337 )
19338{
19339 Long d64 = 0;
19340 UChar abyte = 0;
19341 IRTemp addr = IRTemp_INVALID;
19342 IRTemp t1 = IRTemp_INVALID;
19343 IRTemp t2 = IRTemp_INVALID;
19344 IRTemp t3 = IRTemp_INVALID;
19345 IRTemp t4 = IRTemp_INVALID;
19346 IRTemp t5 = IRTemp_INVALID;
19347 IRType ty = Ity_INVALID;
19348 UChar modrm = 0;
19349 Int am_sz = 0;
19350 Int d_sz = 0;
19351 Int alen = 0;
19352 HChar dis_buf[50];
19353
19354 Long delta = deltaIN;
sewardj38b1d692013-10-15 17:21:42 +000019355 UChar opc = getUChar(delta); delta++;
19356
19357 /* delta now points at the modrm byte. In most of the cases that
19358 follow, neither the F2 nor F3 prefixes are allowed. However,
19359 for some basic arithmetic operations we have to allow F2/XACQ or
19360 F3/XREL in the case where the destination is memory and the LOCK
19361 prefix is also present. Do this check by looking at the modrm
19362 byte but not advancing delta over it. */
19363 /* By default, F2 and F3 are not allowed, so let's start off with
19364 that setting. */
19365 Bool validF2orF3 = haveF2orF3(pfx) ? False : True;
19366 { UChar tmp_modrm = getUChar(delta);
19367 switch (opc) {
19368 case 0x00: /* ADD Gb,Eb */ case 0x01: /* ADD Gv,Ev */
19369 case 0x08: /* OR Gb,Eb */ case 0x09: /* OR Gv,Ev */
19370 case 0x10: /* ADC Gb,Eb */ case 0x11: /* ADC Gv,Ev */
19371 case 0x18: /* SBB Gb,Eb */ case 0x19: /* SBB Gv,Ev */
19372 case 0x20: /* AND Gb,Eb */ case 0x21: /* AND Gv,Ev */
19373 case 0x28: /* SUB Gb,Eb */ case 0x29: /* SUB Gv,Ev */
19374 case 0x30: /* XOR Gb,Eb */ case 0x31: /* XOR Gv,Ev */
19375 if (!epartIsReg(tmp_modrm)
19376 && haveF2orF3(pfx) && !haveF2andF3(pfx) && haveLOCK(pfx)) {
19377 /* dst is mem, and we have F2 or F3 but not both */
19378 validF2orF3 = True;
19379 }
19380 break;
19381 default:
19382 break;
19383 }
19384 }
19385
19386 /* Now, in the switch below, for the opc values examined by the
19387 switch above, use validF2orF3 rather than looking at pfx
19388 directly. */
sewardj80611e32012-01-20 13:07:24 +000019389 switch (opc) {
19390
19391 case 0x00: /* ADD Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019392 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019393 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
19394 return delta;
19395 case 0x01: /* ADD Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019396 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019397 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
19398 return delta;
19399
19400 case 0x02: /* ADD Eb,Gb */
19401 if (haveF2orF3(pfx)) goto decode_failure;
19402 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, 1, delta, "add" );
19403 return delta;
19404 case 0x03: /* ADD Ev,Gv */
19405 if (haveF2orF3(pfx)) goto decode_failure;
19406 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Add8, True, sz, delta, "add" );
19407 return delta;
19408
19409 case 0x04: /* ADD Ib, AL */
19410 if (haveF2orF3(pfx)) goto decode_failure;
19411 delta = dis_op_imm_A( 1, False, Iop_Add8, True, delta, "add" );
19412 return delta;
19413 case 0x05: /* ADD Iv, eAX */
19414 if (haveF2orF3(pfx)) goto decode_failure;
19415 delta = dis_op_imm_A(sz, False, Iop_Add8, True, delta, "add" );
19416 return delta;
19417
19418 case 0x08: /* OR Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019419 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019420 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
19421 return delta;
19422 case 0x09: /* OR Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019423 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019424 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
19425 return delta;
19426
19427 case 0x0A: /* OR Eb,Gb */
19428 if (haveF2orF3(pfx)) goto decode_failure;
19429 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, 1, delta, "or" );
19430 return delta;
19431 case 0x0B: /* OR Ev,Gv */
19432 if (haveF2orF3(pfx)) goto decode_failure;
19433 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Or8, True, sz, delta, "or" );
19434 return delta;
19435
19436 case 0x0C: /* OR Ib, AL */
19437 if (haveF2orF3(pfx)) goto decode_failure;
19438 delta = dis_op_imm_A( 1, False, Iop_Or8, True, delta, "or" );
19439 return delta;
19440 case 0x0D: /* OR Iv, eAX */
19441 if (haveF2orF3(pfx)) goto decode_failure;
19442 delta = dis_op_imm_A( sz, False, Iop_Or8, True, delta, "or" );
19443 return delta;
19444
19445 case 0x10: /* ADC Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019446 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019447 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
19448 return delta;
19449 case 0x11: /* ADC Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019450 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019451 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
19452 return delta;
19453
19454 case 0x12: /* ADC Eb,Gb */
19455 if (haveF2orF3(pfx)) goto decode_failure;
19456 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, 1, delta, "adc" );
19457 return delta;
19458 case 0x13: /* ADC Ev,Gv */
19459 if (haveF2orF3(pfx)) goto decode_failure;
19460 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Add8, True, sz, delta, "adc" );
19461 return delta;
19462
19463 case 0x14: /* ADC Ib, AL */
19464 if (haveF2orF3(pfx)) goto decode_failure;
19465 delta = dis_op_imm_A( 1, True, Iop_Add8, True, delta, "adc" );
19466 return delta;
19467 case 0x15: /* ADC Iv, eAX */
19468 if (haveF2orF3(pfx)) goto decode_failure;
19469 delta = dis_op_imm_A( sz, True, Iop_Add8, True, delta, "adc" );
19470 return delta;
19471
19472 case 0x18: /* SBB Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019473 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019474 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
19475 return delta;
19476 case 0x19: /* SBB Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019477 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019478 delta = dis_op2_G_E ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
19479 return delta;
19480
19481 case 0x1A: /* SBB Eb,Gb */
19482 if (haveF2orF3(pfx)) goto decode_failure;
19483 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, 1, delta, "sbb" );
19484 return delta;
19485 case 0x1B: /* SBB Ev,Gv */
19486 if (haveF2orF3(pfx)) goto decode_failure;
19487 delta = dis_op2_E_G ( vbi, pfx, True, Iop_Sub8, True, sz, delta, "sbb" );
19488 return delta;
19489
19490 case 0x1C: /* SBB Ib, AL */
19491 if (haveF2orF3(pfx)) goto decode_failure;
19492 delta = dis_op_imm_A( 1, True, Iop_Sub8, True, delta, "sbb" );
19493 return delta;
19494 case 0x1D: /* SBB Iv, eAX */
19495 if (haveF2orF3(pfx)) goto decode_failure;
19496 delta = dis_op_imm_A( sz, True, Iop_Sub8, True, delta, "sbb" );
19497 return delta;
19498
19499 case 0x20: /* AND Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019500 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019501 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
19502 return delta;
19503 case 0x21: /* AND Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019504 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019505 delta = dis_op2_G_E ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
19506 return delta;
19507
19508 case 0x22: /* AND Eb,Gb */
19509 if (haveF2orF3(pfx)) goto decode_failure;
19510 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, 1, delta, "and" );
19511 return delta;
19512 case 0x23: /* AND Ev,Gv */
19513 if (haveF2orF3(pfx)) goto decode_failure;
19514 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, True, sz, delta, "and" );
19515 return delta;
19516
19517 case 0x24: /* AND Ib, AL */
19518 if (haveF2orF3(pfx)) goto decode_failure;
19519 delta = dis_op_imm_A( 1, False, Iop_And8, True, delta, "and" );
19520 return delta;
19521 case 0x25: /* AND Iv, eAX */
19522 if (haveF2orF3(pfx)) goto decode_failure;
19523 delta = dis_op_imm_A( sz, False, Iop_And8, True, delta, "and" );
19524 return delta;
19525
19526 case 0x28: /* SUB Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019527 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019528 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
19529 return delta;
19530 case 0x29: /* SUB Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019531 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019532 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
19533 return delta;
19534
19535 case 0x2A: /* SUB Eb,Gb */
19536 if (haveF2orF3(pfx)) goto decode_failure;
19537 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, 1, delta, "sub" );
19538 return delta;
19539 case 0x2B: /* SUB Ev,Gv */
19540 if (haveF2orF3(pfx)) goto decode_failure;
19541 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, True, sz, delta, "sub" );
19542 return delta;
19543
19544 case 0x2C: /* SUB Ib, AL */
19545 if (haveF2orF3(pfx)) goto decode_failure;
19546 delta = dis_op_imm_A(1, False, Iop_Sub8, True, delta, "sub" );
19547 return delta;
sewardj80611e32012-01-20 13:07:24 +000019548 case 0x2D: /* SUB Iv, eAX */
19549 if (haveF2orF3(pfx)) goto decode_failure;
19550 delta = dis_op_imm_A( sz, False, Iop_Sub8, True, delta, "sub" );
19551 return delta;
19552
19553 case 0x30: /* XOR Gb,Eb */
sewardj38b1d692013-10-15 17:21:42 +000019554 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019555 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
19556 return delta;
19557 case 0x31: /* XOR Gv,Ev */
sewardj38b1d692013-10-15 17:21:42 +000019558 if (!validF2orF3) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019559 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
19560 return delta;
19561
19562 case 0x32: /* XOR Eb,Gb */
19563 if (haveF2orF3(pfx)) goto decode_failure;
19564 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, 1, delta, "xor" );
19565 return delta;
19566 case 0x33: /* XOR Ev,Gv */
19567 if (haveF2orF3(pfx)) goto decode_failure;
19568 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Xor8, True, sz, delta, "xor" );
19569 return delta;
19570
19571 case 0x34: /* XOR Ib, AL */
19572 if (haveF2orF3(pfx)) goto decode_failure;
19573 delta = dis_op_imm_A( 1, False, Iop_Xor8, True, delta, "xor" );
19574 return delta;
19575 case 0x35: /* XOR Iv, eAX */
19576 if (haveF2orF3(pfx)) goto decode_failure;
19577 delta = dis_op_imm_A( sz, False, Iop_Xor8, True, delta, "xor" );
19578 return delta;
19579
19580 case 0x38: /* CMP Gb,Eb */
19581 if (haveF2orF3(pfx)) goto decode_failure;
19582 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
19583 return delta;
19584 case 0x39: /* CMP Gv,Ev */
19585 if (haveF2orF3(pfx)) goto decode_failure;
19586 delta = dis_op2_G_E ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
19587 return delta;
19588
19589 case 0x3A: /* CMP Eb,Gb */
19590 if (haveF2orF3(pfx)) goto decode_failure;
19591 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, 1, delta, "cmp" );
19592 return delta;
19593 case 0x3B: /* CMP Ev,Gv */
19594 if (haveF2orF3(pfx)) goto decode_failure;
19595 delta = dis_op2_E_G ( vbi, pfx, False, Iop_Sub8, False, sz, delta, "cmp" );
19596 return delta;
19597
19598 case 0x3C: /* CMP Ib, AL */
19599 if (haveF2orF3(pfx)) goto decode_failure;
19600 delta = dis_op_imm_A( 1, False, Iop_Sub8, False, delta, "cmp" );
19601 return delta;
19602 case 0x3D: /* CMP Iv, eAX */
19603 if (haveF2orF3(pfx)) goto decode_failure;
19604 delta = dis_op_imm_A( sz, False, Iop_Sub8, False, delta, "cmp" );
19605 return delta;
19606
19607 case 0x50: /* PUSH eAX */
19608 case 0x51: /* PUSH eCX */
19609 case 0x52: /* PUSH eDX */
19610 case 0x53: /* PUSH eBX */
19611 case 0x55: /* PUSH eBP */
19612 case 0x56: /* PUSH eSI */
19613 case 0x57: /* PUSH eDI */
19614 case 0x54: /* PUSH eSP */
19615 /* This is the Right Way, in that the value to be pushed is
19616 established before %rsp is changed, so that pushq %rsp
19617 correctly pushes the old value. */
19618 if (haveF2orF3(pfx)) goto decode_failure;
19619 vassert(sz == 2 || sz == 4 || sz == 8);
19620 if (sz == 4)
19621 sz = 8; /* there is no encoding for 32-bit push in 64-bit mode */
19622 ty = sz==2 ? Ity_I16 : Ity_I64;
19623 t1 = newTemp(ty);
19624 t2 = newTemp(Ity_I64);
19625 assign(t1, getIRegRexB(sz, pfx, opc-0x50));
19626 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(sz)));
19627 putIReg64(R_RSP, mkexpr(t2) );
19628 storeLE(mkexpr(t2),mkexpr(t1));
19629 DIP("push%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x50));
19630 return delta;
19631
19632 case 0x58: /* POP eAX */
19633 case 0x59: /* POP eCX */
19634 case 0x5A: /* POP eDX */
19635 case 0x5B: /* POP eBX */
19636 case 0x5D: /* POP eBP */
19637 case 0x5E: /* POP eSI */
19638 case 0x5F: /* POP eDI */
19639 case 0x5C: /* POP eSP */
19640 if (haveF2orF3(pfx)) goto decode_failure;
19641 vassert(sz == 2 || sz == 4 || sz == 8);
19642 if (sz == 4)
19643 sz = 8; /* there is no encoding for 32-bit pop in 64-bit mode */
19644 t1 = newTemp(szToITy(sz));
19645 t2 = newTemp(Ity_I64);
19646 assign(t2, getIReg64(R_RSP));
19647 assign(t1, loadLE(szToITy(sz),mkexpr(t2)));
19648 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
19649 putIRegRexB(sz, pfx, opc-0x58, mkexpr(t1));
19650 DIP("pop%c %s\n", nameISize(sz), nameIRegRexB(sz,pfx,opc-0x58));
19651 return delta;
19652
19653 case 0x63: /* MOVSX */
19654 if (haveF2orF3(pfx)) goto decode_failure;
19655 if (haveREX(pfx) && 1==getRexW(pfx)) {
19656 vassert(sz == 8);
19657 /* movsx r/m32 to r64 */
19658 modrm = getUChar(delta);
19659 if (epartIsReg(modrm)) {
19660 delta++;
19661 putIRegG(8, pfx, modrm,
19662 unop(Iop_32Sto64,
19663 getIRegE(4, pfx, modrm)));
19664 DIP("movslq %s,%s\n",
19665 nameIRegE(4, pfx, modrm),
19666 nameIRegG(8, pfx, modrm));
19667 return delta;
19668 } else {
19669 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19670 delta += alen;
19671 putIRegG(8, pfx, modrm,
19672 unop(Iop_32Sto64,
19673 loadLE(Ity_I32, mkexpr(addr))));
19674 DIP("movslq %s,%s\n", dis_buf,
19675 nameIRegG(8, pfx, modrm));
19676 return delta;
19677 }
19678 } else {
19679 goto decode_failure;
19680 }
19681
19682 case 0x68: /* PUSH Iv */
19683 if (haveF2orF3(pfx)) goto decode_failure;
19684 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
19685 if (sz == 4) sz = 8;
19686 d64 = getSDisp(imin(4,sz),delta);
19687 delta += imin(4,sz);
19688 goto do_push_I;
19689
19690 case 0x69: /* IMUL Iv, Ev, Gv */
19691 if (haveF2orF3(pfx)) goto decode_failure;
19692 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, sz );
19693 return delta;
19694
19695 case 0x6A: /* PUSH Ib, sign-extended to sz */
19696 if (haveF2orF3(pfx)) goto decode_failure;
19697 /* Note, sz==4 is not possible in 64-bit mode. Hence ... */
19698 if (sz == 4) sz = 8;
19699 d64 = getSDisp8(delta); delta += 1;
19700 goto do_push_I;
19701 do_push_I:
19702 ty = szToITy(sz);
19703 t1 = newTemp(Ity_I64);
19704 t2 = newTemp(ty);
19705 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
19706 putIReg64(R_RSP, mkexpr(t1) );
19707 /* stop mkU16 asserting if d64 is a negative 16-bit number
19708 (bug #132813) */
19709 if (ty == Ity_I16)
19710 d64 &= 0xFFFF;
19711 storeLE( mkexpr(t1), mkU(ty,d64) );
19712 DIP("push%c $%lld\n", nameISize(sz), (Long)d64);
19713 return delta;
19714
19715 case 0x6B: /* IMUL Ib, Ev, Gv */
19716 delta = dis_imul_I_E_G ( vbi, pfx, sz, delta, 1 );
19717 return delta;
19718
19719 case 0x70:
19720 case 0x71:
19721 case 0x72: /* JBb/JNAEb (jump below) */
19722 case 0x73: /* JNBb/JAEb (jump not below) */
19723 case 0x74: /* JZb/JEb (jump zero) */
19724 case 0x75: /* JNZb/JNEb (jump not zero) */
19725 case 0x76: /* JBEb/JNAb (jump below or equal) */
19726 case 0x77: /* JNBEb/JAb (jump not below or equal) */
19727 case 0x78: /* JSb (jump negative) */
19728 case 0x79: /* JSb (jump not negative) */
19729 case 0x7A: /* JP (jump parity even) */
19730 case 0x7B: /* JNP/JPO (jump parity odd) */
19731 case 0x7C: /* JLb/JNGEb (jump less) */
19732 case 0x7D: /* JGEb/JNLb (jump greater or equal) */
19733 case 0x7E: /* JLEb/JNGb (jump less or equal) */
19734 case 0x7F: { /* JGb/JNLEb (jump greater) */
19735 Long jmpDelta;
florian55085f82012-11-21 00:36:55 +000019736 const HChar* comment = "";
mjw67ac3fd2014-05-09 11:41:06 +000019737 if (haveF3(pfx)) goto decode_failure;
19738 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000019739 jmpDelta = getSDisp8(delta);
19740 vassert(-128 <= jmpDelta && jmpDelta < 128);
19741 d64 = (guest_RIP_bbstart+delta+1) + jmpDelta;
19742 delta++;
19743 if (resteerCisOk
19744 && vex_control.guest_chase_cond
19745 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19746 && jmpDelta < 0
19747 && resteerOkFn( callback_opaque, d64) ) {
19748 /* Speculation: assume this backward branch is taken. So we
19749 need to emit a side-exit to the insn following this one,
19750 on the negation of the condition, and continue at the
19751 branch target address (d64). If we wind up back at the
19752 first instruction of the trace, just stop; it's better to
19753 let the IR loop unroller handle that case. */
19754 stmt( IRStmt_Exit(
19755 mk_amd64g_calculate_condition(
19756 (AMD64Condcode)(1 ^ (opc - 0x70))),
19757 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000019758 IRConst_U64(guest_RIP_bbstart+delta),
19759 OFFB_RIP ) );
sewardj80611e32012-01-20 13:07:24 +000019760 dres->whatNext = Dis_ResteerC;
19761 dres->continueAt = d64;
19762 comment = "(assumed taken)";
19763 }
19764 else
19765 if (resteerCisOk
19766 && vex_control.guest_chase_cond
19767 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
19768 && jmpDelta >= 0
19769 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
19770 /* Speculation: assume this forward branch is not taken. So
19771 we need to emit a side-exit to d64 (the dest) and continue
19772 disassembling at the insn immediately following this
19773 one. */
19774 stmt( IRStmt_Exit(
19775 mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
19776 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000019777 IRConst_U64(d64),
19778 OFFB_RIP ) );
sewardj80611e32012-01-20 13:07:24 +000019779 dres->whatNext = Dis_ResteerC;
19780 dres->continueAt = guest_RIP_bbstart+delta;
19781 comment = "(assumed not taken)";
19782 }
19783 else {
19784 /* Conservative default translation - end the block at this
19785 point. */
sewardjc6f970f2012-04-02 21:54:49 +000019786 jcc_01( dres, (AMD64Condcode)(opc - 0x70),
19787 guest_RIP_bbstart+delta, d64 );
19788 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000019789 }
19790 DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
19791 return delta;
19792 }
19793
19794 case 0x80: /* Grp1 Ib,Eb */
sewardj80611e32012-01-20 13:07:24 +000019795 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019796 /* Disallow F2/XACQ and F3/XREL for the non-mem case. Allow
19797 just one for the mem case and also require LOCK in this case.
19798 Note that this erroneously allows XACQ/XREL on CMP since we
19799 don't check the subopcode here. No big deal. */
19800 if (epartIsReg(modrm) && haveF2orF3(pfx))
19801 goto decode_failure;
19802 if (!epartIsReg(modrm) && haveF2andF3(pfx))
19803 goto decode_failure;
19804 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
19805 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019806 am_sz = lengthAMode(pfx,delta);
19807 sz = 1;
19808 d_sz = 1;
19809 d64 = getSDisp8(delta + am_sz);
19810 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19811 return delta;
19812
19813 case 0x81: /* Grp1 Iv,Ev */
sewardj80611e32012-01-20 13:07:24 +000019814 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019815 /* Same comment as for case 0x80 just above. */
19816 if (epartIsReg(modrm) && haveF2orF3(pfx))
19817 goto decode_failure;
19818 if (!epartIsReg(modrm) && haveF2andF3(pfx))
19819 goto decode_failure;
19820 if (!epartIsReg(modrm) && haveF2orF3(pfx) && !haveLOCK(pfx))
19821 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019822 am_sz = lengthAMode(pfx,delta);
19823 d_sz = imin(sz,4);
19824 d64 = getSDisp(d_sz, delta + am_sz);
19825 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19826 return delta;
19827
19828 case 0x83: /* Grp1 Ib,Ev */
19829 if (haveF2orF3(pfx)) goto decode_failure;
19830 modrm = getUChar(delta);
19831 am_sz = lengthAMode(pfx,delta);
19832 d_sz = 1;
19833 d64 = getSDisp8(delta + am_sz);
19834 delta = dis_Grp1 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz, d64 );
19835 return delta;
19836
19837 case 0x84: /* TEST Eb,Gb */
19838 if (haveF2orF3(pfx)) goto decode_failure;
19839 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, 1, delta, "test" );
19840 return delta;
19841
19842 case 0x85: /* TEST Ev,Gv */
19843 if (haveF2orF3(pfx)) goto decode_failure;
19844 delta = dis_op2_E_G ( vbi, pfx, False, Iop_And8, False, sz, delta, "test" );
19845 return delta;
19846
19847 /* XCHG reg,mem automatically asserts LOCK# even without a LOCK
sewardj38b1d692013-10-15 17:21:42 +000019848 prefix. Therefore, generate CAS regardless of the presence or
19849 otherwise of a LOCK prefix. */
sewardj80611e32012-01-20 13:07:24 +000019850 case 0x86: /* XCHG Gb,Eb */
19851 sz = 1;
19852 /* Fall through ... */
19853 case 0x87: /* XCHG Gv,Ev */
sewardj80611e32012-01-20 13:07:24 +000019854 modrm = getUChar(delta);
sewardj38b1d692013-10-15 17:21:42 +000019855 /* Check whether F2 or F3 are allowable. For the mem case, one
19856 or the other but not both are. We don't care about the
19857 presence of LOCK in this case -- XCHG is unusual in this
19858 respect. */
19859 if (haveF2orF3(pfx)) {
19860 if (epartIsReg(modrm)) {
19861 goto decode_failure;
19862 } else {
19863 if (haveF2andF3(pfx))
19864 goto decode_failure;
19865 }
19866 }
sewardj80611e32012-01-20 13:07:24 +000019867 ty = szToITy(sz);
19868 t1 = newTemp(ty); t2 = newTemp(ty);
19869 if (epartIsReg(modrm)) {
19870 assign(t1, getIRegE(sz, pfx, modrm));
19871 assign(t2, getIRegG(sz, pfx, modrm));
19872 putIRegG(sz, pfx, modrm, mkexpr(t1));
19873 putIRegE(sz, pfx, modrm, mkexpr(t2));
19874 delta++;
19875 DIP("xchg%c %s, %s\n",
19876 nameISize(sz), nameIRegG(sz, pfx, modrm),
19877 nameIRegE(sz, pfx, modrm));
19878 } else {
19879 *expect_CAS = True;
19880 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
19881 assign( t1, loadLE(ty, mkexpr(addr)) );
19882 assign( t2, getIRegG(sz, pfx, modrm) );
19883 casLE( mkexpr(addr),
19884 mkexpr(t1), mkexpr(t2), guest_RIP_curr_instr );
19885 putIRegG( sz, pfx, modrm, mkexpr(t1) );
19886 delta += alen;
19887 DIP("xchg%c %s, %s\n", nameISize(sz),
19888 nameIRegG(sz, pfx, modrm), dis_buf);
19889 }
19890 return delta;
19891
sewardj38b1d692013-10-15 17:21:42 +000019892 case 0x88: { /* MOV Gb,Eb */
19893 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
19894 Bool ok = True;
19895 delta = dis_mov_G_E(vbi, pfx, 1, delta, &ok);
19896 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019897 return delta;
sewardj38b1d692013-10-15 17:21:42 +000019898 }
sewardj80611e32012-01-20 13:07:24 +000019899
sewardj38b1d692013-10-15 17:21:42 +000019900 case 0x89: { /* MOV Gv,Ev */
19901 /* We let dis_mov_G_E decide whether F3(XRELEASE) is allowable. */
19902 Bool ok = True;
19903 delta = dis_mov_G_E(vbi, pfx, sz, delta, &ok);
19904 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000019905 return delta;
sewardj38b1d692013-10-15 17:21:42 +000019906 }
sewardj80611e32012-01-20 13:07:24 +000019907
19908 case 0x8A: /* MOV Eb,Gb */
19909 if (haveF2orF3(pfx)) goto decode_failure;
19910 delta = dis_mov_E_G(vbi, pfx, 1, delta);
19911 return delta;
19912
19913 case 0x8B: /* MOV Ev,Gv */
19914 if (haveF2orF3(pfx)) goto decode_failure;
19915 delta = dis_mov_E_G(vbi, pfx, sz, delta);
19916 return delta;
19917
19918 case 0x8D: /* LEA M,Gv */
19919 if (haveF2orF3(pfx)) goto decode_failure;
19920 if (sz != 4 && sz != 8)
19921 goto decode_failure;
19922 modrm = getUChar(delta);
19923 if (epartIsReg(modrm))
19924 goto decode_failure;
19925 /* NOTE! this is the one place where a segment override prefix
19926 has no effect on the address calculation. Therefore we clear
19927 any segment override bits in pfx. */
19928 addr = disAMode ( &alen, vbi, clearSegBits(pfx), delta, dis_buf, 0 );
19929 delta += alen;
19930 /* This is a hack. But it isn't clear that really doing the
19931 calculation at 32 bits is really worth it. Hence for leal,
19932 do the full 64-bit calculation and then truncate it. */
19933 putIRegG( sz, pfx, modrm,
19934 sz == 4
19935 ? unop(Iop_64to32, mkexpr(addr))
19936 : mkexpr(addr)
19937 );
19938 DIP("lea%c %s, %s\n", nameISize(sz), dis_buf,
19939 nameIRegG(sz,pfx,modrm));
19940 return delta;
19941
19942 case 0x8F: { /* POPQ m64 / POPW m16 */
19943 Int len;
19944 UChar rm;
19945 /* There is no encoding for 32-bit pop in 64-bit mode.
19946 So sz==4 actually means sz==8. */
19947 if (haveF2orF3(pfx)) goto decode_failure;
19948 vassert(sz == 2 || sz == 4
19949 || /* tolerate redundant REX.W, see #210481 */ sz == 8);
19950 if (sz == 4) sz = 8;
19951 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
19952
19953 rm = getUChar(delta);
19954
19955 /* make sure this instruction is correct POP */
19956 if (epartIsReg(rm) || gregLO3ofRM(rm) != 0)
19957 goto decode_failure;
19958 /* and has correct size */
19959 vassert(sz == 8);
19960
19961 t1 = newTemp(Ity_I64);
19962 t3 = newTemp(Ity_I64);
19963 assign( t1, getIReg64(R_RSP) );
19964 assign( t3, loadLE(Ity_I64, mkexpr(t1)) );
19965
19966 /* Increase RSP; must be done before the STORE. Intel manual
19967 says: If the RSP register is used as a base register for
19968 addressing a destination operand in memory, the POP
19969 instruction computes the effective address of the operand
19970 after it increments the RSP register. */
19971 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(sz)) );
19972
19973 addr = disAMode ( &len, vbi, pfx, delta, dis_buf, 0 );
19974 storeLE( mkexpr(addr), mkexpr(t3) );
19975
19976 DIP("popl %s\n", dis_buf);
19977
19978 delta += len;
19979 return delta;
19980 }
19981
19982 case 0x90: /* XCHG eAX,eAX */
19983 /* detect and handle F3 90 (rep nop) specially */
19984 if (!have66(pfx) && !haveF2(pfx) && haveF3(pfx)) {
19985 DIP("rep nop (P4 pause)\n");
19986 /* "observe" the hint. The Vex client needs to be careful not
19987 to cause very long delays as a result, though. */
sewardjc6f970f2012-04-02 21:54:49 +000019988 jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
19989 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000019990 return delta;
19991 }
19992 /* detect and handle NOPs specially */
19993 if (/* F2/F3 probably change meaning completely */
19994 !haveF2orF3(pfx)
19995 /* If REX.B is 1, we're not exchanging rAX with itself */
19996 && getRexB(pfx)==0 ) {
19997 DIP("nop\n");
19998 return delta;
19999 }
20000 /* else fall through to normal case. */
20001 case 0x91: /* XCHG rAX,rCX */
20002 case 0x92: /* XCHG rAX,rDX */
20003 case 0x93: /* XCHG rAX,rBX */
20004 case 0x94: /* XCHG rAX,rSP */
20005 case 0x95: /* XCHG rAX,rBP */
20006 case 0x96: /* XCHG rAX,rSI */
20007 case 0x97: /* XCHG rAX,rDI */
20008 /* guard against mutancy */
20009 if (haveF2orF3(pfx)) goto decode_failure;
20010 codegen_xchg_rAX_Reg ( pfx, sz, opc - 0x90 );
20011 return delta;
20012
20013 case 0x98: /* CBW */
20014 if (haveF2orF3(pfx)) goto decode_failure;
20015 if (sz == 8) {
20016 putIRegRAX( 8, unop(Iop_32Sto64, getIRegRAX(4)) );
20017 DIP(/*"cdqe\n"*/"cltq");
20018 return delta;
20019 }
20020 if (sz == 4) {
20021 putIRegRAX( 4, unop(Iop_16Sto32, getIRegRAX(2)) );
20022 DIP("cwtl\n");
20023 return delta;
20024 }
20025 if (sz == 2) {
20026 putIRegRAX( 2, unop(Iop_8Sto16, getIRegRAX(1)) );
20027 DIP("cbw\n");
20028 return delta;
20029 }
20030 goto decode_failure;
20031
20032 case 0x99: /* CWD/CDQ/CQO */
20033 if (haveF2orF3(pfx)) goto decode_failure;
20034 vassert(sz == 2 || sz == 4 || sz == 8);
20035 ty = szToITy(sz);
20036 putIRegRDX( sz,
20037 binop(mkSizedOp(ty,Iop_Sar8),
20038 getIRegRAX(sz),
20039 mkU8(sz == 2 ? 15 : (sz == 4 ? 31 : 63))) );
20040 DIP(sz == 2 ? "cwd\n"
20041 : (sz == 4 ? /*"cdq\n"*/ "cltd\n"
20042 : "cqo\n"));
20043 return delta;
20044
20045 case 0x9B: /* FWAIT (X87 insn) */
20046 /* ignore? */
20047 DIP("fwait\n");
20048 return delta;
20049
20050 case 0x9C: /* PUSHF */ {
20051 /* Note. There is no encoding for a 32-bit pushf in 64-bit
20052 mode. So sz==4 actually means sz==8. */
20053 /* 24 July 06: has also been seen with a redundant REX prefix,
20054 so must also allow sz==8. */
20055 if (haveF2orF3(pfx)) goto decode_failure;
20056 vassert(sz == 2 || sz == 4 || sz == 8);
20057 if (sz == 4) sz = 8;
20058 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20059
20060 t1 = newTemp(Ity_I64);
20061 assign( t1, binop(Iop_Sub64,getIReg64(R_RSP),mkU64(sz)) );
20062 putIReg64(R_RSP, mkexpr(t1) );
20063
20064 t2 = newTemp(Ity_I64);
20065 assign( t2, mk_amd64g_calculate_rflags_all() );
20066
20067 /* Patch in the D flag. This can simply be a copy of bit 10 of
20068 baseBlock[OFFB_DFLAG]. */
20069 t3 = newTemp(Ity_I64);
20070 assign( t3, binop(Iop_Or64,
20071 mkexpr(t2),
20072 binop(Iop_And64,
20073 IRExpr_Get(OFFB_DFLAG,Ity_I64),
20074 mkU64(1<<10)))
20075 );
20076
20077 /* And patch in the ID flag. */
20078 t4 = newTemp(Ity_I64);
20079 assign( t4, binop(Iop_Or64,
20080 mkexpr(t3),
20081 binop(Iop_And64,
20082 binop(Iop_Shl64, IRExpr_Get(OFFB_IDFLAG,Ity_I64),
20083 mkU8(21)),
20084 mkU64(1<<21)))
20085 );
20086
20087 /* And patch in the AC flag too. */
20088 t5 = newTemp(Ity_I64);
20089 assign( t5, binop(Iop_Or64,
20090 mkexpr(t4),
20091 binop(Iop_And64,
20092 binop(Iop_Shl64, IRExpr_Get(OFFB_ACFLAG,Ity_I64),
20093 mkU8(18)),
20094 mkU64(1<<18)))
20095 );
20096
20097 /* if sz==2, the stored value needs to be narrowed. */
20098 if (sz == 2)
20099 storeLE( mkexpr(t1), unop(Iop_32to16,
20100 unop(Iop_64to32,mkexpr(t5))) );
20101 else
20102 storeLE( mkexpr(t1), mkexpr(t5) );
20103
20104 DIP("pushf%c\n", nameISize(sz));
20105 return delta;
20106 }
20107
20108 case 0x9D: /* POPF */
20109 /* Note. There is no encoding for a 32-bit popf in 64-bit mode.
20110 So sz==4 actually means sz==8. */
20111 if (haveF2orF3(pfx)) goto decode_failure;
20112 vassert(sz == 2 || sz == 4);
20113 if (sz == 4) sz = 8;
20114 if (sz != 8) goto decode_failure; // until we know a sz==2 test case exists
20115 t1 = newTemp(Ity_I64); t2 = newTemp(Ity_I64);
20116 assign(t2, getIReg64(R_RSP));
20117 assign(t1, widenUto64(loadLE(szToITy(sz),mkexpr(t2))));
20118 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t2), mkU64(sz)));
20119 /* t1 is the flag word. Mask out everything except OSZACP and
20120 set the flags thunk to AMD64G_CC_OP_COPY. */
20121 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20122 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20123 stmt( IRStmt_Put( OFFB_CC_DEP1,
20124 binop(Iop_And64,
20125 mkexpr(t1),
20126 mkU64( AMD64G_CC_MASK_C | AMD64G_CC_MASK_P
20127 | AMD64G_CC_MASK_A | AMD64G_CC_MASK_Z
20128 | AMD64G_CC_MASK_S| AMD64G_CC_MASK_O )
20129 )
20130 )
20131 );
20132
20133 /* Also need to set the D flag, which is held in bit 10 of t1.
20134 If zero, put 1 in OFFB_DFLAG, else -1 in OFFB_DFLAG. */
20135 stmt( IRStmt_Put(
20136 OFFB_DFLAG,
florian99dd03e2013-01-29 03:56:06 +000020137 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020138 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020139 binop(Iop_And64,
20140 binop(Iop_Shr64, mkexpr(t1), mkU8(10)),
sewardj009230b2013-01-26 11:47:55 +000020141 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020142 mkU64(0xFFFFFFFFFFFFFFFFULL),
20143 mkU64(1)))
sewardj80611e32012-01-20 13:07:24 +000020144 );
20145
20146 /* And set the ID flag */
20147 stmt( IRStmt_Put(
20148 OFFB_IDFLAG,
florian99dd03e2013-01-29 03:56:06 +000020149 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020150 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020151 binop(Iop_And64,
20152 binop(Iop_Shr64, mkexpr(t1), mkU8(21)),
sewardj009230b2013-01-26 11:47:55 +000020153 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020154 mkU64(1),
20155 mkU64(0)))
sewardj80611e32012-01-20 13:07:24 +000020156 );
20157
20158 /* And set the AC flag too */
20159 stmt( IRStmt_Put(
20160 OFFB_ACFLAG,
florian99dd03e2013-01-29 03:56:06 +000020161 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000020162 unop(Iop_64to1,
sewardj80611e32012-01-20 13:07:24 +000020163 binop(Iop_And64,
20164 binop(Iop_Shr64, mkexpr(t1), mkU8(18)),
sewardj009230b2013-01-26 11:47:55 +000020165 mkU64(1))),
florian99dd03e2013-01-29 03:56:06 +000020166 mkU64(1),
20167 mkU64(0)))
sewardj80611e32012-01-20 13:07:24 +000020168 );
20169
20170 DIP("popf%c\n", nameISize(sz));
20171 return delta;
20172
20173 case 0x9E: /* SAHF */
20174 codegen_SAHF();
20175 DIP("sahf\n");
20176 return delta;
20177
20178 case 0x9F: /* LAHF */
20179 codegen_LAHF();
20180 DIP("lahf\n");
20181 return delta;
20182
20183 case 0xA0: /* MOV Ob,AL */
20184 if (have66orF2orF3(pfx)) goto decode_failure;
20185 sz = 1;
20186 /* Fall through ... */
20187 case 0xA1: /* MOV Ov,eAX */
20188 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20189 goto decode_failure;
20190 d64 = getDisp64(delta);
20191 delta += 8;
20192 ty = szToITy(sz);
20193 addr = newTemp(Ity_I64);
20194 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20195 putIRegRAX(sz, loadLE( ty, mkexpr(addr) ));
20196 DIP("mov%c %s0x%llx, %s\n", nameISize(sz),
20197 segRegTxt(pfx), d64,
20198 nameIRegRAX(sz));
20199 return delta;
20200
20201 case 0xA2: /* MOV AL,Ob */
20202 if (have66orF2orF3(pfx)) goto decode_failure;
20203 sz = 1;
20204 /* Fall through ... */
20205 case 0xA3: /* MOV eAX,Ov */
20206 if (sz != 8 && sz != 4 && sz != 2 && sz != 1)
20207 goto decode_failure;
20208 d64 = getDisp64(delta);
20209 delta += 8;
20210 ty = szToITy(sz);
20211 addr = newTemp(Ity_I64);
20212 assign( addr, handleAddrOverrides(vbi, pfx, mkU64(d64)) );
20213 storeLE( mkexpr(addr), getIRegRAX(sz) );
20214 DIP("mov%c %s, %s0x%llx\n", nameISize(sz), nameIRegRAX(sz),
20215 segRegTxt(pfx), d64);
20216 return delta;
20217
20218 case 0xA4:
20219 case 0xA5:
20220 /* F3 A4: rep movsb */
20221 if (haveF3(pfx) && !haveF2(pfx)) {
20222 if (opc == 0xA4)
20223 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020224 dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
sewardj80611e32012-01-20 13:07:24 +000020225 guest_RIP_curr_instr,
20226 guest_RIP_bbstart+delta, "rep movs", pfx );
20227 dres->whatNext = Dis_StopHere;
20228 return delta;
20229 }
20230 /* A4: movsb */
20231 if (!haveF3(pfx) && !haveF2(pfx)) {
20232 if (opc == 0xA4)
20233 sz = 1;
20234 dis_string_op( dis_MOVS, sz, "movs", pfx );
20235 return delta;
20236 }
20237 goto decode_failure;
20238
20239 case 0xA6:
20240 case 0xA7:
20241 /* F3 A6/A7: repe cmps/rep cmps{w,l,q} */
20242 if (haveF3(pfx) && !haveF2(pfx)) {
20243 if (opc == 0xA6)
20244 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020245 dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
sewardj80611e32012-01-20 13:07:24 +000020246 guest_RIP_curr_instr,
20247 guest_RIP_bbstart+delta, "repe cmps", pfx );
20248 dres->whatNext = Dis_StopHere;
20249 return delta;
20250 }
20251 goto decode_failure;
20252
20253 case 0xAA:
20254 case 0xAB:
20255 /* F3 AA/AB: rep stosb/rep stos{w,l,q} */
20256 if (haveF3(pfx) && !haveF2(pfx)) {
20257 if (opc == 0xAA)
20258 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020259 dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
sewardj80611e32012-01-20 13:07:24 +000020260 guest_RIP_curr_instr,
20261 guest_RIP_bbstart+delta, "rep stos", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020262 vassert(dres->whatNext == Dis_StopHere);
20263 return delta;
sewardj80611e32012-01-20 13:07:24 +000020264 }
20265 /* AA/AB: stosb/stos{w,l,q} */
20266 if (!haveF3(pfx) && !haveF2(pfx)) {
20267 if (opc == 0xAA)
20268 sz = 1;
20269 dis_string_op( dis_STOS, sz, "stos", pfx );
20270 return delta;
20271 }
20272 goto decode_failure;
20273
20274 case 0xA8: /* TEST Ib, AL */
20275 if (haveF2orF3(pfx)) goto decode_failure;
20276 delta = dis_op_imm_A( 1, False, Iop_And8, False, delta, "test" );
20277 return delta;
20278 case 0xA9: /* TEST Iv, eAX */
20279 if (haveF2orF3(pfx)) goto decode_failure;
20280 delta = dis_op_imm_A( sz, False, Iop_And8, False, delta, "test" );
20281 return delta;
20282
20283 case 0xAC: /* LODS, no REP prefix */
20284 case 0xAD:
20285 dis_string_op( dis_LODS, ( opc == 0xAC ? 1 : sz ), "lods", pfx );
20286 return delta;
20287
20288 case 0xAE:
20289 case 0xAF:
20290 /* F2 AE/AF: repne scasb/repne scas{w,l,q} */
20291 if (haveF2(pfx) && !haveF3(pfx)) {
20292 if (opc == 0xAE)
20293 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020294 dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
sewardj80611e32012-01-20 13:07:24 +000020295 guest_RIP_curr_instr,
20296 guest_RIP_bbstart+delta, "repne scas", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020297 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020298 return delta;
20299 }
20300 /* F3 AE/AF: repe scasb/repe scas{w,l,q} */
20301 if (!haveF2(pfx) && haveF3(pfx)) {
20302 if (opc == 0xAE)
20303 sz = 1;
sewardjc6f970f2012-04-02 21:54:49 +000020304 dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
sewardj80611e32012-01-20 13:07:24 +000020305 guest_RIP_curr_instr,
20306 guest_RIP_bbstart+delta, "repe scas", pfx );
sewardjc6f970f2012-04-02 21:54:49 +000020307 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020308 return delta;
20309 }
20310 /* AE/AF: scasb/scas{w,l,q} */
20311 if (!haveF2(pfx) && !haveF3(pfx)) {
20312 if (opc == 0xAE)
20313 sz = 1;
20314 dis_string_op( dis_SCAS, sz, "scas", pfx );
20315 return delta;
20316 }
20317 goto decode_failure;
20318
20319 /* XXXX be careful here with moves to AH/BH/CH/DH */
20320 case 0xB0: /* MOV imm,AL */
20321 case 0xB1: /* MOV imm,CL */
20322 case 0xB2: /* MOV imm,DL */
20323 case 0xB3: /* MOV imm,BL */
20324 case 0xB4: /* MOV imm,AH */
20325 case 0xB5: /* MOV imm,CH */
20326 case 0xB6: /* MOV imm,DH */
20327 case 0xB7: /* MOV imm,BH */
20328 if (haveF2orF3(pfx)) goto decode_failure;
20329 d64 = getUChar(delta);
20330 delta += 1;
20331 putIRegRexB(1, pfx, opc-0xB0, mkU8(d64));
20332 DIP("movb $%lld,%s\n", d64, nameIRegRexB(1,pfx,opc-0xB0));
20333 return delta;
20334
20335 case 0xB8: /* MOV imm,eAX */
20336 case 0xB9: /* MOV imm,eCX */
20337 case 0xBA: /* MOV imm,eDX */
20338 case 0xBB: /* MOV imm,eBX */
20339 case 0xBC: /* MOV imm,eSP */
20340 case 0xBD: /* MOV imm,eBP */
20341 case 0xBE: /* MOV imm,eSI */
20342 case 0xBF: /* MOV imm,eDI */
20343 /* This is the one-and-only place where 64-bit literals are
20344 allowed in the instruction stream. */
20345 if (haveF2orF3(pfx)) goto decode_failure;
20346 if (sz == 8) {
20347 d64 = getDisp64(delta);
20348 delta += 8;
20349 putIRegRexB(8, pfx, opc-0xB8, mkU64(d64));
20350 DIP("movabsq $%lld,%s\n", (Long)d64,
20351 nameIRegRexB(8,pfx,opc-0xB8));
20352 } else {
20353 d64 = getSDisp(imin(4,sz),delta);
20354 delta += imin(4,sz);
20355 putIRegRexB(sz, pfx, opc-0xB8,
20356 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20357 DIP("mov%c $%lld,%s\n", nameISize(sz),
20358 (Long)d64,
20359 nameIRegRexB(sz,pfx,opc-0xB8));
20360 }
20361 return delta;
20362
20363 case 0xC0: { /* Grp2 Ib,Eb */
20364 Bool decode_OK = True;
20365 if (haveF2orF3(pfx)) goto decode_failure;
20366 modrm = getUChar(delta);
20367 am_sz = lengthAMode(pfx,delta);
20368 d_sz = 1;
20369 d64 = getUChar(delta + am_sz);
20370 sz = 1;
20371 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20372 mkU8(d64 & 0xFF), NULL, &decode_OK );
20373 if (!decode_OK) goto decode_failure;
20374 return delta;
20375 }
20376
20377 case 0xC1: { /* Grp2 Ib,Ev */
20378 Bool decode_OK = True;
20379 if (haveF2orF3(pfx)) goto decode_failure;
20380 modrm = getUChar(delta);
20381 am_sz = lengthAMode(pfx,delta);
20382 d_sz = 1;
20383 d64 = getUChar(delta + am_sz);
20384 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20385 mkU8(d64 & 0xFF), NULL, &decode_OK );
20386 if (!decode_OK) goto decode_failure;
20387 return delta;
20388 }
20389
sewardjdb546602012-02-16 19:09:43 +000020390 case 0xC2: /* RET imm16 */
mjw67ac3fd2014-05-09 11:41:06 +000020391 if (have66orF3(pfx)) goto decode_failure;
20392 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardjdb546602012-02-16 19:09:43 +000020393 d64 = getUDisp16(delta);
20394 delta += 2;
sewardjc6f970f2012-04-02 21:54:49 +000020395 dis_ret(dres, vbi, d64);
sewardjdb546602012-02-16 19:09:43 +000020396 DIP("ret $%lld\n", d64);
20397 return delta;
20398
sewardj80611e32012-01-20 13:07:24 +000020399 case 0xC3: /* RET */
mjw67ac3fd2014-05-09 11:41:06 +000020400 if (have66(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020401 /* F3 is acceptable on AMD. */
mjw67ac3fd2014-05-09 11:41:06 +000020402 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardjc6f970f2012-04-02 21:54:49 +000020403 dis_ret(dres, vbi, 0);
sewardj80611e32012-01-20 13:07:24 +000020404 DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
20405 return delta;
20406
sewardj1bf44e32013-09-18 18:27:55 +000020407 case 0xC6: /* C6 /0 = MOV Ib,Eb */
sewardj80611e32012-01-20 13:07:24 +000020408 sz = 1;
sewardj1bf44e32013-09-18 18:27:55 +000020409 goto maybe_do_Mov_I_E;
20410 case 0xC7: /* C7 /0 = MOV Iv,Ev */
20411 goto maybe_do_Mov_I_E;
20412 maybe_do_Mov_I_E:
sewardj80611e32012-01-20 13:07:24 +000020413 modrm = getUChar(delta);
sewardj1bf44e32013-09-18 18:27:55 +000020414 if (gregLO3ofRM(modrm) == 0) {
sewardj1bf44e32013-09-18 18:27:55 +000020415 if (epartIsReg(modrm)) {
sewardj38b1d692013-10-15 17:21:42 +000020416 /* Neither F2 nor F3 are allowable. */
20417 if (haveF2orF3(pfx)) goto decode_failure;
sewardj1bf44e32013-09-18 18:27:55 +000020418 delta++; /* mod/rm byte */
20419 d64 = getSDisp(imin(4,sz),delta);
20420 delta += imin(4,sz);
20421 putIRegE(sz, pfx, modrm,
20422 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20423 DIP("mov%c $%lld, %s\n", nameISize(sz),
20424 (Long)d64,
20425 nameIRegE(sz,pfx,modrm));
20426 } else {
sewardj38b1d692013-10-15 17:21:42 +000020427 if (haveF2(pfx)) goto decode_failure;
20428 /* F3(XRELEASE) is allowable here */
sewardj1bf44e32013-09-18 18:27:55 +000020429 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf,
20430 /*xtra*/imin(4,sz) );
20431 delta += alen;
20432 d64 = getSDisp(imin(4,sz),delta);
20433 delta += imin(4,sz);
20434 storeLE(mkexpr(addr),
20435 mkU(szToITy(sz), d64 & mkSizeMask(sz)));
20436 DIP("mov%c $%lld, %s\n", nameISize(sz), (Long)d64, dis_buf);
20437 }
20438 return delta;
sewardj80611e32012-01-20 13:07:24 +000020439 }
sewardja56f3692013-09-18 19:41:43 +000020440 /* BEGIN HACKY SUPPORT FOR xbegin */
mjwd5453bf2013-12-09 12:54:06 +000020441 if (opc == 0xC7 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 4
mjw9d690c62013-10-04 22:31:48 +000020442 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardja56f3692013-09-18 19:41:43 +000020443 delta++; /* mod/rm byte */
20444 d64 = getSDisp(4,delta);
20445 delta += 4;
20446 guest_RIP_next_mustcheck = True;
20447 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
20448 Addr64 failAddr = guest_RIP_bbstart + delta + d64;
20449 /* EAX contains the failure status code. Bit 3 is "Set if an
20450 internal buffer overflowed", which seems like the
20451 least-bogus choice we can make here. */
20452 putIRegRAX(4, mkU32(1<<3));
20453 /* And jump to the fail address. */
20454 jmp_lit(dres, Ijk_Boring, failAddr);
20455 vassert(dres->whatNext == Dis_StopHere);
20456 DIP("xbeginq 0x%llx\n", failAddr);
20457 return delta;
20458 }
20459 /* END HACKY SUPPORT FOR xbegin */
mjwd5453bf2013-12-09 12:54:06 +000020460 /* BEGIN HACKY SUPPORT FOR xabort */
20461 if (opc == 0xC6 && modrm == 0xF8 && !have66orF2orF3(pfx) && sz == 1
20462 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
20463 delta++; /* mod/rm byte */
20464 abyte = getUChar(delta); delta++;
20465 /* There is never a real transaction in progress, so do nothing. */
20466 DIP("xabort $%d", (Int)abyte);
20467 return delta;
20468 }
20469 /* END HACKY SUPPORT FOR xabort */
sewardj1bf44e32013-09-18 18:27:55 +000020470 goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020471
20472 case 0xC8: /* ENTER */
20473 /* Same comments re operand size as for LEAVE below apply.
20474 Also, only handles the case "enter $imm16, $0"; other cases
20475 for the second operand (nesting depth) are not handled. */
20476 if (sz != 4)
20477 goto decode_failure;
20478 d64 = getUDisp16(delta);
20479 delta += 2;
20480 vassert(d64 >= 0 && d64 <= 0xFFFF);
20481 if (getUChar(delta) != 0)
20482 goto decode_failure;
20483 delta++;
20484 /* Intel docs seem to suggest:
20485 push rbp
20486 temp = rsp
20487 rbp = temp
20488 rsp = rsp - imm16
20489 */
20490 t1 = newTemp(Ity_I64);
20491 assign(t1, getIReg64(R_RBP));
20492 t2 = newTemp(Ity_I64);
20493 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
20494 putIReg64(R_RSP, mkexpr(t2));
20495 storeLE(mkexpr(t2), mkexpr(t1));
20496 putIReg64(R_RBP, mkexpr(t2));
20497 if (d64 > 0) {
20498 putIReg64(R_RSP, binop(Iop_Sub64, mkexpr(t2), mkU64(d64)));
20499 }
20500 DIP("enter $%u, $0\n", (UInt)d64);
20501 return delta;
20502
20503 case 0xC9: /* LEAVE */
20504 /* In 64-bit mode this defaults to a 64-bit operand size. There
20505 is no way to encode a 32-bit variant. Hence sz==4 but we do
20506 it as if sz=8. */
20507 if (sz != 4)
20508 goto decode_failure;
20509 t1 = newTemp(Ity_I64);
20510 t2 = newTemp(Ity_I64);
20511 assign(t1, getIReg64(R_RBP));
20512 /* First PUT RSP looks redundant, but need it because RSP must
20513 always be up-to-date for Memcheck to work... */
20514 putIReg64(R_RSP, mkexpr(t1));
20515 assign(t2, loadLE(Ity_I64,mkexpr(t1)));
20516 putIReg64(R_RBP, mkexpr(t2));
20517 putIReg64(R_RSP, binop(Iop_Add64, mkexpr(t1), mkU64(8)) );
20518 DIP("leave\n");
20519 return delta;
20520
20521 case 0xCC: /* INT 3 */
sewardjc6f970f2012-04-02 21:54:49 +000020522 jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
20523 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020524 DIP("int $0x3\n");
20525 return delta;
20526
20527 case 0xD0: { /* Grp2 1,Eb */
20528 Bool decode_OK = True;
20529 if (haveF2orF3(pfx)) goto decode_failure;
20530 modrm = getUChar(delta);
20531 am_sz = lengthAMode(pfx,delta);
20532 d_sz = 0;
20533 d64 = 1;
20534 sz = 1;
20535 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20536 mkU8(d64), NULL, &decode_OK );
20537 if (!decode_OK) goto decode_failure;
20538 return delta;
20539 }
20540
20541 case 0xD1: { /* Grp2 1,Ev */
20542 Bool decode_OK = True;
20543 if (haveF2orF3(pfx)) goto decode_failure;
20544 modrm = getUChar(delta);
20545 am_sz = lengthAMode(pfx,delta);
20546 d_sz = 0;
20547 d64 = 1;
20548 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20549 mkU8(d64), NULL, &decode_OK );
20550 if (!decode_OK) goto decode_failure;
20551 return delta;
20552 }
20553
20554 case 0xD2: { /* Grp2 CL,Eb */
20555 Bool decode_OK = True;
20556 if (haveF2orF3(pfx)) goto decode_failure;
20557 modrm = getUChar(delta);
20558 am_sz = lengthAMode(pfx,delta);
20559 d_sz = 0;
20560 sz = 1;
20561 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20562 getIRegCL(), "%cl", &decode_OK );
20563 if (!decode_OK) goto decode_failure;
20564 return delta;
20565 }
20566
20567 case 0xD3: { /* Grp2 CL,Ev */
20568 Bool decode_OK = True;
20569 if (haveF2orF3(pfx)) goto decode_failure;
20570 modrm = getUChar(delta);
20571 am_sz = lengthAMode(pfx,delta);
20572 d_sz = 0;
20573 delta = dis_Grp2 ( vbi, pfx, delta, modrm, am_sz, d_sz, sz,
20574 getIRegCL(), "%cl", &decode_OK );
20575 if (!decode_OK) goto decode_failure;
20576 return delta;
20577 }
20578
20579 case 0xD8: /* X87 instructions */
20580 case 0xD9:
20581 case 0xDA:
20582 case 0xDB:
20583 case 0xDC:
20584 case 0xDD:
20585 case 0xDE:
20586 case 0xDF: {
20587 Bool redundantREXWok = False;
20588
20589 if (haveF2orF3(pfx))
20590 goto decode_failure;
20591
20592 /* kludge to tolerate redundant rex.w prefixes (should do this
20593 properly one day) */
20594 /* mono 1.1.18.1 produces 48 D9 FA, which is rex.w fsqrt */
20595 if ( (opc == 0xD9 && getUChar(delta+0) == 0xFA)/*fsqrt*/ )
20596 redundantREXWok = True;
20597
sewardj9ae42a72012-02-16 14:18:56 +000020598 Bool size_OK = False;
20599 if ( sz == 4 )
20600 size_OK = True;
20601 else if ( sz == 8 )
20602 size_OK = redundantREXWok;
20603 else if ( sz == 2 ) {
20604 int mod_rm = getUChar(delta+0);
20605 int reg = gregLO3ofRM(mod_rm);
20606 /* The HotSpot JVM uses these */
20607 if ( (opc == 0xDD) && (reg == 0 /* FLDL */ ||
20608 reg == 4 /* FNSAVE */ ||
20609 reg == 6 /* FRSTOR */ ) )
20610 size_OK = True;
sewardj80611e32012-01-20 13:07:24 +000020611 }
sewardj9ae42a72012-02-16 14:18:56 +000020612 /* AMD manual says 0x66 size override is ignored, except where
20613 it is meaningful */
20614 if (!size_OK)
20615 goto decode_failure;
20616
20617 Bool decode_OK = False;
20618 delta = dis_FPU ( &decode_OK, vbi, pfx, delta );
20619 if (!decode_OK)
20620 goto decode_failure;
20621
sewardj80611e32012-01-20 13:07:24 +000020622 return delta;
20623 }
20624
20625 case 0xE0: /* LOOPNE disp8: decrement count, jump if count != 0 && ZF==0 */
20626 case 0xE1: /* LOOPE disp8: decrement count, jump if count != 0 && ZF==1 */
20627 case 0xE2: /* LOOP disp8: decrement count, jump if count != 0 */
20628 { /* The docs say this uses rCX as a count depending on the
20629 address size override, not the operand one. */
20630 IRExpr* zbit = NULL;
20631 IRExpr* count = NULL;
20632 IRExpr* cond = NULL;
florian55085f82012-11-21 00:36:55 +000020633 const HChar* xtra = NULL;
sewardj80611e32012-01-20 13:07:24 +000020634
20635 if (have66orF2orF3(pfx) || 1==getRexW(pfx)) goto decode_failure;
20636 /* So at this point we've rejected any variants which appear to
20637 be governed by the usual operand-size modifiers. Hence only
20638 the address size prefix can have an effect. It changes the
20639 size from 64 (default) to 32. */
20640 d64 = guest_RIP_bbstart+delta+1 + getSDisp8(delta);
20641 delta++;
20642 if (haveASO(pfx)) {
20643 /* 64to32 of 64-bit get is merely a get-put improvement
20644 trick. */
20645 putIReg32(R_RCX, binop(Iop_Sub32,
20646 unop(Iop_64to32, getIReg64(R_RCX)),
20647 mkU32(1)));
20648 } else {
20649 putIReg64(R_RCX, binop(Iop_Sub64, getIReg64(R_RCX), mkU64(1)));
20650 }
20651
20652 /* This is correct, both for 32- and 64-bit versions. If we're
20653 doing a 32-bit dec and the result is zero then the default
20654 zero extension rule will cause the upper 32 bits to be zero
20655 too. Hence a 64-bit check against zero is OK. */
20656 count = getIReg64(R_RCX);
20657 cond = binop(Iop_CmpNE64, count, mkU64(0));
20658 switch (opc) {
20659 case 0xE2:
20660 xtra = "";
20661 break;
20662 case 0xE1:
20663 xtra = "e";
20664 zbit = mk_amd64g_calculate_condition( AMD64CondZ );
20665 cond = mkAnd1(cond, zbit);
20666 break;
20667 case 0xE0:
20668 xtra = "ne";
20669 zbit = mk_amd64g_calculate_condition( AMD64CondNZ );
20670 cond = mkAnd1(cond, zbit);
20671 break;
20672 default:
sewardjcc3d2192013-03-27 11:37:33 +000020673 vassert(0);
sewardj80611e32012-01-20 13:07:24 +000020674 }
sewardjc6f970f2012-04-02 21:54:49 +000020675 stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
sewardj80611e32012-01-20 13:07:24 +000020676
20677 DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
20678 return delta;
20679 }
20680
20681 case 0xE3:
20682 /* JRCXZ or JECXZ, depending on the address size override. */
20683 if (have66orF2orF3(pfx)) goto decode_failure;
20684 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
20685 delta++;
20686 if (haveASO(pfx)) {
20687 /* 32-bit */
20688 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
sewardjc6f970f2012-04-02 21:54:49 +000020689 unop(Iop_32Uto64, getIReg32(R_RCX)),
20690 mkU64(0)),
20691 Ijk_Boring,
20692 IRConst_U64(d64),
20693 OFFB_RIP
20694 ));
sewardj80611e32012-01-20 13:07:24 +000020695 DIP("jecxz 0x%llx\n", d64);
20696 } else {
20697 /* 64-bit */
20698 stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
20699 getIReg64(R_RCX),
20700 mkU64(0)),
sewardjc6f970f2012-04-02 21:54:49 +000020701 Ijk_Boring,
20702 IRConst_U64(d64),
20703 OFFB_RIP
20704 ));
sewardj80611e32012-01-20 13:07:24 +000020705 DIP("jrcxz 0x%llx\n", d64);
20706 }
20707 return delta;
20708
20709 case 0xE4: /* IN imm8, AL */
20710 sz = 1;
20711 t1 = newTemp(Ity_I64);
20712 abyte = getUChar(delta); delta++;
20713 assign(t1, mkU64( abyte & 0xFF ));
20714 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
20715 goto do_IN;
20716 case 0xE5: /* IN imm8, eAX */
20717 if (!(sz == 2 || sz == 4)) goto decode_failure;
20718 t1 = newTemp(Ity_I64);
20719 abyte = getUChar(delta); delta++;
20720 assign(t1, mkU64( abyte & 0xFF ));
20721 DIP("in%c $%d,%s\n", nameISize(sz), (Int)abyte, nameIRegRAX(sz));
20722 goto do_IN;
20723 case 0xEC: /* IN %DX, AL */
20724 sz = 1;
20725 t1 = newTemp(Ity_I64);
20726 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
20727 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
20728 nameIRegRAX(sz));
20729 goto do_IN;
20730 case 0xED: /* IN %DX, eAX */
20731 if (!(sz == 2 || sz == 4)) goto decode_failure;
20732 t1 = newTemp(Ity_I64);
20733 assign(t1, unop(Iop_16Uto64, getIRegRDX(2)));
20734 DIP("in%c %s,%s\n", nameISize(sz), nameIRegRDX(2),
20735 nameIRegRAX(sz));
20736 goto do_IN;
20737 do_IN: {
20738 /* At this point, sz indicates the width, and t1 is a 64-bit
20739 value giving port number. */
20740 IRDirty* d;
20741 if (haveF2orF3(pfx)) goto decode_failure;
20742 vassert(sz == 1 || sz == 2 || sz == 4);
20743 ty = szToITy(sz);
20744 t2 = newTemp(Ity_I64);
20745 d = unsafeIRDirty_1_N(
20746 t2,
20747 0/*regparms*/,
20748 "amd64g_dirtyhelper_IN",
20749 &amd64g_dirtyhelper_IN,
20750 mkIRExprVec_2( mkexpr(t1), mkU64(sz) )
20751 );
20752 /* do the call, dumping the result in t2. */
20753 stmt( IRStmt_Dirty(d) );
20754 putIRegRAX(sz, narrowTo( ty, mkexpr(t2) ) );
20755 return delta;
20756 }
20757
20758 case 0xE6: /* OUT AL, imm8 */
20759 sz = 1;
20760 t1 = newTemp(Ity_I64);
20761 abyte = getUChar(delta); delta++;
20762 assign( t1, mkU64( abyte & 0xFF ) );
20763 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
20764 goto do_OUT;
20765 case 0xE7: /* OUT eAX, imm8 */
20766 if (!(sz == 2 || sz == 4)) goto decode_failure;
20767 t1 = newTemp(Ity_I64);
20768 abyte = getUChar(delta); delta++;
20769 assign( t1, mkU64( abyte & 0xFF ) );
20770 DIP("out%c %s,$%d\n", nameISize(sz), nameIRegRAX(sz), (Int)abyte);
20771 goto do_OUT;
20772 case 0xEE: /* OUT AL, %DX */
20773 sz = 1;
20774 t1 = newTemp(Ity_I64);
20775 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
20776 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
20777 nameIRegRDX(2));
20778 goto do_OUT;
20779 case 0xEF: /* OUT eAX, %DX */
20780 if (!(sz == 2 || sz == 4)) goto decode_failure;
20781 t1 = newTemp(Ity_I64);
20782 assign( t1, unop(Iop_16Uto64, getIRegRDX(2)) );
20783 DIP("out%c %s,%s\n", nameISize(sz), nameIRegRAX(sz),
20784 nameIRegRDX(2));
20785 goto do_OUT;
20786 do_OUT: {
20787 /* At this point, sz indicates the width, and t1 is a 64-bit
20788 value giving port number. */
20789 IRDirty* d;
20790 if (haveF2orF3(pfx)) goto decode_failure;
20791 vassert(sz == 1 || sz == 2 || sz == 4);
20792 ty = szToITy(sz);
20793 d = unsafeIRDirty_0_N(
20794 0/*regparms*/,
20795 "amd64g_dirtyhelper_OUT",
20796 &amd64g_dirtyhelper_OUT,
20797 mkIRExprVec_3( mkexpr(t1),
20798 widenUto64( getIRegRAX(sz) ),
20799 mkU64(sz) )
20800 );
20801 stmt( IRStmt_Dirty(d) );
20802 return delta;
20803 }
20804
20805 case 0xE8: /* CALL J4 */
mjw67ac3fd2014-05-09 11:41:06 +000020806 if (haveF3(pfx)) goto decode_failure;
20807 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020808 d64 = getSDisp32(delta); delta += 4;
20809 d64 += (guest_RIP_bbstart+delta);
20810 /* (guest_RIP_bbstart+delta) == return-to addr, d64 == call-to addr */
20811 t1 = newTemp(Ity_I64);
20812 assign(t1, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
20813 putIReg64(R_RSP, mkexpr(t1));
20814 storeLE( mkexpr(t1), mkU64(guest_RIP_bbstart+delta));
20815 t2 = newTemp(Ity_I64);
20816 assign(t2, mkU64((Addr64)d64));
20817 make_redzone_AbiHint(vbi, t1, t2/*nia*/, "call-d32");
20818 if (resteerOkFn( callback_opaque, (Addr64)d64) ) {
20819 /* follow into the call target. */
20820 dres->whatNext = Dis_ResteerU;
20821 dres->continueAt = d64;
20822 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020823 jmp_lit(dres, Ijk_Call, d64);
20824 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020825 }
20826 DIP("call 0x%llx\n",d64);
20827 return delta;
20828
20829 case 0xE9: /* Jv (jump, 16/32 offset) */
mjw67ac3fd2014-05-09 11:41:06 +000020830 if (haveF3(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020831 if (sz != 4)
20832 goto decode_failure; /* JRS added 2004 July 11 */
mjw67ac3fd2014-05-09 11:41:06 +000020833 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020834 d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta);
20835 delta += sz;
20836 if (resteerOkFn(callback_opaque,d64)) {
20837 dres->whatNext = Dis_ResteerU;
20838 dres->continueAt = d64;
20839 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020840 jmp_lit(dres, Ijk_Boring, d64);
20841 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020842 }
20843 DIP("jmp 0x%llx\n", d64);
20844 return delta;
20845
20846 case 0xEB: /* Jb (jump, byte offset) */
mjw67ac3fd2014-05-09 11:41:06 +000020847 if (haveF3(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000020848 if (sz != 4)
20849 goto decode_failure; /* JRS added 2004 July 11 */
mjw67ac3fd2014-05-09 11:41:06 +000020850 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000020851 d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta);
20852 delta++;
20853 if (resteerOkFn(callback_opaque,d64)) {
20854 dres->whatNext = Dis_ResteerU;
20855 dres->continueAt = d64;
20856 } else {
sewardjc6f970f2012-04-02 21:54:49 +000020857 jmp_lit(dres, Ijk_Boring, d64);
20858 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000020859 }
20860 DIP("jmp-8 0x%llx\n", d64);
20861 return delta;
20862
20863 case 0xF5: /* CMC */
20864 case 0xF8: /* CLC */
20865 case 0xF9: /* STC */
20866 t1 = newTemp(Ity_I64);
20867 t2 = newTemp(Ity_I64);
20868 assign( t1, mk_amd64g_calculate_rflags_all() );
20869 switch (opc) {
20870 case 0xF5:
20871 assign( t2, binop(Iop_Xor64, mkexpr(t1),
20872 mkU64(AMD64G_CC_MASK_C)));
20873 DIP("cmc\n");
20874 break;
20875 case 0xF8:
20876 assign( t2, binop(Iop_And64, mkexpr(t1),
20877 mkU64(~AMD64G_CC_MASK_C)));
20878 DIP("clc\n");
20879 break;
20880 case 0xF9:
20881 assign( t2, binop(Iop_Or64, mkexpr(t1),
20882 mkU64(AMD64G_CC_MASK_C)));
20883 DIP("stc\n");
20884 break;
20885 default:
20886 vpanic("disInstr(x64)(cmc/clc/stc)");
20887 }
20888 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
20889 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
20890 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t2) ));
20891 /* Set NDEP even though it isn't used. This makes redundant-PUT
20892 elimination of previous stores to this field work better. */
20893 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
20894 return delta;
20895
20896 case 0xF6: { /* Grp3 Eb */
20897 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020898 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20899 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020900 delta = dis_Grp3 ( vbi, pfx, 1, delta, &decode_OK );
20901 if (!decode_OK) goto decode_failure;
20902 return delta;
20903 }
20904
20905 case 0xF7: { /* Grp3 Ev */
20906 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020907 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20908 /* We now let dis_Grp3 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020909 delta = dis_Grp3 ( vbi, pfx, sz, delta, &decode_OK );
20910 if (!decode_OK) goto decode_failure;
20911 return delta;
20912 }
20913
20914 case 0xFC: /* CLD */
20915 if (haveF2orF3(pfx)) goto decode_failure;
20916 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(1)) );
20917 DIP("cld\n");
20918 return delta;
20919
20920 case 0xFD: /* STD */
20921 if (haveF2orF3(pfx)) goto decode_failure;
20922 stmt( IRStmt_Put( OFFB_DFLAG, mkU64(-1ULL)) );
20923 DIP("std\n");
20924 return delta;
20925
20926 case 0xFE: { /* Grp4 Eb */
20927 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020928 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20929 /* We now let dis_Grp4 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020930 delta = dis_Grp4 ( vbi, pfx, delta, &decode_OK );
20931 if (!decode_OK) goto decode_failure;
20932 return delta;
20933 }
20934
20935 case 0xFF: { /* Grp5 Ev */
20936 Bool decode_OK = True;
sewardj38b1d692013-10-15 17:21:42 +000020937 /* RM'd: if (haveF2orF3(pfx)) goto decode_failure; */
20938 /* We now let dis_Grp5 itself decide if F2 and/or F3 are valid */
sewardj80611e32012-01-20 13:07:24 +000020939 delta = dis_Grp5 ( vbi, pfx, sz, delta, dres, &decode_OK );
20940 if (!decode_OK) goto decode_failure;
20941 return delta;
20942 }
20943
20944 default:
20945 break;
20946
20947 }
20948
20949 decode_failure:
20950 return deltaIN; /* fail */
20951}
20952
20953
20954/*------------------------------------------------------------*/
20955/*--- ---*/
20956/*--- Top-level post-escape decoders: dis_ESC_0F ---*/
20957/*--- ---*/
20958/*------------------------------------------------------------*/
20959
sewardjf85e1772012-07-15 10:11:10 +000020960static IRTemp math_BSWAP ( IRTemp t1, IRType ty )
20961{
20962 IRTemp t2 = newTemp(ty);
20963 if (ty == Ity_I64) {
20964 IRTemp m8 = newTemp(Ity_I64);
20965 IRTemp s8 = newTemp(Ity_I64);
20966 IRTemp m16 = newTemp(Ity_I64);
20967 IRTemp s16 = newTemp(Ity_I64);
20968 IRTemp m32 = newTemp(Ity_I64);
20969 assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
20970 assign( s8,
20971 binop(Iop_Or64,
20972 binop(Iop_Shr64,
20973 binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
20974 mkU8(8)),
20975 binop(Iop_And64,
20976 binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
20977 mkexpr(m8))
20978 )
20979 );
20980
20981 assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
20982 assign( s16,
20983 binop(Iop_Or64,
20984 binop(Iop_Shr64,
20985 binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
20986 mkU8(16)),
20987 binop(Iop_And64,
20988 binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
20989 mkexpr(m16))
20990 )
20991 );
20992
20993 assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
20994 assign( t2,
20995 binop(Iop_Or64,
20996 binop(Iop_Shr64,
20997 binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
20998 mkU8(32)),
20999 binop(Iop_And64,
21000 binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
21001 mkexpr(m32))
21002 )
21003 );
21004 return t2;
21005 }
21006 if (ty == Ity_I32) {
21007 assign( t2,
21008 binop(
21009 Iop_Or32,
21010 binop(Iop_Shl32, mkexpr(t1), mkU8(24)),
21011 binop(
21012 Iop_Or32,
21013 binop(Iop_And32, binop(Iop_Shl32, mkexpr(t1), mkU8(8)),
21014 mkU32(0x00FF0000)),
21015 binop(Iop_Or32,
21016 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(8)),
21017 mkU32(0x0000FF00)),
21018 binop(Iop_And32, binop(Iop_Shr32, mkexpr(t1), mkU8(24)),
21019 mkU32(0x000000FF) )
21020 )))
21021 );
21022 return t2;
21023 }
21024 if (ty == Ity_I16) {
21025 assign(t2,
21026 binop(Iop_Or16,
21027 binop(Iop_Shl16, mkexpr(t1), mkU8(8)),
21028 binop(Iop_Shr16, mkexpr(t1), mkU8(8)) ));
21029 return t2;
21030 }
21031 vassert(0);
21032 /*NOTREACHED*/
21033 return IRTemp_INVALID;
21034}
21035
21036
sewardj80611e32012-01-20 13:07:24 +000021037__attribute__((noinline))
21038static
21039Long dis_ESC_0F (
21040 /*MB_OUT*/DisResult* dres,
21041 /*MB_OUT*/Bool* expect_CAS,
21042 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
21043 Bool resteerCisOk,
21044 void* callback_opaque,
21045 VexArchInfo* archinfo,
21046 VexAbiInfo* vbi,
21047 Prefix pfx, Int sz, Long deltaIN
21048 )
21049{
21050 Long d64 = 0;
21051 IRTemp addr = IRTemp_INVALID;
21052 IRTemp t1 = IRTemp_INVALID;
21053 IRTemp t2 = IRTemp_INVALID;
21054 UChar modrm = 0;
21055 Int am_sz = 0;
21056 Int alen = 0;
21057 HChar dis_buf[50];
21058
21059 /* In the first switch, look for ordinary integer insns. */
21060 Long delta = deltaIN;
21061 UChar opc = getUChar(delta);
21062 delta++;
21063 switch (opc) { /* first switch */
21064
sewardjfe0c5e72012-06-15 15:48:07 +000021065 case 0x01:
sewardj80611e32012-01-20 13:07:24 +000021066 {
sewardj80611e32012-01-20 13:07:24 +000021067 modrm = getUChar(delta);
sewardjfe0c5e72012-06-15 15:48:07 +000021068 /* 0F 01 /0 -- SGDT */
21069 /* 0F 01 /1 -- SIDT */
21070 if (!epartIsReg(modrm)
21071 && (gregLO3ofRM(modrm) == 0 || gregLO3ofRM(modrm) == 1)) {
21072 /* This is really revolting, but ... since each processor
21073 (core) only has one IDT and one GDT, just let the guest
21074 see it (pass-through semantics). I can't see any way to
21075 construct a faked-up value, so don't bother to try. */
21076 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21077 delta += alen;
21078 switch (gregLO3ofRM(modrm)) {
21079 case 0: DIP("sgdt %s\n", dis_buf); break;
21080 case 1: DIP("sidt %s\n", dis_buf); break;
21081 default: vassert(0); /*NOTREACHED*/
21082 }
21083 IRDirty* d = unsafeIRDirty_0_N (
21084 0/*regparms*/,
21085 "amd64g_dirtyhelper_SxDT",
21086 &amd64g_dirtyhelper_SxDT,
21087 mkIRExprVec_2( mkexpr(addr),
21088 mkU64(gregLO3ofRM(modrm)) )
21089 );
21090 /* declare we're writing memory */
21091 d->mFx = Ifx_Write;
21092 d->mAddr = mkexpr(addr);
21093 d->mSize = 6;
21094 stmt( IRStmt_Dirty(d) );
21095 return delta;
sewardj80611e32012-01-20 13:07:24 +000021096 }
sewardjfe0c5e72012-06-15 15:48:07 +000021097 /* 0F 01 D0 = XGETBV */
21098 if (modrm == 0xD0 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21099 delta += 1;
21100 DIP("xgetbv\n");
21101 /* Fault (SEGV) if ECX isn't zero. Intel docs say #GP and I
21102 am not sure if that translates in to SEGV or to something
21103 else, in user space. */
21104 t1 = newTemp(Ity_I32);
21105 assign( t1, getIReg32(R_RCX) );
21106 stmt( IRStmt_Exit(binop(Iop_CmpNE32, mkexpr(t1), mkU32(0)),
21107 Ijk_SigSEGV,
21108 IRConst_U64(guest_RIP_curr_instr),
21109 OFFB_RIP
21110 ));
21111 putIRegRAX(4, mkU32(7));
21112 putIRegRDX(4, mkU32(0));
21113 return delta;
21114 }
mjwe3fa0f82014-08-21 17:46:00 +000021115 /* BEGIN HACKY SUPPORT FOR xend */
21116 /* 0F 01 D5 = XEND */
21117 if (modrm == 0xD5 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
21118 /* We are never in a transaction (xbegin immediately aborts).
21119 So this just always generates a General Protection Fault. */
21120 delta += 1;
21121 jmp_lit(dres, Ijk_SigSEGV, guest_RIP_bbstart + delta);
21122 vassert(dres->whatNext == Dis_StopHere);
21123 DIP("xend\n");
21124 return delta;
21125 }
21126 /* END HACKY SUPPORT FOR xend */
sewardj9e4c3762013-09-27 15:03:58 +000021127 /* BEGIN HACKY SUPPORT FOR xtest */
21128 /* 0F 01 D6 = XTEST */
mjw9d690c62013-10-04 22:31:48 +000021129 if (modrm == 0xD6 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardj9e4c3762013-09-27 15:03:58 +000021130 /* Sets ZF because there never is a transaction, and all
21131 CF, OF, SF, PF and AF are always cleared by xtest. */
21132 delta += 1;
21133 DIP("xtest\n");
21134 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21135 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21136 stmt( IRStmt_Put( OFFB_CC_DEP1, mkU64(AMD64G_CC_MASK_Z) ));
21137 /* Set NDEP even though it isn't used. This makes redundant-PUT
21138 elimination of previous stores to this field work better. */
21139 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21140 return delta;
21141 }
21142 /* END HACKY SUPPORT FOR xtest */
sewardj818c7302013-03-26 13:53:18 +000021143 /* 0F 01 F9 = RDTSCP */
21144 if (modrm == 0xF9 && (archinfo->hwcaps & VEX_HWCAPS_AMD64_RDTSCP)) {
21145 delta += 1;
21146 /* Uses dirty helper:
21147 void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* )
21148 declared to wr rax, rcx, rdx
21149 */
21150 const HChar* fName = "amd64g_dirtyhelper_RDTSCP";
21151 void* fAddr = &amd64g_dirtyhelper_RDTSCP;
21152 IRDirty* d
21153 = unsafeIRDirty_0_N ( 0/*regparms*/,
florian90419562013-08-15 20:54:52 +000021154 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
sewardj818c7302013-03-26 13:53:18 +000021155 /* declare guest state effects */
sewardj818c7302013-03-26 13:53:18 +000021156 d->nFxState = 3;
21157 vex_bzero(&d->fxState, sizeof(d->fxState));
21158 d->fxState[0].fx = Ifx_Write;
21159 d->fxState[0].offset = OFFB_RAX;
21160 d->fxState[0].size = 8;
21161 d->fxState[1].fx = Ifx_Write;
21162 d->fxState[1].offset = OFFB_RCX;
21163 d->fxState[1].size = 8;
21164 d->fxState[2].fx = Ifx_Write;
21165 d->fxState[2].offset = OFFB_RDX;
21166 d->fxState[2].size = 8;
21167 /* execute the dirty call, side-effecting guest state */
21168 stmt( IRStmt_Dirty(d) );
21169 /* RDTSCP is a serialising insn. So, just in case someone is
21170 using it as a memory fence ... */
21171 stmt( IRStmt_MBE(Imbe_Fence) );
21172 DIP("rdtscp\n");
21173 return delta;
21174 }
sewardjfe0c5e72012-06-15 15:48:07 +000021175 /* else decode failed */
21176 break;
sewardj80611e32012-01-20 13:07:24 +000021177 }
21178
21179 case 0x05: /* SYSCALL */
21180 guest_RIP_next_mustcheck = True;
21181 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
21182 putIReg64( R_RCX, mkU64(guest_RIP_next_assumed) );
21183 /* It's important that all guest state is up-to-date
21184 at this point. So we declare an end-of-block here, which
21185 forces any cached guest state to be flushed. */
sewardjc6f970f2012-04-02 21:54:49 +000021186 jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
21187 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000021188 DIP("syscall\n");
21189 return delta;
21190
sewardj47855302012-07-14 14:31:17 +000021191 case 0x0B: /* UD2 */
21192 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
21193 jmp_lit(dres, Ijk_NoDecode, guest_RIP_curr_instr);
21194 vassert(dres->whatNext == Dis_StopHere);
21195 DIP("ud2\n");
21196 return delta;
21197
sewardj944ff5a2012-08-23 19:47:05 +000021198 case 0x0D: /* 0F 0D /0 -- prefetch mem8 */
21199 /* 0F 0D /1 -- prefetchw mem8 */
21200 if (have66orF2orF3(pfx)) goto decode_failure;
21201 modrm = getUChar(delta);
21202 if (epartIsReg(modrm)) goto decode_failure;
21203 if (gregLO3ofRM(modrm) != 0 && gregLO3ofRM(modrm) != 1)
21204 goto decode_failure;
21205 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21206 delta += alen;
21207 switch (gregLO3ofRM(modrm)) {
21208 case 0: DIP("prefetch %s\n", dis_buf); break;
21209 case 1: DIP("prefetchw %s\n", dis_buf); break;
21210 default: vassert(0); /*NOTREACHED*/
21211 }
21212 return delta;
21213
sewardj80611e32012-01-20 13:07:24 +000021214 case 0x1F:
21215 if (haveF2orF3(pfx)) goto decode_failure;
21216 modrm = getUChar(delta);
21217 if (epartIsReg(modrm)) goto decode_failure;
21218 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21219 delta += alen;
21220 DIP("nop%c %s\n", nameISize(sz), dis_buf);
21221 return delta;
21222
21223 case 0x31: { /* RDTSC */
21224 IRTemp val = newTemp(Ity_I64);
21225 IRExpr** args = mkIRExprVec_0();
21226 IRDirty* d = unsafeIRDirty_1_N (
21227 val,
21228 0/*regparms*/,
21229 "amd64g_dirtyhelper_RDTSC",
21230 &amd64g_dirtyhelper_RDTSC,
21231 args
21232 );
21233 if (have66orF2orF3(pfx)) goto decode_failure;
21234 /* execute the dirty call, dumping the result in val. */
21235 stmt( IRStmt_Dirty(d) );
21236 putIRegRDX(4, unop(Iop_64HIto32, mkexpr(val)));
21237 putIRegRAX(4, unop(Iop_64to32, mkexpr(val)));
21238 DIP("rdtsc\n");
21239 return delta;
21240 }
21241
21242 case 0x40:
21243 case 0x41:
21244 case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */
21245 case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */
21246 case 0x44: /* CMOVZb/CMOVEb (cmov zero) */
21247 case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */
21248 case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */
21249 case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */
21250 case 0x48: /* CMOVSb (cmov negative) */
21251 case 0x49: /* CMOVSb (cmov not negative) */
21252 case 0x4A: /* CMOVP (cmov parity even) */
21253 case 0x4B: /* CMOVNP (cmov parity odd) */
21254 case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */
21255 case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */
21256 case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */
21257 case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */
21258 if (haveF2orF3(pfx)) goto decode_failure;
21259 delta = dis_cmov_E_G(vbi, pfx, sz, (AMD64Condcode)(opc - 0x40), delta);
21260 return delta;
21261
21262 case 0x80:
21263 case 0x81:
21264 case 0x82: /* JBb/JNAEb (jump below) */
21265 case 0x83: /* JNBb/JAEb (jump not below) */
21266 case 0x84: /* JZb/JEb (jump zero) */
21267 case 0x85: /* JNZb/JNEb (jump not zero) */
21268 case 0x86: /* JBEb/JNAb (jump below or equal) */
21269 case 0x87: /* JNBEb/JAb (jump not below or equal) */
21270 case 0x88: /* JSb (jump negative) */
21271 case 0x89: /* JSb (jump not negative) */
21272 case 0x8A: /* JP (jump parity even) */
21273 case 0x8B: /* JNP/JPO (jump parity odd) */
21274 case 0x8C: /* JLb/JNGEb (jump less) */
21275 case 0x8D: /* JGEb/JNLb (jump greater or equal) */
21276 case 0x8E: /* JLEb/JNGb (jump less or equal) */
21277 case 0x8F: { /* JGb/JNLEb (jump greater) */
21278 Long jmpDelta;
florian55085f82012-11-21 00:36:55 +000021279 const HChar* comment = "";
mjw67ac3fd2014-05-09 11:41:06 +000021280 if (haveF3(pfx)) goto decode_failure;
21281 if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */
sewardj80611e32012-01-20 13:07:24 +000021282 jmpDelta = getSDisp32(delta);
21283 d64 = (guest_RIP_bbstart+delta+4) + jmpDelta;
21284 delta += 4;
21285 if (resteerCisOk
21286 && vex_control.guest_chase_cond
21287 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21288 && jmpDelta < 0
21289 && resteerOkFn( callback_opaque, d64) ) {
21290 /* Speculation: assume this backward branch is taken. So
21291 we need to emit a side-exit to the insn following this
21292 one, on the negation of the condition, and continue at
21293 the branch target address (d64). If we wind up back at
21294 the first instruction of the trace, just stop; it's
21295 better to let the IR loop unroller handle that case. */
21296 stmt( IRStmt_Exit(
21297 mk_amd64g_calculate_condition(
21298 (AMD64Condcode)(1 ^ (opc - 0x80))),
21299 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000021300 IRConst_U64(guest_RIP_bbstart+delta),
21301 OFFB_RIP
21302 ));
sewardj80611e32012-01-20 13:07:24 +000021303 dres->whatNext = Dis_ResteerC;
21304 dres->continueAt = d64;
21305 comment = "(assumed taken)";
21306 }
21307 else
21308 if (resteerCisOk
21309 && vex_control.guest_chase_cond
21310 && (Addr64)d64 != (Addr64)guest_RIP_bbstart
21311 && jmpDelta >= 0
21312 && resteerOkFn( callback_opaque, guest_RIP_bbstart+delta ) ) {
21313 /* Speculation: assume this forward branch is not taken.
21314 So we need to emit a side-exit to d64 (the dest) and
21315 continue disassembling at the insn immediately
21316 following this one. */
21317 stmt( IRStmt_Exit(
21318 mk_amd64g_calculate_condition((AMD64Condcode)
21319 (opc - 0x80)),
21320 Ijk_Boring,
sewardjc6f970f2012-04-02 21:54:49 +000021321 IRConst_U64(d64),
21322 OFFB_RIP
21323 ));
sewardj80611e32012-01-20 13:07:24 +000021324 dres->whatNext = Dis_ResteerC;
21325 dres->continueAt = guest_RIP_bbstart+delta;
21326 comment = "(assumed not taken)";
21327 }
21328 else {
21329 /* Conservative default translation - end the block at
21330 this point. */
sewardjc6f970f2012-04-02 21:54:49 +000021331 jcc_01( dres, (AMD64Condcode)(opc - 0x80),
21332 guest_RIP_bbstart+delta, d64 );
21333 vassert(dres->whatNext == Dis_StopHere);
sewardj80611e32012-01-20 13:07:24 +000021334 }
21335 DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
21336 return delta;
21337 }
21338
21339 case 0x90:
21340 case 0x91:
21341 case 0x92: /* set-Bb/set-NAEb (set if below) */
21342 case 0x93: /* set-NBb/set-AEb (set if not below) */
21343 case 0x94: /* set-Zb/set-Eb (set if zero) */
21344 case 0x95: /* set-NZb/set-NEb (set if not zero) */
21345 case 0x96: /* set-BEb/set-NAb (set if below or equal) */
21346 case 0x97: /* set-NBEb/set-Ab (set if not below or equal) */
21347 case 0x98: /* set-Sb (set if negative) */
21348 case 0x99: /* set-Sb (set if not negative) */
21349 case 0x9A: /* set-P (set if parity even) */
21350 case 0x9B: /* set-NP (set if parity odd) */
21351 case 0x9C: /* set-Lb/set-NGEb (set if less) */
21352 case 0x9D: /* set-GEb/set-NLb (set if greater or equal) */
21353 case 0x9E: /* set-LEb/set-NGb (set if less or equal) */
21354 case 0x9F: /* set-Gb/set-NLEb (set if greater) */
21355 if (haveF2orF3(pfx)) goto decode_failure;
21356 t1 = newTemp(Ity_I8);
21357 assign( t1, unop(Iop_1Uto8,mk_amd64g_calculate_condition(opc-0x90)) );
21358 modrm = getUChar(delta);
21359 if (epartIsReg(modrm)) {
21360 delta++;
21361 putIRegE(1, pfx, modrm, mkexpr(t1));
21362 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90),
21363 nameIRegE(1,pfx,modrm));
21364 } else {
21365 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21366 delta += alen;
21367 storeLE( mkexpr(addr), mkexpr(t1) );
21368 DIP("set%s %s\n", name_AMD64Condcode(opc-0x90), dis_buf);
21369 }
21370 return delta;
21371
mjw67ac3fd2014-05-09 11:41:06 +000021372 case 0x1A:
21373 case 0x1B: { /* Future MPX instructions, currently NOPs.
21374 BNDMK b, m F3 0F 1B
21375 BNDCL b, r/m F3 0F 1A
21376 BNDCU b, r/m F2 0F 1A
21377 BNDCN b, r/m F2 0F 1B
21378 BNDMOV b, b/m 66 0F 1A
21379 BNDMOV b/m, b 66 0F 1B
21380 BNDLDX b, mib 0F 1A
21381 BNDSTX mib, b 0F 1B */
21382
21383 /* All instructions have two operands. One operand is always the
21384 bnd register number (bnd0-bnd3, other register numbers are
21385 ignored when MPX isn't enabled, but should generate an
21386 exception if MPX is enabled) given by gregOfRexRM. The other
21387 operand is either a ModRM:reg, ModRM:r/m or a SIB encoded
21388 address, all of which can be decoded by using either
21389 eregOfRexRM or disAMode. */
21390
21391 modrm = getUChar(delta);
21392 int bnd = gregOfRexRM(pfx,modrm);
21393 const HChar *oper;
21394 if (epartIsReg(modrm)) {
21395 oper = nameIReg64 (eregOfRexRM(pfx,modrm));
21396 delta += 1;
21397 } else {
21398 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21399 delta += alen;
21400 oper = dis_buf;
21401 }
21402
21403 if (haveF3no66noF2 (pfx)) {
21404 if (opc == 0x1B) {
21405 DIP ("bndmk %s, %%bnd%d\n", oper, bnd);
21406 } else /* opc == 0x1A */ {
21407 DIP ("bndcl %s, %%bnd%d\n", oper, bnd);
21408 }
21409 } else if (haveF2no66noF3 (pfx)) {
21410 if (opc == 0x1A) {
21411 DIP ("bndcu %s, %%bnd%d\n", oper, bnd);
21412 } else /* opc == 0x1B */ {
21413 DIP ("bndcn %s, %%bnd%d\n", oper, bnd);
21414 }
21415 } else if (have66noF2noF3 (pfx)) {
21416 if (opc == 0x1A) {
21417 DIP ("bndmov %s, %%bnd%d\n", oper, bnd);
21418 } else /* opc == 0x1B */ {
21419 DIP ("bndmov %%bnd%d, %s\n", bnd, oper);
21420 }
21421 } else if (haveNo66noF2noF3 (pfx)) {
21422 if (opc == 0x1A) {
21423 DIP ("bndldx %s, %%bnd%d\n", oper, bnd);
21424 } else /* opc == 0x1B */ {
21425 DIP ("bndstx %%bnd%d, %s\n", bnd, oper);
21426 }
21427 } else goto decode_failure;
21428
21429 return delta;
21430 }
21431
sewardj80611e32012-01-20 13:07:24 +000021432 case 0xA2: { /* CPUID */
21433 /* Uses dirty helper:
21434 void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* )
21435 declared to mod rax, wr rbx, rcx, rdx
21436 */
sewardje3a10d72014-11-11 12:49:21 +000021437 IRDirty* d = NULL;
21438 const HChar* fName = NULL;
21439 void* fAddr = NULL;
21440
21441 /* JRS 2014-11-11: this a really horrible temp kludge to work
21442 around the fact that the Yosemite (OSX 10.10)
21443 /usr/lib/system/libdyld.dylib expects XSAVE/XRSTOR to be
21444 implemented, because amd64g_dirtyhelper_CPUID_avx_and_cx16
21445 claims they are supported, but so far they aren't. So cause
21446 it to fall back to a simpler CPU. The cleaner approach of
21447 setting CPUID(eax=1).OSXSAVE=0 and .XSAVE=0 isn't desirable
21448 since it will (per the official Intel guidelines) lead to
21449 software concluding that AVX isn't supported.
21450
21451 This is also a kludge in that putting these ifdefs here checks
21452 the build (host) architecture, when really we're checking the
21453 guest architecture. */
21454 Bool this_is_yosemite = False;
21455# if defined(VGP_amd64_darwin) && DARWIN_VERS == DARWIN_10_10
21456 this_is_yosemite = True;
21457# endif
21458
sewardj80611e32012-01-20 13:07:24 +000021459 if (haveF2orF3(pfx)) goto decode_failure;
mjwc31e6cb2013-09-13 13:30:45 +000021460 /* This isn't entirely correct, CPUID should depend on the VEX
21461 capabilities, not on the underlying CPU. See bug #324882. */
sewardje3a10d72014-11-11 12:49:21 +000021462 if (!this_is_yosemite &&
21463 (archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
mjwc31e6cb2013-09-13 13:30:45 +000021464 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16) &&
21465 (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX)) {
sewardjfe0c5e72012-06-15 15:48:07 +000021466 fName = "amd64g_dirtyhelper_CPUID_avx_and_cx16";
21467 fAddr = &amd64g_dirtyhelper_CPUID_avx_and_cx16;
21468 /* This is a Core-i5-2300-like machine */
21469 }
mjwc31e6cb2013-09-13 13:30:45 +000021470 else if ((archinfo->hwcaps & VEX_HWCAPS_AMD64_SSE3) &&
21471 (archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16)) {
sewardj80611e32012-01-20 13:07:24 +000021472 fName = "amd64g_dirtyhelper_CPUID_sse42_and_cx16";
21473 fAddr = &amd64g_dirtyhelper_CPUID_sse42_and_cx16;
sewardjfe0c5e72012-06-15 15:48:07 +000021474 /* This is a Core-i5-670-like machine */
sewardj80611e32012-01-20 13:07:24 +000021475 }
21476 else {
21477 /* Give a CPUID for at least a baseline machine, SSE2
21478 only, and no CX16 */
21479 fName = "amd64g_dirtyhelper_CPUID_baseline";
21480 fAddr = &amd64g_dirtyhelper_CPUID_baseline;
21481 }
21482
21483 vassert(fName); vassert(fAddr);
21484 d = unsafeIRDirty_0_N ( 0/*regparms*/,
florian90419562013-08-15 20:54:52 +000021485 fName, fAddr, mkIRExprVec_1(IRExpr_BBPTR()) );
sewardj80611e32012-01-20 13:07:24 +000021486 /* declare guest state effects */
sewardj80611e32012-01-20 13:07:24 +000021487 d->nFxState = 4;
sewardjc9069f22012-06-01 16:09:50 +000021488 vex_bzero(&d->fxState, sizeof(d->fxState));
sewardj80611e32012-01-20 13:07:24 +000021489 d->fxState[0].fx = Ifx_Modify;
21490 d->fxState[0].offset = OFFB_RAX;
21491 d->fxState[0].size = 8;
21492 d->fxState[1].fx = Ifx_Write;
21493 d->fxState[1].offset = OFFB_RBX;
21494 d->fxState[1].size = 8;
21495 d->fxState[2].fx = Ifx_Modify;
21496 d->fxState[2].offset = OFFB_RCX;
21497 d->fxState[2].size = 8;
21498 d->fxState[3].fx = Ifx_Write;
21499 d->fxState[3].offset = OFFB_RDX;
21500 d->fxState[3].size = 8;
21501 /* execute the dirty call, side-effecting guest state */
21502 stmt( IRStmt_Dirty(d) );
21503 /* CPUID is a serialising insn. So, just in case someone is
21504 using it as a memory fence ... */
21505 stmt( IRStmt_MBE(Imbe_Fence) );
21506 DIP("cpuid\n");
21507 return delta;
21508 }
21509
sewardj38b1d692013-10-15 17:21:42 +000021510 case 0xA3: { /* BT Gv,Ev */
21511 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21512 Bool ok = True;
sewardj80611e32012-01-20 13:07:24 +000021513 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021514 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpNone, &ok );
21515 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021516 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021517 }
sewardj80611e32012-01-20 13:07:24 +000021518
21519 case 0xA4: /* SHLDv imm8,Gv,Ev */
21520 modrm = getUChar(delta);
21521 d64 = delta + lengthAMode(pfx, delta);
21522 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
21523 delta = dis_SHLRD_Gv_Ev (
21524 vbi, pfx, delta, modrm, sz,
21525 mkU8(getUChar(d64)), True, /* literal */
21526 dis_buf, True /* left */ );
21527 return delta;
21528
21529 case 0xA5: /* SHLDv %cl,Gv,Ev */
21530 modrm = getUChar(delta);
21531 delta = dis_SHLRD_Gv_Ev (
21532 vbi, pfx, delta, modrm, sz,
21533 getIRegCL(), False, /* not literal */
21534 "%cl", True /* left */ );
21535 return delta;
21536
sewardj38b1d692013-10-15 17:21:42 +000021537 case 0xAB: { /* BTS Gv,Ev */
21538 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21539 Bool ok = True;
sewardj80611e32012-01-20 13:07:24 +000021540 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021541 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpSet, &ok );
21542 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021543 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021544 }
sewardj80611e32012-01-20 13:07:24 +000021545
21546 case 0xAC: /* SHRDv imm8,Gv,Ev */
21547 modrm = getUChar(delta);
21548 d64 = delta + lengthAMode(pfx, delta);
21549 vex_sprintf(dis_buf, "$%d", (Int)getUChar(d64));
21550 delta = dis_SHLRD_Gv_Ev (
21551 vbi, pfx, delta, modrm, sz,
21552 mkU8(getUChar(d64)), True, /* literal */
21553 dis_buf, False /* right */ );
21554 return delta;
21555
21556 case 0xAD: /* SHRDv %cl,Gv,Ev */
21557 modrm = getUChar(delta);
21558 delta = dis_SHLRD_Gv_Ev (
21559 vbi, pfx, delta, modrm, sz,
21560 getIRegCL(), False, /* not literal */
21561 "%cl", False /* right */);
21562 return delta;
21563
21564 case 0xAF: /* IMUL Ev, Gv */
21565 if (haveF2orF3(pfx)) goto decode_failure;
21566 delta = dis_mul_E_G ( vbi, pfx, sz, delta );
21567 return delta;
21568
sewardj38b1d692013-10-15 17:21:42 +000021569 case 0xB0: { /* CMPXCHG Gb,Eb */
21570 Bool ok = True;
21571 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
21572 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, 1, delta );
21573 if (!ok) goto decode_failure;
21574 return delta;
21575 }
21576
sewardj80611e32012-01-20 13:07:24 +000021577 case 0xB1: { /* CMPXCHG Gv,Ev (allowed in 16,32,64 bit) */
21578 Bool ok = True;
sewardj38b1d692013-10-15 17:21:42 +000021579 /* We let dis_cmpxchg_G_E decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021580 if (sz != 2 && sz != 4 && sz != 8) goto decode_failure;
21581 delta = dis_cmpxchg_G_E ( &ok, vbi, pfx, sz, delta );
21582 if (!ok) goto decode_failure;
21583 return delta;
21584 }
21585
sewardj38b1d692013-10-15 17:21:42 +000021586 case 0xB3: { /* BTR Gv,Ev */
21587 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021588 Bool ok = True;
sewardj38b1d692013-10-15 17:21:42 +000021589 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
21590 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpReset, &ok );
sewardj80611e32012-01-20 13:07:24 +000021591 if (!ok) goto decode_failure;
21592 return delta;
21593 }
21594
sewardj80611e32012-01-20 13:07:24 +000021595 case 0xB6: /* MOVZXb Eb,Gv */
21596 if (haveF2orF3(pfx)) goto decode_failure;
21597 if (sz != 2 && sz != 4 && sz != 8)
21598 goto decode_failure;
21599 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, False );
21600 return delta;
21601
21602 case 0xB7: /* MOVZXw Ew,Gv */
21603 if (haveF2orF3(pfx)) goto decode_failure;
21604 if (sz != 4 && sz != 8)
21605 goto decode_failure;
21606 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, False );
21607 return delta;
21608
21609 case 0xBA: { /* Grp8 Ib,Ev */
sewardj38b1d692013-10-15 17:21:42 +000021610 /* We let dis_Grp8_Imm decide whether F2 or F3 are allowable. */
sewardj80611e32012-01-20 13:07:24 +000021611 Bool decode_OK = False;
sewardj80611e32012-01-20 13:07:24 +000021612 modrm = getUChar(delta);
21613 am_sz = lengthAMode(pfx,delta);
21614 d64 = getSDisp8(delta + am_sz);
21615 delta = dis_Grp8_Imm ( vbi, pfx, delta, modrm, am_sz, sz, d64,
21616 &decode_OK );
21617 if (!decode_OK)
21618 goto decode_failure;
21619 return delta;
21620 }
21621
sewardj38b1d692013-10-15 17:21:42 +000021622 case 0xBB: { /* BTC Gv,Ev */
21623 /* We let dis_bt_G_E decide whether F2 or F3 are allowable. */
21624 Bool ok = False;
sewardj80611e32012-01-20 13:07:24 +000021625 if (sz != 8 && sz != 4 && sz != 2) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021626 delta = dis_bt_G_E ( vbi, pfx, sz, delta, BtOpComp, &ok );
21627 if (!ok) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021628 return delta;
sewardj38b1d692013-10-15 17:21:42 +000021629 }
sewardj80611e32012-01-20 13:07:24 +000021630
21631 case 0xBC: /* BSF Gv,Ev */
sewardjcc3d2192013-03-27 11:37:33 +000021632 if (!haveF2orF3(pfx)
21633 || (haveF3noF2(pfx)
21634 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_BMI))) {
21635 /* no-F2 no-F3 0F BC = BSF
21636 or F3 0F BC = REP; BSF on older CPUs. */
21637 delta = dis_bs_E_G ( vbi, pfx, sz, delta, True );
21638 return delta;
21639 }
21640 /* Fall through, since F3 0F BC is TZCNT, and needs to
21641 be handled by dis_ESC_0F__SSE4. */
21642 break;
sewardj80611e32012-01-20 13:07:24 +000021643
21644 case 0xBD: /* BSR Gv,Ev */
sewardjc8851af2012-08-23 20:14:51 +000021645 if (!haveF2orF3(pfx)
21646 || (haveF3noF2(pfx)
21647 && 0 == (archinfo->hwcaps & VEX_HWCAPS_AMD64_LZCNT))) {
21648 /* no-F2 no-F3 0F BD = BSR
21649 or F3 0F BD = REP; BSR on older CPUs. */
sewardj80611e32012-01-20 13:07:24 +000021650 delta = dis_bs_E_G ( vbi, pfx, sz, delta, False );
21651 return delta;
21652 }
21653 /* Fall through, since F3 0F BD is LZCNT, and needs to
21654 be handled by dis_ESC_0F__SSE4. */
21655 break;
21656
21657 case 0xBE: /* MOVSXb Eb,Gv */
21658 if (haveF2orF3(pfx)) goto decode_failure;
21659 if (sz != 2 && sz != 4 && sz != 8)
21660 goto decode_failure;
21661 delta = dis_movx_E_G ( vbi, pfx, delta, 1, sz, True );
21662 return delta;
21663
21664 case 0xBF: /* MOVSXw Ew,Gv */
21665 if (haveF2orF3(pfx)) goto decode_failure;
21666 if (sz != 4 && sz != 8)
21667 goto decode_failure;
21668 delta = dis_movx_E_G ( vbi, pfx, delta, 2, sz, True );
21669 return delta;
21670
sewardjdbcb1df2012-12-06 17:29:10 +000021671 case 0xC0: { /* XADD Gb,Eb */
21672 Bool decode_OK = False;
21673 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, 1, delta );
21674 if (!decode_OK)
21675 goto decode_failure;
21676 return delta;
21677 }
21678
sewardj80611e32012-01-20 13:07:24 +000021679 case 0xC1: { /* XADD Gv,Ev */
21680 Bool decode_OK = False;
21681 delta = dis_xadd_G_E ( &decode_OK, vbi, pfx, sz, delta );
21682 if (!decode_OK)
21683 goto decode_failure;
21684 return delta;
21685 }
21686
21687 case 0xC7: { /* CMPXCHG8B Ev, CMPXCHG16B Ev */
21688 IRType elemTy = sz==4 ? Ity_I32 : Ity_I64;
21689 IRTemp expdHi = newTemp(elemTy);
21690 IRTemp expdLo = newTemp(elemTy);
21691 IRTemp dataHi = newTemp(elemTy);
21692 IRTemp dataLo = newTemp(elemTy);
21693 IRTemp oldHi = newTemp(elemTy);
21694 IRTemp oldLo = newTemp(elemTy);
21695 IRTemp flags_old = newTemp(Ity_I64);
21696 IRTemp flags_new = newTemp(Ity_I64);
21697 IRTemp success = newTemp(Ity_I1);
21698 IROp opOR = sz==4 ? Iop_Or32 : Iop_Or64;
21699 IROp opXOR = sz==4 ? Iop_Xor32 : Iop_Xor64;
21700 IROp opCasCmpEQ = sz==4 ? Iop_CasCmpEQ32 : Iop_CasCmpEQ64;
21701 IRExpr* zero = sz==4 ? mkU32(0) : mkU64(0);
21702 IRTemp expdHi64 = newTemp(Ity_I64);
21703 IRTemp expdLo64 = newTemp(Ity_I64);
21704
21705 /* Translate this using a DCAS, even if there is no LOCK
21706 prefix. Life is too short to bother with generating two
21707 different translations for the with/without-LOCK-prefix
21708 cases. */
21709 *expect_CAS = True;
21710
21711 /* Decode, and generate address. */
sewardj38b1d692013-10-15 17:21:42 +000021712 if (have66(pfx)) goto decode_failure;
sewardj80611e32012-01-20 13:07:24 +000021713 if (sz != 4 && sz != 8) goto decode_failure;
21714 if (sz == 8 && !(archinfo->hwcaps & VEX_HWCAPS_AMD64_CX16))
21715 goto decode_failure;
21716 modrm = getUChar(delta);
21717 if (epartIsReg(modrm)) goto decode_failure;
21718 if (gregLO3ofRM(modrm) != 1) goto decode_failure;
sewardj38b1d692013-10-15 17:21:42 +000021719 if (haveF2orF3(pfx)) {
21720 /* Since the e-part is memory only, F2 or F3 (one or the
21721 other) is acceptable if LOCK is also present. But only
21722 for cmpxchg8b. */
21723 if (sz == 8) goto decode_failure;
21724 if (haveF2andF3(pfx) || !haveLOCK(pfx)) goto decode_failure;
21725 }
21726
sewardj80611e32012-01-20 13:07:24 +000021727 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
21728 delta += alen;
21729
21730 /* cmpxchg16b requires an alignment check. */
21731 if (sz == 8)
21732 gen_SEGV_if_not_16_aligned( addr );
21733
21734 /* Get the expected and new values. */
21735 assign( expdHi64, getIReg64(R_RDX) );
21736 assign( expdLo64, getIReg64(R_RAX) );
21737
21738 /* These are the correctly-sized expected and new values.
21739 However, we also get expdHi64/expdLo64 above as 64-bits
21740 regardless, because we will need them later in the 32-bit
21741 case (paradoxically). */
21742 assign( expdHi, sz==4 ? unop(Iop_64to32, mkexpr(expdHi64))
21743 : mkexpr(expdHi64) );
21744 assign( expdLo, sz==4 ? unop(Iop_64to32, mkexpr(expdLo64))
21745 : mkexpr(expdLo64) );
21746 assign( dataHi, sz==4 ? getIReg32(R_RCX) : getIReg64(R_RCX) );
21747 assign( dataLo, sz==4 ? getIReg32(R_RBX) : getIReg64(R_RBX) );
21748
21749 /* Do the DCAS */
21750 stmt( IRStmt_CAS(
21751 mkIRCAS( oldHi, oldLo,
21752 Iend_LE, mkexpr(addr),
21753 mkexpr(expdHi), mkexpr(expdLo),
21754 mkexpr(dataHi), mkexpr(dataLo)
21755 )));
21756
21757 /* success when oldHi:oldLo == expdHi:expdLo */
21758 assign( success,
21759 binop(opCasCmpEQ,
21760 binop(opOR,
21761 binop(opXOR, mkexpr(oldHi), mkexpr(expdHi)),
21762 binop(opXOR, mkexpr(oldLo), mkexpr(expdLo))
21763 ),
21764 zero
21765 ));
21766
21767 /* If the DCAS is successful, that is to say oldHi:oldLo ==
21768 expdHi:expdLo, then put expdHi:expdLo back in RDX:RAX,
21769 which is where they came from originally. Both the actual
21770 contents of these two regs, and any shadow values, are
21771 unchanged. If the DCAS fails then we're putting into
21772 RDX:RAX the value seen in memory. */
21773 /* Now of course there's a complication in the 32-bit case
21774 (bah!): if the DCAS succeeds, we need to leave RDX:RAX
21775 unchanged; but if we use the same scheme as in the 64-bit
21776 case, we get hit by the standard rule that a write to the
21777 bottom 32 bits of an integer register zeros the upper 32
21778 bits. And so the upper halves of RDX and RAX mysteriously
21779 become zero. So we have to stuff back in the original
21780 64-bit values which we previously stashed in
21781 expdHi64:expdLo64, even if we're doing a cmpxchg8b. */
21782 /* It's just _so_ much fun ... */
21783 putIRegRDX( 8,
florian99dd03e2013-01-29 03:56:06 +000021784 IRExpr_ITE( mkexpr(success),
21785 mkexpr(expdHi64),
21786 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldHi))
21787 : mkexpr(oldHi)
sewardj80611e32012-01-20 13:07:24 +000021788 ));
21789 putIRegRAX( 8,
florian99dd03e2013-01-29 03:56:06 +000021790 IRExpr_ITE( mkexpr(success),
21791 mkexpr(expdLo64),
21792 sz == 4 ? unop(Iop_32Uto64, mkexpr(oldLo))
21793 : mkexpr(oldLo)
sewardj80611e32012-01-20 13:07:24 +000021794 ));
21795
21796 /* Copy the success bit into the Z flag and leave the others
21797 unchanged */
21798 assign( flags_old, widenUto64(mk_amd64g_calculate_rflags_all()));
21799 assign(
21800 flags_new,
21801 binop(Iop_Or64,
21802 binop(Iop_And64, mkexpr(flags_old),
21803 mkU64(~AMD64G_CC_MASK_Z)),
21804 binop(Iop_Shl64,
21805 binop(Iop_And64,
21806 unop(Iop_1Uto64, mkexpr(success)), mkU64(1)),
21807 mkU8(AMD64G_CC_SHIFT_Z)) ));
21808
21809 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(AMD64G_CC_OP_COPY) ));
21810 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(flags_new) ));
21811 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
21812 /* Set NDEP even though it isn't used. This makes
21813 redundant-PUT elimination of previous stores to this field
21814 work better. */
21815 stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
21816
21817 /* Sheesh. Aren't you glad it was me and not you that had to
21818 write and validate all this grunge? */
21819
21820 DIP("cmpxchg8b %s\n", dis_buf);
21821 return delta;
21822 }
21823
21824 case 0xC8: /* BSWAP %eax */
21825 case 0xC9:
21826 case 0xCA:
21827 case 0xCB:
21828 case 0xCC:
21829 case 0xCD:
21830 case 0xCE:
21831 case 0xCF: /* BSWAP %edi */
21832 if (haveF2orF3(pfx)) goto decode_failure;
21833 /* According to the AMD64 docs, this insn can have size 4 or
21834 8. */
21835 if (sz == 4) {
21836 t1 = newTemp(Ity_I32);
sewardj80611e32012-01-20 13:07:24 +000021837 assign( t1, getIRegRexB(4, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000021838 t2 = math_BSWAP( t1, Ity_I32 );
sewardj80611e32012-01-20 13:07:24 +000021839 putIRegRexB(4, pfx, opc-0xC8, mkexpr(t2));
21840 DIP("bswapl %s\n", nameIRegRexB(4, pfx, opc-0xC8));
21841 return delta;
21842 }
21843 if (sz == 8) {
sewardj80611e32012-01-20 13:07:24 +000021844 t1 = newTemp(Ity_I64);
21845 t2 = newTemp(Ity_I64);
21846 assign( t1, getIRegRexB(8, pfx, opc-0xC8) );
sewardjf85e1772012-07-15 10:11:10 +000021847 t2 = math_BSWAP( t1, Ity_I64 );
sewardj80611e32012-01-20 13:07:24 +000021848 putIRegRexB(8, pfx, opc-0xC8, mkexpr(t2));
21849 DIP("bswapq %s\n", nameIRegRexB(8, pfx, opc-0xC8));
21850 return delta;
21851 }
21852 goto decode_failure;
21853
21854 default:
21855 break;
21856
21857 } /* first switch */
21858
21859
21860 /* =-=-=-=-=-=-=-=-= MMXery =-=-=-=-=-=-=-=-= */
21861 /* In the second switch, pick off MMX insns. */
21862
21863 if (!have66orF2orF3(pfx)) {
21864 /* So there's no SIMD prefix. */
21865
21866 vassert(sz == 4 || sz == 8);
21867
21868 switch (opc) { /* second switch */
21869
21870 case 0x71:
21871 case 0x72:
21872 case 0x73: /* PSLLgg/PSRAgg/PSRLgg mmxreg by imm8 */
21873
21874 case 0x6E: /* MOVD (src)ireg-or-mem, (dst)mmxreg */
21875 case 0x7E: /* MOVD (src)mmxreg, (dst)ireg-or-mem */
21876 case 0x7F: /* MOVQ (src)mmxreg, (dst)mmxreg-or-mem */
21877 case 0x6F: /* MOVQ (src)mmxreg-or-mem, (dst)mmxreg */
21878
21879 case 0xFC:
21880 case 0xFD:
21881 case 0xFE: /* PADDgg (src)mmxreg-or-mem, (dst)mmxreg */
21882
21883 case 0xEC:
21884 case 0xED: /* PADDSgg (src)mmxreg-or-mem, (dst)mmxreg */
21885
21886 case 0xDC:
21887 case 0xDD: /* PADDUSgg (src)mmxreg-or-mem, (dst)mmxreg */
21888
21889 case 0xF8:
21890 case 0xF9:
21891 case 0xFA: /* PSUBgg (src)mmxreg-or-mem, (dst)mmxreg */
21892
21893 case 0xE8:
21894 case 0xE9: /* PSUBSgg (src)mmxreg-or-mem, (dst)mmxreg */
21895
21896 case 0xD8:
21897 case 0xD9: /* PSUBUSgg (src)mmxreg-or-mem, (dst)mmxreg */
21898
21899 case 0xE5: /* PMULHW (src)mmxreg-or-mem, (dst)mmxreg */
21900 case 0xD5: /* PMULLW (src)mmxreg-or-mem, (dst)mmxreg */
21901
21902 case 0xF5: /* PMADDWD (src)mmxreg-or-mem, (dst)mmxreg */
21903
21904 case 0x74:
21905 case 0x75:
21906 case 0x76: /* PCMPEQgg (src)mmxreg-or-mem, (dst)mmxreg */
21907
21908 case 0x64:
21909 case 0x65:
21910 case 0x66: /* PCMPGTgg (src)mmxreg-or-mem, (dst)mmxreg */
21911
21912 case 0x6B: /* PACKSSDW (src)mmxreg-or-mem, (dst)mmxreg */
21913 case 0x63: /* PACKSSWB (src)mmxreg-or-mem, (dst)mmxreg */
21914 case 0x67: /* PACKUSWB (src)mmxreg-or-mem, (dst)mmxreg */
21915
21916 case 0x68:
21917 case 0x69:
21918 case 0x6A: /* PUNPCKHgg (src)mmxreg-or-mem, (dst)mmxreg */
21919
21920 case 0x60:
21921 case 0x61:
21922 case 0x62: /* PUNPCKLgg (src)mmxreg-or-mem, (dst)mmxreg */
21923
21924 case 0xDB: /* PAND (src)mmxreg-or-mem, (dst)mmxreg */
21925 case 0xDF: /* PANDN (src)mmxreg-or-mem, (dst)mmxreg */
21926 case 0xEB: /* POR (src)mmxreg-or-mem, (dst)mmxreg */
21927 case 0xEF: /* PXOR (src)mmxreg-or-mem, (dst)mmxreg */
21928
21929 case 0xF1: /* PSLLgg (src)mmxreg-or-mem, (dst)mmxreg */
21930 case 0xF2:
21931 case 0xF3:
21932
21933 case 0xD1: /* PSRLgg (src)mmxreg-or-mem, (dst)mmxreg */
21934 case 0xD2:
21935 case 0xD3:
21936
21937 case 0xE1: /* PSRAgg (src)mmxreg-or-mem, (dst)mmxreg */
21938 case 0xE2: {
21939 Bool decode_OK = False;
21940 delta = dis_MMX ( &decode_OK, vbi, pfx, sz, deltaIN );
21941 if (decode_OK)
21942 return delta;
21943 goto decode_failure;
21944 }
21945
21946 default:
21947 break;
21948 } /* second switch */
21949
21950 }
21951
21952 /* A couple of MMX corner cases */
21953 if (opc == 0x0E/* FEMMS */ || opc == 0x77/* EMMS */) {
21954 if (sz != 4)
21955 goto decode_failure;
21956 do_EMMS_preamble();
21957 DIP("{f}emms\n");
21958 return delta;
21959 }
21960
21961 /* =-=-=-=-=-=-=-=-= SSE2ery =-=-=-=-=-=-=-=-= */
21962 /* Perhaps it's an SSE or SSE2 instruction. We can try this
21963 without checking the guest hwcaps because SSE2 is a baseline
21964 facility in 64 bit mode. */
21965 {
21966 Bool decode_OK = False;
sewardj30fc0582012-02-16 13:45:13 +000021967 delta = dis_ESC_0F__SSE2 ( &decode_OK, vbi, pfx, sz, deltaIN, dres );
sewardj80611e32012-01-20 13:07:24 +000021968 if (decode_OK)
21969 return delta;
21970 }
21971
21972 /* =-=-=-=-=-=-=-=-= SSE3ery =-=-=-=-=-=-=-=-= */
21973 /* Perhaps it's a SSE3 instruction. FIXME: check guest hwcaps
21974 first. */
21975 {
21976 Bool decode_OK = False;
21977 delta = dis_ESC_0F__SSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
21978 if (decode_OK)
21979 return delta;
21980 }
21981
21982 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
21983 /* Perhaps it's a SSE4 instruction. FIXME: check guest hwcaps
21984 first. */
21985 {
21986 Bool decode_OK = False;
21987 delta = dis_ESC_0F__SSE4 ( &decode_OK,
21988 archinfo, vbi, pfx, sz, deltaIN );
21989 if (decode_OK)
21990 return delta;
21991 }
21992
21993 decode_failure:
21994 return deltaIN; /* fail */
21995}
21996
21997
21998/*------------------------------------------------------------*/
21999/*--- ---*/
22000/*--- Top-level post-escape decoders: dis_ESC_0F38 ---*/
22001/*--- ---*/
22002/*------------------------------------------------------------*/
22003
22004__attribute__((noinline))
22005static
22006Long dis_ESC_0F38 (
22007 /*MB_OUT*/DisResult* dres,
22008 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
22009 Bool resteerCisOk,
22010 void* callback_opaque,
22011 VexArchInfo* archinfo,
22012 VexAbiInfo* vbi,
22013 Prefix pfx, Int sz, Long deltaIN
22014 )
22015{
22016 Long delta = deltaIN;
22017 UChar opc = getUChar(delta);
22018 delta++;
22019 switch (opc) {
22020
sewardj1a237be2012-07-16 08:35:31 +000022021 case 0xF0: /* 0F 38 F0 = MOVBE m16/32/64(E), r16/32/64(G) */
22022 case 0xF1: { /* 0F 38 F1 = MOVBE r16/32/64(G), m16/32/64(E) */
22023 if (!haveF2orF3(pfx) && !haveVEX(pfx)
22024 && (sz == 2 || sz == 4 || sz == 8)) {
22025 IRTemp addr = IRTemp_INVALID;
22026 UChar modrm = 0;
22027 Int alen = 0;
22028 HChar dis_buf[50];
22029 modrm = getUChar(delta);
22030 if (epartIsReg(modrm)) break;
22031 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22032 delta += alen;
22033 IRType ty = szToITy(sz);
22034 IRTemp src = newTemp(ty);
22035 if (opc == 0xF0) { /* LOAD */
22036 assign(src, loadLE(ty, mkexpr(addr)));
22037 IRTemp dst = math_BSWAP(src, ty);
22038 putIRegG(sz, pfx, modrm, mkexpr(dst));
22039 DIP("movbe %s,%s\n", dis_buf, nameIRegG(sz, pfx, modrm));
22040 } else { /* STORE */
22041 assign(src, getIRegG(sz, pfx, modrm));
22042 IRTemp dst = math_BSWAP(src, ty);
22043 storeLE(mkexpr(addr), mkexpr(dst));
22044 DIP("movbe %s,%s\n", nameIRegG(sz, pfx, modrm), dis_buf);
22045 }
22046 return delta;
sewardjf85e1772012-07-15 10:11:10 +000022047 }
sewardj1a237be2012-07-16 08:35:31 +000022048 /* else fall through; maybe one of the decoders below knows what
22049 it is. */
22050 break;
sewardjf85e1772012-07-15 10:11:10 +000022051 }
22052
sewardj80611e32012-01-20 13:07:24 +000022053 default:
22054 break;
22055
22056 }
22057
22058 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22059 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22060 rather than proceeding indiscriminately. */
22061 {
22062 Bool decode_OK = False;
22063 delta = dis_ESC_0F38__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22064 if (decode_OK)
22065 return delta;
22066 }
22067
22068 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22069 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22070 rather than proceeding indiscriminately. */
22071 {
22072 Bool decode_OK = False;
22073 delta = dis_ESC_0F38__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22074 if (decode_OK)
22075 return delta;
22076 }
22077
sewardj1a237be2012-07-16 08:35:31 +000022078 /*decode_failure:*/
sewardj80611e32012-01-20 13:07:24 +000022079 return deltaIN; /* fail */
22080}
22081
22082
22083/*------------------------------------------------------------*/
22084/*--- ---*/
22085/*--- Top-level post-escape decoders: dis_ESC_0F3A ---*/
22086/*--- ---*/
22087/*------------------------------------------------------------*/
22088
22089__attribute__((noinline))
22090static
22091Long dis_ESC_0F3A (
22092 /*MB_OUT*/DisResult* dres,
22093 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
22094 Bool resteerCisOk,
22095 void* callback_opaque,
22096 VexArchInfo* archinfo,
22097 VexAbiInfo* vbi,
22098 Prefix pfx, Int sz, Long deltaIN
22099 )
22100{
22101 Long delta = deltaIN;
22102 UChar opc = getUChar(delta);
22103 delta++;
22104 switch (opc) {
22105
22106 default:
22107 break;
22108
22109 }
22110
22111 /* =-=-=-=-=-=-=-=-= SSSE3ery =-=-=-=-=-=-=-=-= */
22112 /* Perhaps it's an SSSE3 instruction. FIXME: consult guest hwcaps
22113 rather than proceeding indiscriminately. */
22114 {
22115 Bool decode_OK = False;
22116 delta = dis_ESC_0F3A__SupSSE3 ( &decode_OK, vbi, pfx, sz, deltaIN );
22117 if (decode_OK)
22118 return delta;
22119 }
22120
22121 /* =-=-=-=-=-=-=-=-= SSE4ery =-=-=-=-=-=-=-=-= */
22122 /* Perhaps it's an SSE4 instruction. FIXME: consult guest hwcaps
22123 rather than proceeding indiscriminately. */
22124 {
22125 Bool decode_OK = False;
22126 delta = dis_ESC_0F3A__SSE4 ( &decode_OK, vbi, pfx, sz, deltaIN );
22127 if (decode_OK)
22128 return delta;
22129 }
22130
sewardj80611e32012-01-20 13:07:24 +000022131 return deltaIN; /* fail */
22132}
22133
22134
22135/*------------------------------------------------------------*/
22136/*--- ---*/
sewardjc4530ae2012-05-21 10:18:49 +000022137/*--- Top-level post-escape decoders: dis_ESC_0F__VEX ---*/
22138/*--- ---*/
22139/*------------------------------------------------------------*/
22140
sewardj56c30312012-06-12 08:45:39 +000022141/* FIXME: common up with the _256_ version below? */
sewardjc4530ae2012-05-21 10:18:49 +000022142static
22143Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG (
22144 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022145 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022146 /* The actual operation. Use either 'op' or 'opfn',
22147 but not both. */
22148 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
sewardj44565e82012-05-22 09:14:15 +000022149 Bool invertLeftArg,
22150 Bool swapArgs
sewardjc4530ae2012-05-21 10:18:49 +000022151 )
22152{
22153 UChar modrm = getUChar(delta);
22154 UInt rD = gregOfRexRM(pfx, modrm);
22155 UInt rSL = getVexNvvvv(pfx);
22156 IRTemp tSL = newTemp(Ity_V128);
22157 IRTemp tSR = newTemp(Ity_V128);
22158 IRTemp addr = IRTemp_INVALID;
22159 HChar dis_buf[50];
22160 Int alen = 0;
22161 vassert(0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*WIG?*/);
22162
22163 assign(tSL, invertLeftArg ? unop(Iop_NotV128, getXMMReg(rSL))
22164 : getXMMReg(rSL));
22165
22166 if (epartIsReg(modrm)) {
22167 UInt rSR = eregOfRexRM(pfx, modrm);
22168 delta += 1;
22169 assign(tSR, getXMMReg(rSR));
22170 DIP("%s %s,%s,%s\n",
22171 name, nameXMMReg(rSR), nameXMMReg(rSL), nameXMMReg(rD));
22172 } else {
22173 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
22174 delta += alen;
22175 assign(tSR, loadLE(Ity_V128, mkexpr(addr)));
22176 DIP("%s %s,%s,%s\n",
22177 name, dis_buf, nameXMMReg(rSL), nameXMMReg(rD));
22178 }
22179
22180 IRTemp res = IRTemp_INVALID;
22181 if (op != Iop_INVALID) {
22182 vassert(opFn == NULL);
22183 res = newTemp(Ity_V128);
sewardj9571dc02014-01-26 18:34:23 +000022184 if (requiresRMode(op)) {
22185 IRTemp rm = newTemp(Ity_I32);
22186 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
22187 assign(res, swapArgs
22188 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
22189 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
22190 } else {
22191 assign(res, swapArgs
22192 ? binop(op, mkexpr(tSR), mkexpr(tSL))
22193 : binop(op, mkexpr(tSL), mkexpr(tSR)));
22194 }
sewardjc4530ae2012-05-21 10:18:49 +000022195 } else {
22196 vassert(opFn != NULL);
sewardj44565e82012-05-22 09:14:15 +000022197 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
sewardjc4530ae2012-05-21 10:18:49 +000022198 }
22199
22200 putYMMRegLoAndZU(rD, mkexpr(res));
22201
22202 *uses_vvvv = True;
22203 return delta;
22204}
22205
22206
22207/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, with a simple IROp
sewardj44565e82012-05-22 09:14:15 +000022208 for the operation, no inversion of the left arg, and no swapping of
22209 args. */
sewardjc4530ae2012-05-21 10:18:49 +000022210static
22211Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple (
22212 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022213 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022214 IROp op
22215 )
22216{
22217 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000022218 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
sewardjc4530ae2012-05-21 10:18:49 +000022219}
22220
22221
22222/* Handle a VEX_NDS_128_66_0F_WIG (3-addr) insn, using the given IR
sewardj44565e82012-05-22 09:14:15 +000022223 generator to compute the result, no inversion of the left
22224 arg, and no swapping of args. */
sewardjc4530ae2012-05-21 10:18:49 +000022225static
22226Long dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex (
22227 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000022228 Prefix pfx, Long delta, const HChar* name,
sewardjc4530ae2012-05-21 10:18:49 +000022229 IRTemp(*opFn)(IRTemp,IRTemp)
22230 )
22231{
22232 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
sewardj44565e82012-05-22 09:14:15 +000022233 uses_vvvv, vbi, pfx, delta, name,
22234 Iop_INVALID, opFn, False, False );
sewardjc4530ae2012-05-21 10:18:49 +000022235}
22236
22237
sewardj4c0a7ac2012-06-21 09:08:19 +000022238/* Vector by scalar shift of V by the amount specified at the bottom
22239 of E. */
22240static ULong dis_AVX128_shiftV_byE ( VexAbiInfo* vbi,
22241 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022242 const HChar* opname, IROp op )
sewardj4c0a7ac2012-06-21 09:08:19 +000022243{
22244 HChar dis_buf[50];
22245 Int alen, size;
22246 IRTemp addr;
22247 Bool shl, shr, sar;
22248 UChar modrm = getUChar(delta);
22249 UInt rG = gregOfRexRM(pfx,modrm);
22250 UInt rV = getVexNvvvv(pfx);;
22251 IRTemp g0 = newTemp(Ity_V128);
22252 IRTemp g1 = newTemp(Ity_V128);
22253 IRTemp amt = newTemp(Ity_I64);
22254 IRTemp amt8 = newTemp(Ity_I8);
22255 if (epartIsReg(modrm)) {
22256 UInt rE = eregOfRexRM(pfx,modrm);
22257 assign( amt, getXMMRegLane64(rE, 0) );
22258 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22259 nameXMMReg(rV), nameXMMReg(rG) );
22260 delta++;
22261 } else {
22262 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22263 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22264 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
22265 delta += alen;
22266 }
22267 assign( g0, getXMMReg(rV) );
22268 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22269
22270 shl = shr = sar = False;
22271 size = 0;
22272 switch (op) {
22273 case Iop_ShlN16x8: shl = True; size = 32; break;
22274 case Iop_ShlN32x4: shl = True; size = 32; break;
22275 case Iop_ShlN64x2: shl = True; size = 64; break;
22276 case Iop_SarN16x8: sar = True; size = 16; break;
22277 case Iop_SarN32x4: sar = True; size = 32; break;
22278 case Iop_ShrN16x8: shr = True; size = 16; break;
22279 case Iop_ShrN32x4: shr = True; size = 32; break;
22280 case Iop_ShrN64x2: shr = True; size = 64; break;
22281 default: vassert(0);
22282 }
22283
22284 if (shl || shr) {
22285 assign(
22286 g1,
florian99dd03e2013-01-29 03:56:06 +000022287 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000022288 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +000022289 binop(op, mkexpr(g0), mkexpr(amt8)),
22290 mkV128(0x0000)
sewardj4c0a7ac2012-06-21 09:08:19 +000022291 )
22292 );
22293 } else
22294 if (sar) {
22295 assign(
22296 g1,
florian99dd03e2013-01-29 03:56:06 +000022297 IRExpr_ITE(
sewardj009230b2013-01-26 11:47:55 +000022298 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
florian99dd03e2013-01-29 03:56:06 +000022299 binop(op, mkexpr(g0), mkexpr(amt8)),
22300 binop(op, mkexpr(g0), mkU8(size-1))
sewardj4c0a7ac2012-06-21 09:08:19 +000022301 )
22302 );
22303 } else {
22304 vassert(0);
22305 }
22306
22307 putYMMRegLoAndZU( rG, mkexpr(g1) );
22308 return delta;
22309}
22310
22311
sewardjcc3d2192013-03-27 11:37:33 +000022312/* Vector by scalar shift of V by the amount specified at the bottom
22313 of E. */
22314static ULong dis_AVX256_shiftV_byE ( VexAbiInfo* vbi,
22315 Prefix pfx, Long delta,
22316 const HChar* opname, IROp op )
22317{
22318 HChar dis_buf[50];
22319 Int alen, size;
22320 IRTemp addr;
22321 Bool shl, shr, sar;
22322 UChar modrm = getUChar(delta);
22323 UInt rG = gregOfRexRM(pfx,modrm);
22324 UInt rV = getVexNvvvv(pfx);;
22325 IRTemp g0 = newTemp(Ity_V256);
22326 IRTemp g1 = newTemp(Ity_V256);
22327 IRTemp amt = newTemp(Ity_I64);
22328 IRTemp amt8 = newTemp(Ity_I8);
22329 if (epartIsReg(modrm)) {
22330 UInt rE = eregOfRexRM(pfx,modrm);
22331 assign( amt, getXMMRegLane64(rE, 0) );
22332 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22333 nameYMMReg(rV), nameYMMReg(rG) );
22334 delta++;
22335 } else {
22336 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22337 assign( amt, loadLE(Ity_I64, mkexpr(addr)) );
22338 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
22339 delta += alen;
22340 }
22341 assign( g0, getYMMReg(rV) );
22342 assign( amt8, unop(Iop_64to8, mkexpr(amt)) );
22343
22344 shl = shr = sar = False;
22345 size = 0;
22346 switch (op) {
22347 case Iop_ShlN16x16: shl = True; size = 32; break;
22348 case Iop_ShlN32x8: shl = True; size = 32; break;
22349 case Iop_ShlN64x4: shl = True; size = 64; break;
22350 case Iop_SarN16x16: sar = True; size = 16; break;
22351 case Iop_SarN32x8: sar = True; size = 32; break;
22352 case Iop_ShrN16x16: shr = True; size = 16; break;
22353 case Iop_ShrN32x8: shr = True; size = 32; break;
22354 case Iop_ShrN64x4: shr = True; size = 64; break;
22355 default: vassert(0);
22356 }
22357
22358 if (shl || shr) {
22359 assign(
22360 g1,
22361 IRExpr_ITE(
22362 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22363 binop(op, mkexpr(g0), mkexpr(amt8)),
22364 binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
22365 )
22366 );
22367 } else
22368 if (sar) {
22369 assign(
22370 g1,
22371 IRExpr_ITE(
22372 binop(Iop_CmpLT64U, mkexpr(amt), mkU64(size)),
22373 binop(op, mkexpr(g0), mkexpr(amt8)),
22374 binop(op, mkexpr(g0), mkU8(size-1))
22375 )
22376 );
22377 } else {
22378 vassert(0);
22379 }
22380
22381 putYMMReg( rG, mkexpr(g1) );
22382 return delta;
22383}
22384
22385
22386/* Vector by vector shift of V by the amount specified at the bottom
22387 of E. Vector by vector shifts are defined for all shift amounts,
22388 so not using Iop_S*x* here (and SSE2 doesn't support variable shifts
22389 anyway). */
22390static ULong dis_AVX_var_shiftV_byE ( VexAbiInfo* vbi,
22391 Prefix pfx, Long delta,
22392 const HChar* opname, IROp op, Bool isYMM )
22393{
22394 HChar dis_buf[50];
22395 Int alen, size, i;
22396 IRTemp addr;
22397 UChar modrm = getUChar(delta);
22398 UInt rG = gregOfRexRM(pfx,modrm);
22399 UInt rV = getVexNvvvv(pfx);;
22400 IRTemp sV = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
22401 IRTemp amt = isYMM ? newTemp(Ity_V256) : newTemp(Ity_V128);
22402 IRTemp amts[8], sVs[8], res[8];
22403 if (epartIsReg(modrm)) {
22404 UInt rE = eregOfRexRM(pfx,modrm);
22405 assign( amt, isYMM ? getYMMReg(rE) : getXMMReg(rE) );
22406 if (isYMM) {
22407 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rE),
22408 nameYMMReg(rV), nameYMMReg(rG) );
22409 } else {
22410 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rE),
22411 nameXMMReg(rV), nameXMMReg(rG) );
22412 }
22413 delta++;
22414 } else {
22415 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22416 assign( amt, loadLE(isYMM ? Ity_V256 : Ity_V128, mkexpr(addr)) );
22417 if (isYMM) {
22418 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV),
22419 nameYMMReg(rG) );
22420 } else {
22421 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV),
22422 nameXMMReg(rG) );
22423 }
22424 delta += alen;
22425 }
22426 assign( sV, isYMM ? getYMMReg(rV) : getXMMReg(rV) );
22427
22428 size = 0;
22429 switch (op) {
22430 case Iop_Shl32: size = 32; break;
22431 case Iop_Shl64: size = 64; break;
22432 case Iop_Sar32: size = 32; break;
22433 case Iop_Shr32: size = 32; break;
22434 case Iop_Shr64: size = 64; break;
22435 default: vassert(0);
22436 }
22437
22438 for (i = 0; i < 8; i++) {
22439 sVs[i] = IRTemp_INVALID;
22440 amts[i] = IRTemp_INVALID;
22441 }
22442 switch (size) {
22443 case 32:
22444 if (isYMM) {
22445 breakupV256to32s( sV, &sVs[7], &sVs[6], &sVs[5], &sVs[4],
22446 &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22447 breakupV256to32s( amt, &amts[7], &amts[6], &amts[5], &amts[4],
22448 &amts[3], &amts[2], &amts[1], &amts[0] );
22449 } else {
22450 breakupV128to32s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22451 breakupV128to32s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
22452 }
22453 break;
22454 case 64:
22455 if (isYMM) {
22456 breakupV256to64s( sV, &sVs[3], &sVs[2], &sVs[1], &sVs[0] );
22457 breakupV256to64s( amt, &amts[3], &amts[2], &amts[1], &amts[0] );
22458 } else {
22459 breakupV128to64s( sV, &sVs[1], &sVs[0] );
22460 breakupV128to64s( amt, &amts[1], &amts[0] );
22461 }
22462 break;
22463 default: vassert(0);
22464 }
22465 for (i = 0; i < 8; i++)
22466 if (sVs[i] != IRTemp_INVALID) {
22467 res[i] = size == 32 ? newTemp(Ity_I32) : newTemp(Ity_I64);
22468 assign( res[i],
22469 IRExpr_ITE(
22470 binop(size == 32 ? Iop_CmpLT32U : Iop_CmpLT64U,
22471 mkexpr(amts[i]),
22472 size == 32 ? mkU32(size) : mkU64(size)),
22473 binop(op, mkexpr(sVs[i]),
22474 unop(size == 32 ? Iop_32to8 : Iop_64to8,
22475 mkexpr(amts[i]))),
22476 op == Iop_Sar32 ? binop(op, mkexpr(sVs[i]), mkU8(size-1))
22477 : size == 32 ? mkU32(0) : mkU64(0)
22478 ));
22479 }
22480 switch (size) {
22481 case 32:
22482 for (i = 0; i < 8; i++)
22483 putYMMRegLane32( rG, i, (i < 4 || isYMM)
22484 ? mkexpr(res[i]) : mkU32(0) );
22485 break;
22486 case 64:
22487 for (i = 0; i < 4; i++)
22488 putYMMRegLane64( rG, i, (i < 2 || isYMM)
22489 ? mkexpr(res[i]) : mkU64(0) );
22490 break;
22491 default: vassert(0);
22492 }
22493
22494 return delta;
22495}
22496
22497
sewardjc4530ae2012-05-21 10:18:49 +000022498/* Vector by scalar shift of E into V, by an immediate byte. Modified
22499 version of dis_SSE_shiftE_imm. */
22500static
22501Long dis_AVX128_shiftE_to_V_imm( Prefix pfx,
florian55085f82012-11-21 00:36:55 +000022502 Long delta, const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022503{
22504 Bool shl, shr, sar;
22505 UChar rm = getUChar(delta);
22506 IRTemp e0 = newTemp(Ity_V128);
22507 IRTemp e1 = newTemp(Ity_V128);
22508 UInt rD = getVexNvvvv(pfx);
22509 UChar amt, size;
22510 vassert(epartIsReg(rm));
22511 vassert(gregLO3ofRM(rm) == 2
22512 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
22513 amt = getUChar(delta+1);
22514 delta += 2;
22515 DIP("%s $%d,%s,%s\n", opname,
22516 (Int)amt,
22517 nameXMMReg(eregOfRexRM(pfx,rm)),
22518 nameXMMReg(rD));
22519 assign( e0, getXMMReg(eregOfRexRM(pfx,rm)) );
22520
22521 shl = shr = sar = False;
22522 size = 0;
22523 switch (op) {
sewardj15ad1942012-06-20 10:21:05 +000022524 case Iop_ShlN16x8: shl = True; size = 16; break;
sewardjc4530ae2012-05-21 10:18:49 +000022525 case Iop_ShlN32x4: shl = True; size = 32; break;
sewardje8a7eb72012-06-12 14:59:17 +000022526 case Iop_ShlN64x2: shl = True; size = 64; break;
sewardjfe0c5e72012-06-15 15:48:07 +000022527 case Iop_SarN16x8: sar = True; size = 16; break;
sewardj15ad1942012-06-20 10:21:05 +000022528 case Iop_SarN32x4: sar = True; size = 32; break;
sewardj6faf7cc2012-05-25 15:53:01 +000022529 case Iop_ShrN16x8: shr = True; size = 16; break;
sewardj251b59e2012-05-25 13:51:07 +000022530 case Iop_ShrN32x4: shr = True; size = 32; break;
sewardj56c30312012-06-12 08:45:39 +000022531 case Iop_ShrN64x2: shr = True; size = 64; break;
sewardjc4530ae2012-05-21 10:18:49 +000022532 default: vassert(0);
22533 }
22534
22535 if (shl || shr) {
22536 assign( e1, amt >= size
22537 ? mkV128(0x0000)
22538 : binop(op, mkexpr(e0), mkU8(amt))
22539 );
22540 } else
22541 if (sar) {
22542 assign( e1, amt >= size
22543 ? binop(op, mkexpr(e0), mkU8(size-1))
22544 : binop(op, mkexpr(e0), mkU8(amt))
22545 );
22546 } else {
22547 vassert(0);
22548 }
22549
22550 putYMMRegLoAndZU( rD, mkexpr(e1) );
22551 return delta;
22552}
22553
22554
sewardjcc3d2192013-03-27 11:37:33 +000022555/* Vector by scalar shift of E into V, by an immediate byte. Modified
22556 version of dis_AVX128_shiftE_to_V_imm. */
22557static
22558Long dis_AVX256_shiftE_to_V_imm( Prefix pfx,
22559 Long delta, const HChar* opname, IROp op )
22560{
22561 Bool shl, shr, sar;
22562 UChar rm = getUChar(delta);
22563 IRTemp e0 = newTemp(Ity_V256);
22564 IRTemp e1 = newTemp(Ity_V256);
22565 UInt rD = getVexNvvvv(pfx);
22566 UChar amt, size;
22567 vassert(epartIsReg(rm));
22568 vassert(gregLO3ofRM(rm) == 2
22569 || gregLO3ofRM(rm) == 4 || gregLO3ofRM(rm) == 6);
22570 amt = getUChar(delta+1);
22571 delta += 2;
22572 DIP("%s $%d,%s,%s\n", opname,
22573 (Int)amt,
22574 nameYMMReg(eregOfRexRM(pfx,rm)),
22575 nameYMMReg(rD));
22576 assign( e0, getYMMReg(eregOfRexRM(pfx,rm)) );
22577
22578 shl = shr = sar = False;
22579 size = 0;
22580 switch (op) {
22581 case Iop_ShlN16x16: shl = True; size = 16; break;
22582 case Iop_ShlN32x8: shl = True; size = 32; break;
22583 case Iop_ShlN64x4: shl = True; size = 64; break;
22584 case Iop_SarN16x16: sar = True; size = 16; break;
22585 case Iop_SarN32x8: sar = True; size = 32; break;
22586 case Iop_ShrN16x16: shr = True; size = 16; break;
22587 case Iop_ShrN32x8: shr = True; size = 32; break;
22588 case Iop_ShrN64x4: shr = True; size = 64; break;
22589 default: vassert(0);
22590 }
22591
22592
22593 if (shl || shr) {
22594 assign( e1, amt >= size
22595 ? binop(Iop_V128HLtoV256, mkV128(0), mkV128(0))
22596 : binop(op, mkexpr(e0), mkU8(amt))
22597 );
22598 } else
22599 if (sar) {
22600 assign( e1, amt >= size
22601 ? binop(op, mkexpr(e0), mkU8(size-1))
22602 : binop(op, mkexpr(e0), mkU8(amt))
22603 );
22604 } else {
22605 vassert(0);
22606 }
22607
22608 putYMMReg( rD, mkexpr(e1) );
22609 return delta;
22610}
22611
22612
sewardjc4530ae2012-05-21 10:18:49 +000022613/* Lower 64-bit lane only AVX128 binary operation:
22614 G[63:0] = V[63:0] `op` E[63:0]
22615 G[127:64] = V[127:64]
22616 G[255:128] = 0.
22617 The specified op must be of the 64F0x2 kind, so that it
22618 copies the upper half of the left operand to the result.
22619*/
22620static Long dis_AVX128_E_V_to_G_lo64 ( /*OUT*/Bool* uses_vvvv,
22621 VexAbiInfo* vbi,
22622 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022623 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022624{
22625 HChar dis_buf[50];
22626 Int alen;
22627 IRTemp addr;
22628 UChar rm = getUChar(delta);
22629 UInt rG = gregOfRexRM(pfx,rm);
22630 UInt rV = getVexNvvvv(pfx);
22631 IRExpr* vpart = getXMMReg(rV);
22632 if (epartIsReg(rm)) {
22633 UInt rE = eregOfRexRM(pfx,rm);
22634 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
22635 DIP("%s %s,%s,%s\n", opname,
22636 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22637 delta = delta+1;
22638 } else {
22639 /* We can only do a 64-bit memory read, so the upper half of the
22640 E operand needs to be made simply of zeroes. */
22641 IRTemp epart = newTemp(Ity_V128);
22642 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22643 assign( epart, unop( Iop_64UtoV128,
22644 loadLE(Ity_I64, mkexpr(addr))) );
22645 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
22646 DIP("%s %s,%s,%s\n", opname,
22647 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22648 delta = delta+alen;
22649 }
22650 putYMMRegLane128( rG, 1, mkV128(0) );
22651 *uses_vvvv = True;
22652 return delta;
22653}
22654
22655
22656/* Lower 64-bit lane only AVX128 unary operation:
22657 G[63:0] = op(E[63:0])
22658 G[127:64] = V[127:64]
22659 G[255:128] = 0
22660 The specified op must be of the 64F0x2 kind, so that it
22661 copies the upper half of the operand to the result.
22662*/
22663static Long dis_AVX128_E_V_to_G_lo64_unary ( /*OUT*/Bool* uses_vvvv,
22664 VexAbiInfo* vbi,
22665 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022666 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022667{
22668 HChar dis_buf[50];
22669 Int alen;
22670 IRTemp addr;
22671 UChar rm = getUChar(delta);
22672 UInt rG = gregOfRexRM(pfx,rm);
22673 UInt rV = getVexNvvvv(pfx);
22674 IRTemp e64 = newTemp(Ity_I64);
22675
22676 /* Fetch E[63:0] */
22677 if (epartIsReg(rm)) {
22678 UInt rE = eregOfRexRM(pfx,rm);
22679 assign(e64, getXMMRegLane64(rE, 0));
22680 DIP("%s %s,%s,%s\n", opname,
22681 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22682 delta += 1;
22683 } else {
22684 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22685 assign(e64, loadLE(Ity_I64, mkexpr(addr)));
22686 DIP("%s %s,%s,%s\n", opname,
22687 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22688 delta += alen;
22689 }
22690
22691 /* Create a value 'arg' as V[127:64]++E[63:0] */
22692 IRTemp arg = newTemp(Ity_V128);
22693 assign(arg,
22694 binop(Iop_SetV128lo64,
22695 getXMMReg(rV), mkexpr(e64)));
22696 /* and apply op to it */
22697 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
22698 *uses_vvvv = True;
22699 return delta;
22700}
22701
22702
sewardj66becf32012-06-18 23:15:16 +000022703/* Lower 32-bit lane only AVX128 unary operation:
22704 G[31:0] = op(E[31:0])
22705 G[127:32] = V[127:32]
22706 G[255:128] = 0
22707 The specified op must be of the 32F0x4 kind, so that it
22708 copies the upper 3/4 of the operand to the result.
22709*/
22710static Long dis_AVX128_E_V_to_G_lo32_unary ( /*OUT*/Bool* uses_vvvv,
22711 VexAbiInfo* vbi,
22712 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022713 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000022714{
22715 HChar dis_buf[50];
22716 Int alen;
22717 IRTemp addr;
22718 UChar rm = getUChar(delta);
22719 UInt rG = gregOfRexRM(pfx,rm);
22720 UInt rV = getVexNvvvv(pfx);
22721 IRTemp e32 = newTemp(Ity_I32);
22722
22723 /* Fetch E[31:0] */
22724 if (epartIsReg(rm)) {
22725 UInt rE = eregOfRexRM(pfx,rm);
22726 assign(e32, getXMMRegLane32(rE, 0));
22727 DIP("%s %s,%s,%s\n", opname,
22728 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22729 delta += 1;
22730 } else {
22731 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22732 assign(e32, loadLE(Ity_I32, mkexpr(addr)));
22733 DIP("%s %s,%s,%s\n", opname,
22734 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22735 delta += alen;
22736 }
22737
22738 /* Create a value 'arg' as V[127:32]++E[31:0] */
22739 IRTemp arg = newTemp(Ity_V128);
22740 assign(arg,
22741 binop(Iop_SetV128lo32,
22742 getXMMReg(rV), mkexpr(e32)));
22743 /* and apply op to it */
22744 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
22745 *uses_vvvv = True;
22746 return delta;
22747}
22748
22749
sewardjc4530ae2012-05-21 10:18:49 +000022750/* Lower 32-bit lane only AVX128 binary operation:
22751 G[31:0] = V[31:0] `op` E[31:0]
22752 G[127:32] = V[127:32]
22753 G[255:128] = 0.
22754 The specified op must be of the 32F0x4 kind, so that it
22755 copies the upper 3/4 of the left operand to the result.
22756*/
22757static Long dis_AVX128_E_V_to_G_lo32 ( /*OUT*/Bool* uses_vvvv,
22758 VexAbiInfo* vbi,
22759 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022760 const HChar* opname, IROp op )
sewardjc4530ae2012-05-21 10:18:49 +000022761{
22762 HChar dis_buf[50];
22763 Int alen;
22764 IRTemp addr;
22765 UChar rm = getUChar(delta);
22766 UInt rG = gregOfRexRM(pfx,rm);
22767 UInt rV = getVexNvvvv(pfx);
22768 IRExpr* vpart = getXMMReg(rV);
22769 if (epartIsReg(rm)) {
22770 UInt rE = eregOfRexRM(pfx,rm);
22771 putXMMReg( rG, binop(op, vpart, getXMMReg(rE)) );
22772 DIP("%s %s,%s,%s\n", opname,
22773 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22774 delta = delta+1;
22775 } else {
22776 /* We can only do a 32-bit memory read, so the upper 3/4 of the
22777 E operand needs to be made simply of zeroes. */
22778 IRTemp epart = newTemp(Ity_V128);
22779 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
22780 assign( epart, unop( Iop_32UtoV128,
22781 loadLE(Ity_I32, mkexpr(addr))) );
22782 putXMMReg( rG, binop(op, vpart, mkexpr(epart)) );
22783 DIP("%s %s,%s,%s\n", opname,
22784 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22785 delta = delta+alen;
22786 }
22787 putYMMRegLane128( rG, 1, mkV128(0) );
22788 *uses_vvvv = True;
22789 return delta;
22790}
22791
22792
sewardj251b59e2012-05-25 13:51:07 +000022793/* All-lanes AVX128 binary operation:
sewardj56c30312012-06-12 08:45:39 +000022794 G[127:0] = V[127:0] `op` E[127:0]
sewardj251b59e2012-05-25 13:51:07 +000022795 G[255:128] = 0.
22796*/
22797static Long dis_AVX128_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
22798 VexAbiInfo* vbi,
22799 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022800 const HChar* opname, IROp op )
sewardj251b59e2012-05-25 13:51:07 +000022801{
sewardj29a219c2012-06-04 07:38:10 +000022802 return dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
22803 uses_vvvv, vbi, pfx, delta, opname, op,
22804 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
22805 );
sewardj251b59e2012-05-25 13:51:07 +000022806}
22807
22808
sewardjc4530ae2012-05-21 10:18:49 +000022809/* Handles AVX128 32F/64F comparisons. A derivative of
22810 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
22811 original delta to indicate failure. */
22812static
22813Long dis_AVX128_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
22814 VexAbiInfo* vbi,
22815 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022816 const HChar* opname, Bool all_lanes, Int sz )
sewardjc4530ae2012-05-21 10:18:49 +000022817{
sewardjc93904b2012-05-27 13:50:42 +000022818 vassert(sz == 4 || sz == 8);
sewardjc4530ae2012-05-21 10:18:49 +000022819 Long deltaIN = delta;
22820 HChar dis_buf[50];
22821 Int alen;
22822 UInt imm8;
22823 IRTemp addr;
22824 Bool preSwap = False;
22825 IROp op = Iop_INVALID;
22826 Bool postNot = False;
22827 IRTemp plain = newTemp(Ity_V128);
22828 UChar rm = getUChar(delta);
sewardjc93904b2012-05-27 13:50:42 +000022829 UInt rG = gregOfRexRM(pfx, rm);
22830 UInt rV = getVexNvvvv(pfx);
22831 IRTemp argL = newTemp(Ity_V128);
22832 IRTemp argR = newTemp(Ity_V128);
22833
22834 assign(argL, getXMMReg(rV));
sewardjc4530ae2012-05-21 10:18:49 +000022835 if (epartIsReg(rm)) {
22836 imm8 = getUChar(delta+1);
22837 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
22838 if (!ok) return deltaIN; /* FAIL */
22839 UInt rE = eregOfRexRM(pfx,rm);
sewardjc93904b2012-05-27 13:50:42 +000022840 assign(argR, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000022841 delta += 1+1;
22842 DIP("%s $%d,%s,%s,%s\n",
22843 opname, (Int)imm8,
22844 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
22845 } else {
22846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
22847 imm8 = getUChar(delta+alen);
22848 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8, all_lanes, sz);
22849 if (!ok) return deltaIN; /* FAIL */
sewardjc93904b2012-05-27 13:50:42 +000022850 assign(argR,
22851 all_lanes ? loadLE(Ity_V128, mkexpr(addr))
sewardjc4530ae2012-05-21 10:18:49 +000022852 : sz == 8 ? unop( Iop_64UtoV128, loadLE(Ity_I64, mkexpr(addr)))
sewardjc93904b2012-05-27 13:50:42 +000022853 : /*sz==4*/ unop( Iop_32UtoV128, loadLE(Ity_I32, mkexpr(addr))));
sewardjc4530ae2012-05-21 10:18:49 +000022854 delta += alen+1;
22855 DIP("%s $%d,%s,%s,%s\n",
22856 opname, (Int)imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
22857 }
22858
sewardjc93904b2012-05-27 13:50:42 +000022859 assign(plain, preSwap ? binop(op, mkexpr(argR), mkexpr(argL))
22860 : binop(op, mkexpr(argL), mkexpr(argR)));
sewardjc4530ae2012-05-21 10:18:49 +000022861
sewardjc93904b2012-05-27 13:50:42 +000022862 if (all_lanes) {
22863 /* This is simple: just invert the result, if necessary, and
22864 have done. */
22865 if (postNot) {
22866 putYMMRegLoAndZU( rG, unop(Iop_NotV128, mkexpr(plain)) );
22867 } else {
22868 putYMMRegLoAndZU( rG, mkexpr(plain) );
22869 }
sewardjc4530ae2012-05-21 10:18:49 +000022870 }
22871 else
sewardjc93904b2012-05-27 13:50:42 +000022872 if (!preSwap) {
22873 /* More complex. It's a one-lane-only, hence need to possibly
22874 invert only that one lane. But at least the other lanes are
22875 correctly "in" the result, having been copied from the left
22876 operand (argL). */
22877 if (postNot) {
22878 IRExpr* mask = mkV128(sz==4 ? 0x000F : 0x00FF);
22879 putYMMRegLoAndZU( rG, binop(Iop_XorV128, mkexpr(plain),
22880 mask) );
22881 } else {
22882 putYMMRegLoAndZU( rG, mkexpr(plain) );
22883 }
sewardjc4530ae2012-05-21 10:18:49 +000022884 }
22885 else {
sewardjc93904b2012-05-27 13:50:42 +000022886 /* This is the most complex case. One-lane-only, but the args
22887 were swapped. So we have to possibly invert the bottom lane,
22888 and (definitely) we have to copy the upper lane(s) from argL
22889 since, due to the swapping, what's currently there is from
22890 argR, which is not correct. */
22891 IRTemp res = newTemp(Ity_V128);
22892 IRTemp mask = newTemp(Ity_V128);
22893 IRTemp notMask = newTemp(Ity_V128);
22894 assign(mask, mkV128(sz==4 ? 0x000F : 0x00FF));
22895 assign(notMask, mkV128(sz==4 ? 0xFFF0 : 0xFF00));
22896 if (postNot) {
22897 assign(res,
22898 binop(Iop_OrV128,
22899 binop(Iop_AndV128,
22900 unop(Iop_NotV128, mkexpr(plain)),
22901 mkexpr(mask)),
22902 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
22903 } else {
22904 assign(res,
22905 binop(Iop_OrV128,
22906 binop(Iop_AndV128,
22907 mkexpr(plain),
22908 mkexpr(mask)),
22909 binop(Iop_AndV128, mkexpr(argL), mkexpr(notMask))));
22910 }
22911 putYMMRegLoAndZU( rG, mkexpr(res) );
sewardjc4530ae2012-05-21 10:18:49 +000022912 }
22913
22914 *uses_vvvv = True;
22915 return delta;
22916}
22917
22918
sewardj89378162012-06-24 12:12:20 +000022919/* Handles AVX256 32F/64F comparisons. A derivative of
22920 dis_SSEcmp_E_to_G. It can fail, in which case it returns the
22921 original delta to indicate failure. */
22922static
22923Long dis_AVX256_cmp_V_E_to_G ( /*OUT*/Bool* uses_vvvv,
22924 VexAbiInfo* vbi,
22925 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022926 const HChar* opname, Int sz )
sewardj89378162012-06-24 12:12:20 +000022927{
22928 vassert(sz == 4 || sz == 8);
22929 Long deltaIN = delta;
22930 HChar dis_buf[50];
22931 Int alen;
22932 UInt imm8;
22933 IRTemp addr;
22934 Bool preSwap = False;
22935 IROp op = Iop_INVALID;
22936 Bool postNot = False;
22937 IRTemp plain = newTemp(Ity_V256);
22938 UChar rm = getUChar(delta);
22939 UInt rG = gregOfRexRM(pfx, rm);
22940 UInt rV = getVexNvvvv(pfx);
22941 IRTemp argL = newTemp(Ity_V256);
22942 IRTemp argR = newTemp(Ity_V256);
22943 IRTemp argLhi = IRTemp_INVALID;
22944 IRTemp argLlo = IRTemp_INVALID;
22945 IRTemp argRhi = IRTemp_INVALID;
22946 IRTemp argRlo = IRTemp_INVALID;
22947
22948 assign(argL, getYMMReg(rV));
22949 if (epartIsReg(rm)) {
22950 imm8 = getUChar(delta+1);
22951 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
22952 True/*all_lanes*/, sz);
22953 if (!ok) return deltaIN; /* FAIL */
22954 UInt rE = eregOfRexRM(pfx,rm);
22955 assign(argR, getYMMReg(rE));
22956 delta += 1+1;
22957 DIP("%s $%d,%s,%s,%s\n",
22958 opname, (Int)imm8,
22959 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
22960 } else {
22961 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
22962 imm8 = getUChar(delta+alen);
22963 Bool ok = findSSECmpOp(&preSwap, &op, &postNot, imm8,
22964 True/*all_lanes*/, sz);
22965 if (!ok) return deltaIN; /* FAIL */
22966 assign(argR, loadLE(Ity_V256, mkexpr(addr)) );
22967 delta += alen+1;
22968 DIP("%s $%d,%s,%s,%s\n",
22969 opname, (Int)imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
22970 }
22971
sewardjb1a41a22012-06-24 13:27:46 +000022972 breakupV256toV128s( preSwap ? argR : argL, &argLhi, &argLlo );
22973 breakupV256toV128s( preSwap ? argL : argR, &argRhi, &argRlo );
sewardj89378162012-06-24 12:12:20 +000022974 assign(plain, binop( Iop_V128HLtoV256,
22975 binop(op, mkexpr(argLhi), mkexpr(argRhi)),
22976 binop(op, mkexpr(argLlo), mkexpr(argRlo)) ) );
22977
22978 /* This is simple: just invert the result, if necessary, and
22979 have done. */
22980 if (postNot) {
22981 putYMMReg( rG, unop(Iop_NotV256, mkexpr(plain)) );
22982 } else {
22983 putYMMReg( rG, mkexpr(plain) );
22984 }
22985
22986 *uses_vvvv = True;
22987 return delta;
22988}
22989
22990
sewardj97f72452012-05-23 05:56:53 +000022991/* Handles AVX128 unary E-to-G all-lanes operations. */
22992static
22993Long dis_AVX128_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
22994 VexAbiInfo* vbi,
22995 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000022996 const HChar* opname,
sewardj97f72452012-05-23 05:56:53 +000022997 IRTemp (*opFn)(IRTemp) )
22998{
22999 HChar dis_buf[50];
23000 Int alen;
23001 IRTemp addr;
23002 IRTemp res = newTemp(Ity_V128);
23003 IRTemp arg = newTemp(Ity_V128);
23004 UChar rm = getUChar(delta);
23005 UInt rG = gregOfRexRM(pfx, rm);
23006 if (epartIsReg(rm)) {
23007 UInt rE = eregOfRexRM(pfx,rm);
23008 assign(arg, getXMMReg(rE));
23009 delta += 1;
23010 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23011 } else {
sewardj29ac4282012-05-24 06:31:21 +000023012 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj97f72452012-05-23 05:56:53 +000023013 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23014 delta += alen;
23015 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23016 }
23017 res = opFn(arg);
23018 putYMMRegLoAndZU( rG, mkexpr(res) );
23019 *uses_vvvv = False;
23020 return delta;
23021}
23022
23023
sewardj66becf32012-06-18 23:15:16 +000023024/* Handles AVX128 unary E-to-G all-lanes operations. */
23025static
23026Long dis_AVX128_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23027 VexAbiInfo* vbi,
23028 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023029 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000023030{
23031 HChar dis_buf[50];
23032 Int alen;
23033 IRTemp addr;
23034 IRTemp arg = newTemp(Ity_V128);
23035 UChar rm = getUChar(delta);
23036 UInt rG = gregOfRexRM(pfx, rm);
23037 if (epartIsReg(rm)) {
23038 UInt rE = eregOfRexRM(pfx,rm);
23039 assign(arg, getXMMReg(rE));
23040 delta += 1;
23041 DIP("%s %s,%s\n", opname, nameXMMReg(rE), nameXMMReg(rG));
23042 } else {
23043 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23044 assign(arg, loadLE(Ity_V128, mkexpr(addr)));
23045 delta += alen;
23046 DIP("%s %s,%s\n", opname, dis_buf, nameXMMReg(rG));
23047 }
23048 putYMMRegLoAndZU( rG, unop(op, mkexpr(arg)) );
23049 *uses_vvvv = False;
23050 return delta;
23051}
23052
23053
sewardj56c30312012-06-12 08:45:39 +000023054/* FIXME: common up with the _128_ version above? */
23055static
23056Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG (
23057 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000023058 Prefix pfx, Long delta, const HChar* name,
sewardj56c30312012-06-12 08:45:39 +000023059 /* The actual operation. Use either 'op' or 'opfn',
23060 but not both. */
23061 IROp op, IRTemp(*opFn)(IRTemp,IRTemp),
23062 Bool invertLeftArg,
23063 Bool swapArgs
23064 )
23065{
23066 UChar modrm = getUChar(delta);
23067 UInt rD = gregOfRexRM(pfx, modrm);
23068 UInt rSL = getVexNvvvv(pfx);
23069 IRTemp tSL = newTemp(Ity_V256);
23070 IRTemp tSR = newTemp(Ity_V256);
23071 IRTemp addr = IRTemp_INVALID;
23072 HChar dis_buf[50];
23073 Int alen = 0;
23074 vassert(1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*WIG?*/);
23075
sewardj2a2bda92012-06-14 23:32:02 +000023076 assign(tSL, invertLeftArg ? unop(Iop_NotV256, getYMMReg(rSL))
23077 : getYMMReg(rSL));
sewardj56c30312012-06-12 08:45:39 +000023078
23079 if (epartIsReg(modrm)) {
23080 UInt rSR = eregOfRexRM(pfx, modrm);
23081 delta += 1;
23082 assign(tSR, getYMMReg(rSR));
23083 DIP("%s %s,%s,%s\n",
23084 name, nameYMMReg(rSR), nameYMMReg(rSL), nameYMMReg(rD));
23085 } else {
23086 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
23087 delta += alen;
23088 assign(tSR, loadLE(Ity_V256, mkexpr(addr)));
23089 DIP("%s %s,%s,%s\n",
23090 name, dis_buf, nameYMMReg(rSL), nameYMMReg(rD));
23091 }
23092
23093 IRTemp res = IRTemp_INVALID;
23094 if (op != Iop_INVALID) {
23095 vassert(opFn == NULL);
23096 res = newTemp(Ity_V256);
sewardj9571dc02014-01-26 18:34:23 +000023097 if (requiresRMode(op)) {
23098 IRTemp rm = newTemp(Ity_I32);
23099 assign(rm, get_FAKE_roundingmode()); /* XXXROUNDINGFIXME */
23100 assign(res, swapArgs
23101 ? triop(op, mkexpr(rm), mkexpr(tSR), mkexpr(tSL))
23102 : triop(op, mkexpr(rm), mkexpr(tSL), mkexpr(tSR)));
23103 } else {
23104 assign(res, swapArgs
23105 ? binop(op, mkexpr(tSR), mkexpr(tSL))
23106 : binop(op, mkexpr(tSL), mkexpr(tSR)));
23107 }
sewardj56c30312012-06-12 08:45:39 +000023108 } else {
23109 vassert(opFn != NULL);
23110 res = swapArgs ? opFn(tSR, tSL) : opFn(tSL, tSR);
23111 }
23112
23113 putYMMReg(rD, mkexpr(res));
23114
23115 *uses_vvvv = True;
23116 return delta;
23117}
23118
23119
23120/* All-lanes AVX256 binary operation:
23121 G[255:0] = V[255:0] `op` E[255:0]
23122*/
23123static Long dis_AVX256_E_V_to_G ( /*OUT*/Bool* uses_vvvv,
23124 VexAbiInfo* vbi,
23125 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023126 const HChar* opname, IROp op )
sewardj56c30312012-06-12 08:45:39 +000023127{
23128 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23129 uses_vvvv, vbi, pfx, delta, opname, op,
23130 NULL, False/*!invertLeftArg*/, False/*!swapArgs*/
23131 );
23132}
23133
23134
sewardjcc3d2192013-03-27 11:37:33 +000023135/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, with a simple IROp
23136 for the operation, no inversion of the left arg, and no swapping of
23137 args. */
23138static
23139Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple (
23140 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
23141 Prefix pfx, Long delta, const HChar* name,
23142 IROp op
23143 )
23144{
23145 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23146 uses_vvvv, vbi, pfx, delta, name, op, NULL, False, False);
23147}
23148
23149
sewardj89378162012-06-24 12:12:20 +000023150/* Handle a VEX_NDS_256_66_0F_WIG (3-addr) insn, using the given IR
23151 generator to compute the result, no inversion of the left
23152 arg, and no swapping of args. */
23153static
23154Long dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex (
23155 /*OUT*/Bool* uses_vvvv, VexAbiInfo* vbi,
florian55085f82012-11-21 00:36:55 +000023156 Prefix pfx, Long delta, const HChar* name,
sewardj89378162012-06-24 12:12:20 +000023157 IRTemp(*opFn)(IRTemp,IRTemp)
23158 )
23159{
23160 return dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
23161 uses_vvvv, vbi, pfx, delta, name,
23162 Iop_INVALID, opFn, False, False );
23163}
23164
23165
sewardj66becf32012-06-18 23:15:16 +000023166/* Handles AVX256 unary E-to-G all-lanes operations. */
23167static
sewardjcc3d2192013-03-27 11:37:33 +000023168Long dis_AVX256_E_to_G_unary ( /*OUT*/Bool* uses_vvvv,
23169 VexAbiInfo* vbi,
23170 Prefix pfx, Long delta,
23171 const HChar* opname,
23172 IRTemp (*opFn)(IRTemp) )
23173{
23174 HChar dis_buf[50];
23175 Int alen;
23176 IRTemp addr;
23177 IRTemp res = newTemp(Ity_V256);
23178 IRTemp arg = newTemp(Ity_V256);
23179 UChar rm = getUChar(delta);
23180 UInt rG = gregOfRexRM(pfx, rm);
23181 if (epartIsReg(rm)) {
23182 UInt rE = eregOfRexRM(pfx,rm);
23183 assign(arg, getYMMReg(rE));
23184 delta += 1;
23185 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23186 } else {
23187 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23188 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23189 delta += alen;
23190 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23191 }
23192 res = opFn(arg);
23193 putYMMReg( rG, mkexpr(res) );
23194 *uses_vvvv = False;
23195 return delta;
23196}
23197
23198
23199/* Handles AVX256 unary E-to-G all-lanes operations. */
23200static
sewardj66becf32012-06-18 23:15:16 +000023201Long dis_AVX256_E_to_G_unary_all ( /*OUT*/Bool* uses_vvvv,
23202 VexAbiInfo* vbi,
23203 Prefix pfx, Long delta,
florian55085f82012-11-21 00:36:55 +000023204 const HChar* opname, IROp op )
sewardj66becf32012-06-18 23:15:16 +000023205{
23206 HChar dis_buf[50];
23207 Int alen;
23208 IRTemp addr;
23209 IRTemp arg = newTemp(Ity_V256);
23210 UChar rm = getUChar(delta);
23211 UInt rG = gregOfRexRM(pfx, rm);
23212 if (epartIsReg(rm)) {
23213 UInt rE = eregOfRexRM(pfx,rm);
23214 assign(arg, getYMMReg(rE));
23215 delta += 1;
23216 DIP("%s %s,%s\n", opname, nameYMMReg(rE), nameYMMReg(rG));
23217 } else {
23218 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23219 assign(arg, loadLE(Ity_V256, mkexpr(addr)));
23220 delta += alen;
23221 DIP("%s %s,%s\n", opname, dis_buf, nameYMMReg(rG));
23222 }
23223 putYMMReg( rG, unop(op, mkexpr(arg)) );
23224 *uses_vvvv = False;
23225 return delta;
23226}
23227
23228
sewardj6fcd43e2012-06-14 08:51:35 +000023229/* The use of ReinterpF64asI64 is ugly. Surely could do better if we
23230 had a variant of Iop_64x4toV256 that took F64s as args instead. */
23231static Long dis_CVTDQ2PD_256 ( VexAbiInfo* vbi, Prefix pfx,
23232 Long delta )
23233{
23234 IRTemp addr = IRTemp_INVALID;
23235 Int alen = 0;
23236 HChar dis_buf[50];
23237 UChar modrm = getUChar(delta);
23238 IRTemp sV = newTemp(Ity_V128);
23239 UInt rG = gregOfRexRM(pfx,modrm);
23240 if (epartIsReg(modrm)) {
23241 UInt rE = eregOfRexRM(pfx,modrm);
23242 assign( sV, getXMMReg(rE) );
23243 delta += 1;
23244 DIP("vcvtdq2pd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
23245 } else {
23246 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23247 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
23248 delta += alen;
23249 DIP("vcvtdq2pd %s,%s\n", dis_buf, nameYMMReg(rG) );
23250 }
23251 IRTemp s3, s2, s1, s0;
23252 s3 = s2 = s1 = s0 = IRTemp_INVALID;
23253 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
23254 IRExpr* res
23255 = IRExpr_Qop(
23256 Iop_64x4toV256,
23257 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s3))),
23258 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s2))),
23259 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s1))),
23260 unop(Iop_ReinterpF64asI64, unop(Iop_I32StoF64, mkexpr(s0)))
23261 );
23262 putYMMReg(rG, res);
23263 return delta;
23264}
23265
23266
23267static Long dis_CVTPD2PS_256 ( VexAbiInfo* vbi, Prefix pfx,
23268 Long delta )
23269{
23270 IRTemp addr = IRTemp_INVALID;
23271 Int alen = 0;
23272 HChar dis_buf[50];
23273 UChar modrm = getUChar(delta);
23274 UInt rG = gregOfRexRM(pfx,modrm);
23275 IRTemp argV = newTemp(Ity_V256);
23276 IRTemp rmode = newTemp(Ity_I32);
23277 if (epartIsReg(modrm)) {
23278 UInt rE = eregOfRexRM(pfx,modrm);
23279 assign( argV, getYMMReg(rE) );
23280 delta += 1;
23281 DIP("vcvtpd2psy %s,%s\n", nameYMMReg(rE), nameXMMReg(rG));
23282 } else {
23283 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23284 assign( argV, loadLE(Ity_V256, mkexpr(addr)) );
23285 delta += alen;
23286 DIP("vcvtpd2psy %s,%s\n", dis_buf, nameXMMReg(rG) );
23287 }
23288
23289 assign( rmode, get_sse_roundingmode() );
23290 IRTemp t3, t2, t1, t0;
23291 t3 = t2 = t1 = t0 = IRTemp_INVALID;
23292 breakupV256to64s( argV, &t3, &t2, &t1, &t0 );
23293# define CVT(_t) binop( Iop_F64toF32, mkexpr(rmode), \
23294 unop(Iop_ReinterpI64asF64, mkexpr(_t)) )
23295 putXMMRegLane32F( rG, 3, CVT(t3) );
23296 putXMMRegLane32F( rG, 2, CVT(t2) );
23297 putXMMRegLane32F( rG, 1, CVT(t1) );
23298 putXMMRegLane32F( rG, 0, CVT(t0) );
23299# undef CVT
23300 putYMMRegLane128( rG, 1, mkV128(0) );
23301 return delta;
23302}
23303
23304
sewardjcc3d2192013-03-27 11:37:33 +000023305static IRTemp math_VPUNPCK_YMM ( IRTemp tL, IRType tR, IROp op )
23306{
23307 IRTemp tLhi, tLlo, tRhi, tRlo;
23308 tLhi = tLlo = tRhi = tRlo = IRTemp_INVALID;
23309 IRTemp res = newTemp(Ity_V256);
23310 breakupV256toV128s( tL, &tLhi, &tLlo );
23311 breakupV256toV128s( tR, &tRhi, &tRlo );
23312 assign( res, binop( Iop_V128HLtoV256,
23313 binop( op, mkexpr(tRhi), mkexpr(tLhi) ),
23314 binop( op, mkexpr(tRlo), mkexpr(tLlo) ) ) );
23315 return res;
23316}
23317
23318
23319static IRTemp math_VPUNPCKLBW_YMM ( IRTemp tL, IRTemp tR )
23320{
23321 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO8x16 );
23322}
23323
23324
23325static IRTemp math_VPUNPCKLWD_YMM ( IRTemp tL, IRTemp tR )
23326{
23327 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO16x8 );
23328}
23329
23330
23331static IRTemp math_VPUNPCKLDQ_YMM ( IRTemp tL, IRTemp tR )
23332{
23333 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO32x4 );
23334}
23335
23336
23337static IRTemp math_VPUNPCKLQDQ_YMM ( IRTemp tL, IRTemp tR )
23338{
23339 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveLO64x2 );
23340}
23341
23342
23343static IRTemp math_VPUNPCKHBW_YMM ( IRTemp tL, IRTemp tR )
23344{
23345 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI8x16 );
23346}
23347
23348
23349static IRTemp math_VPUNPCKHWD_YMM ( IRTemp tL, IRTemp tR )
23350{
23351 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI16x8 );
23352}
23353
23354
23355static IRTemp math_VPUNPCKHDQ_YMM ( IRTemp tL, IRTemp tR )
23356{
23357 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI32x4 );
23358}
23359
23360
23361static IRTemp math_VPUNPCKHQDQ_YMM ( IRTemp tL, IRTemp tR )
23362{
23363 return math_VPUNPCK_YMM( tL, tR, Iop_InterleaveHI64x2 );
23364}
23365
23366
23367static IRTemp math_VPACKSSWB_YMM ( IRTemp tL, IRTemp tR )
23368{
23369 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Sx16 );
23370}
23371
23372
23373static IRTemp math_VPACKUSWB_YMM ( IRTemp tL, IRTemp tR )
23374{
23375 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin16Sto8Ux16 );
23376}
23377
23378
23379static IRTemp math_VPACKSSDW_YMM ( IRTemp tL, IRTemp tR )
23380{
23381 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Sx8 );
23382}
23383
23384
23385static IRTemp math_VPACKUSDW_YMM ( IRTemp tL, IRTemp tR )
23386{
23387 return math_VPUNPCK_YMM( tL, tR, Iop_QNarrowBin32Sto16Ux8 );
23388}
23389
23390
sewardjc4530ae2012-05-21 10:18:49 +000023391__attribute__((noinline))
23392static
23393Long dis_ESC_0F__VEX (
23394 /*MB_OUT*/DisResult* dres,
23395 /*OUT*/ Bool* uses_vvvv,
23396 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
23397 Bool resteerCisOk,
23398 void* callback_opaque,
23399 VexArchInfo* archinfo,
23400 VexAbiInfo* vbi,
23401 Prefix pfx, Int sz, Long deltaIN
23402 )
23403{
23404 IRTemp addr = IRTemp_INVALID;
23405 Int alen = 0;
23406 HChar dis_buf[50];
23407 Long delta = deltaIN;
23408 UChar opc = getUChar(delta);
23409 delta++;
23410 *uses_vvvv = False;
23411
23412 switch (opc) {
23413
23414 case 0x10:
23415 /* VMOVSD m64, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
23416 /* Move 64 bits from E (mem only) to G (lo half xmm).
23417 Bits 255-64 of the dest are zeroed out. */
23418 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
23419 UChar modrm = getUChar(delta);
23420 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23421 UInt rG = gregOfRexRM(pfx,modrm);
23422 IRTemp z128 = newTemp(Ity_V128);
23423 assign(z128, mkV128(0));
23424 putXMMReg( rG, mkexpr(z128) );
23425 /* FIXME: ALIGNMENT CHECK? */
23426 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
23427 putYMMRegLane128( rG, 1, mkexpr(z128) );
23428 DIP("vmovsd %s,%s\n", dis_buf, nameXMMReg(rG));
23429 delta += alen;
23430 goto decode_success;
23431 }
sewardj21459cb2012-06-18 14:05:52 +000023432 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 10 /r */
23433 /* Reg form. */
23434 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
23435 UChar modrm = getUChar(delta);
23436 UInt rG = gregOfRexRM(pfx, modrm);
23437 UInt rE = eregOfRexRM(pfx, modrm);
23438 UInt rV = getVexNvvvv(pfx);
23439 delta++;
23440 DIP("vmovsd %s,%s,%s\n",
23441 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23442 IRTemp res = newTemp(Ity_V128);
23443 assign(res, binop(Iop_64HLtoV128,
23444 getXMMRegLane64(rV, 1),
23445 getXMMRegLane64(rE, 0)));
23446 putYMMRegLoAndZU(rG, mkexpr(res));
23447 *uses_vvvv = True;
23448 goto decode_success;
23449 }
sewardjc4530ae2012-05-21 10:18:49 +000023450 /* VMOVSS m32, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
23451 /* Move 32 bits from E (mem only) to G (lo half xmm).
23452 Bits 255-32 of the dest are zeroed out. */
23453 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
23454 UChar modrm = getUChar(delta);
23455 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23456 UInt rG = gregOfRexRM(pfx,modrm);
23457 IRTemp z128 = newTemp(Ity_V128);
23458 assign(z128, mkV128(0));
23459 putXMMReg( rG, mkexpr(z128) );
23460 /* FIXME: ALIGNMENT CHECK? */
23461 putXMMRegLane32( rG, 0, loadLE(Ity_I32, mkexpr(addr)) );
23462 putYMMRegLane128( rG, 1, mkexpr(z128) );
23463 DIP("vmovss %s,%s\n", dis_buf, nameXMMReg(rG));
23464 delta += alen;
23465 goto decode_success;
23466 }
sewardj15ad1942012-06-20 10:21:05 +000023467 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 10 /r */
23468 /* Reg form. */
23469 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
23470 UChar modrm = getUChar(delta);
23471 UInt rG = gregOfRexRM(pfx, modrm);
23472 UInt rE = eregOfRexRM(pfx, modrm);
23473 UInt rV = getVexNvvvv(pfx);
23474 delta++;
23475 DIP("vmovss %s,%s,%s\n",
23476 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23477 IRTemp res = newTemp(Ity_V128);
23478 assign( res, binop( Iop_64HLtoV128,
23479 getXMMRegLane64(rV, 1),
23480 binop(Iop_32HLto64,
23481 getXMMRegLane32(rV, 1),
23482 getXMMRegLane32(rE, 0)) ) );
23483 putYMMRegLoAndZU(rG, mkexpr(res));
23484 *uses_vvvv = True;
23485 goto decode_success;
23486 }
sewardj56c30312012-06-12 08:45:39 +000023487 /* VMOVUPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 10 /r */
23488 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23489 UChar modrm = getUChar(delta);
23490 UInt rG = gregOfRexRM(pfx, modrm);
23491 if (epartIsReg(modrm)) {
23492 UInt rE = eregOfRexRM(pfx,modrm);
23493 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23494 DIP("vmovupd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23495 delta += 1;
23496 } else {
23497 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23498 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23499 DIP("vmovupd %s,%s\n", dis_buf, nameXMMReg(rG));
23500 delta += alen;
23501 }
23502 goto decode_success;
23503 }
sewardjfce47a62012-06-03 23:12:33 +000023504 /* VMOVUPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 10 /r */
23505 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23506 UChar modrm = getUChar(delta);
23507 UInt rG = gregOfRexRM(pfx, modrm);
23508 if (epartIsReg(modrm)) {
23509 UInt rE = eregOfRexRM(pfx,modrm);
23510 putYMMReg( rG, getYMMReg( rE ));
23511 DIP("vmovupd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23512 delta += 1;
23513 } else {
23514 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23515 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23516 DIP("vmovupd %s,%s\n", dis_buf, nameYMMReg(rG));
23517 delta += alen;
23518 }
23519 goto decode_success;
23520 }
sewardj56c30312012-06-12 08:45:39 +000023521 /* VMOVUPS xmm2/m128, xmm1 = VEX.128.0F.WIG 10 /r */
23522 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23523 UChar modrm = getUChar(delta);
23524 UInt rG = gregOfRexRM(pfx, modrm);
23525 if (epartIsReg(modrm)) {
23526 UInt rE = eregOfRexRM(pfx,modrm);
23527 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23528 DIP("vmovups %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23529 delta += 1;
23530 } else {
23531 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23532 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23533 DIP("vmovups %s,%s\n", dis_buf, nameXMMReg(rG));
23534 delta += alen;
23535 }
23536 goto decode_success;
23537 }
sewardj66becf32012-06-18 23:15:16 +000023538 /* VMOVUPS ymm2/m256, ymm1 = VEX.256.0F.WIG 10 /r */
23539 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23540 UChar modrm = getUChar(delta);
23541 UInt rG = gregOfRexRM(pfx, modrm);
23542 if (epartIsReg(modrm)) {
23543 UInt rE = eregOfRexRM(pfx,modrm);
23544 putYMMReg( rG, getYMMReg( rE ));
23545 DIP("vmovups %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23546 delta += 1;
23547 } else {
23548 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23549 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23550 DIP("vmovups %s,%s\n", dis_buf, nameYMMReg(rG));
23551 delta += alen;
23552 }
23553 goto decode_success;
23554 }
sewardjc4530ae2012-05-21 10:18:49 +000023555 break;
23556
23557 case 0x11:
23558 /* VMOVSD xmm1, m64 = VEX.LIG.F2.0F.WIG 11 /r */
23559 /* Move 64 bits from G (low half xmm) to mem only. */
23560 if (haveF2no66noF3(pfx) && !epartIsReg(getUChar(delta))) {
23561 UChar modrm = getUChar(delta);
23562 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23563 UInt rG = gregOfRexRM(pfx,modrm);
23564 /* FIXME: ALIGNMENT CHECK? */
23565 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0));
23566 DIP("vmovsd %s,%s\n", nameXMMReg(rG), dis_buf);
23567 delta += alen;
23568 goto decode_success;
23569 }
sewardj21459cb2012-06-18 14:05:52 +000023570 /* VMOVSD xmm3, xmm2, xmm1 = VEX.LIG.F2.0F.WIG 11 /r */
23571 /* Reg form. */
23572 if (haveF2no66noF3(pfx) && epartIsReg(getUChar(delta))) {
23573 UChar modrm = getUChar(delta);
23574 UInt rG = gregOfRexRM(pfx, modrm);
23575 UInt rE = eregOfRexRM(pfx, modrm);
23576 UInt rV = getVexNvvvv(pfx);
23577 delta++;
23578 DIP("vmovsd %s,%s,%s\n",
23579 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23580 IRTemp res = newTemp(Ity_V128);
23581 assign(res, binop(Iop_64HLtoV128,
23582 getXMMRegLane64(rV, 1),
23583 getXMMRegLane64(rE, 0)));
23584 putYMMRegLoAndZU(rG, mkexpr(res));
23585 *uses_vvvv = True;
23586 goto decode_success;
23587 }
sewardjc4530ae2012-05-21 10:18:49 +000023588 /* VMOVSS xmm1, m64 = VEX.LIG.F3.0F.WIG 11 /r */
23589 /* Move 32 bits from G (low 1/4 xmm) to mem only. */
23590 if (haveF3no66noF2(pfx) && !epartIsReg(getUChar(delta))) {
23591 UChar modrm = getUChar(delta);
23592 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23593 UInt rG = gregOfRexRM(pfx,modrm);
23594 /* FIXME: ALIGNMENT CHECK? */
23595 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0));
23596 DIP("vmovss %s,%s\n", nameXMMReg(rG), dis_buf);
23597 delta += alen;
23598 goto decode_success;
23599 }
sewardj15ad1942012-06-20 10:21:05 +000023600 /* VMOVSS xmm3, xmm2, xmm1 = VEX.LIG.F3.0F.WIG 11 /r */
23601 /* Reg form. */
23602 if (haveF3no66noF2(pfx) && epartIsReg(getUChar(delta))) {
23603 UChar modrm = getUChar(delta);
23604 UInt rG = gregOfRexRM(pfx, modrm);
23605 UInt rE = eregOfRexRM(pfx, modrm);
23606 UInt rV = getVexNvvvv(pfx);
23607 delta++;
23608 DIP("vmovss %s,%s,%s\n",
23609 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23610 IRTemp res = newTemp(Ity_V128);
23611 assign( res, binop( Iop_64HLtoV128,
23612 getXMMRegLane64(rV, 1),
23613 binop(Iop_32HLto64,
23614 getXMMRegLane32(rV, 1),
23615 getXMMRegLane32(rE, 0)) ) );
23616 putYMMRegLoAndZU(rG, mkexpr(res));
23617 *uses_vvvv = True;
23618 goto decode_success;
23619 }
sewardjc4530ae2012-05-21 10:18:49 +000023620 /* VMOVUPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 11 /r */
23621 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23622 UChar modrm = getUChar(delta);
23623 UInt rG = gregOfRexRM(pfx,modrm);
23624 if (epartIsReg(modrm)) {
23625 UInt rE = eregOfRexRM(pfx,modrm);
23626 putYMMRegLoAndZU( rE, getXMMReg(rG) );
23627 DIP("vmovupd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
23628 delta += 1;
23629 } else {
23630 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23631 storeLE( mkexpr(addr), getXMMReg(rG) );
23632 DIP("vmovupd %s,%s\n", nameXMMReg(rG), dis_buf);
23633 delta += alen;
23634 }
23635 goto decode_success;
23636 }
sewardj4ed05e02012-06-18 15:01:30 +000023637 /* VMOVUPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 11 /r */
23638 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23639 UChar modrm = getUChar(delta);
23640 UInt rG = gregOfRexRM(pfx,modrm);
23641 if (epartIsReg(modrm)) {
23642 UInt rE = eregOfRexRM(pfx,modrm);
23643 putYMMReg( rE, getYMMReg(rG) );
23644 DIP("vmovupd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
23645 delta += 1;
23646 } else {
23647 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23648 storeLE( mkexpr(addr), getYMMReg(rG) );
23649 DIP("vmovupd %s,%s\n", nameYMMReg(rG), dis_buf);
23650 delta += alen;
23651 }
23652 goto decode_success;
23653 }
sewardj6eaf00c2012-05-23 11:33:56 +000023654 /* VMOVUPS xmm1, xmm2/m128 = VEX.128.0F.WIG 11 /r */
23655 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23656 UChar modrm = getUChar(delta);
23657 UInt rG = gregOfRexRM(pfx,modrm);
23658 if (epartIsReg(modrm)) {
23659 UInt rE = eregOfRexRM(pfx,modrm);
23660 putYMMRegLoAndZU( rE, getXMMReg(rG) );
23661 DIP("vmovups %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
23662 delta += 1;
23663 } else {
23664 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23665 storeLE( mkexpr(addr), getXMMReg(rG) );
23666 DIP("vmovups %s,%s\n", nameXMMReg(rG), dis_buf);
23667 delta += alen;
23668 }
23669 goto decode_success;
23670 }
sewardj4ed05e02012-06-18 15:01:30 +000023671 /* VMOVUPS ymm1, ymm2/m256 = VEX.256.0F.WIG 11 /r */
23672 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfce47a62012-06-03 23:12:33 +000023673 UChar modrm = getUChar(delta);
23674 UInt rG = gregOfRexRM(pfx,modrm);
23675 if (epartIsReg(modrm)) {
23676 UInt rE = eregOfRexRM(pfx,modrm);
23677 putYMMReg( rE, getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000023678 DIP("vmovups %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjfce47a62012-06-03 23:12:33 +000023679 delta += 1;
23680 } else {
23681 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23682 storeLE( mkexpr(addr), getYMMReg(rG) );
sewardj4ed05e02012-06-18 15:01:30 +000023683 DIP("vmovups %s,%s\n", nameYMMReg(rG), dis_buf);
sewardjfce47a62012-06-03 23:12:33 +000023684 delta += alen;
23685 }
23686 goto decode_success;
23687 }
sewardjc4530ae2012-05-21 10:18:49 +000023688 break;
23689
23690 case 0x12:
sewardj53b12782012-05-22 23:34:06 +000023691 /* VMOVDDUP xmm2/m64, xmm1 = VEX.128.F2.0F.WIG /12 r */
sewardjc4530ae2012-05-21 10:18:49 +000023692 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23693 delta = dis_MOVDDUP_128( vbi, pfx, delta, True/*isAvx*/ );
23694 goto decode_success;
23695 }
sewardj82096922012-06-24 14:57:59 +000023696 /* VMOVDDUP ymm2/m256, ymm1 = VEX.256.F2.0F.WIG /12 r */
23697 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23698 delta = dis_MOVDDUP_256( vbi, pfx, delta );
23699 goto decode_success;
23700 }
sewardj53b12782012-05-22 23:34:06 +000023701 /* VMOVHLPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 12 /r */
23702 /* Insn only exists in reg form */
23703 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
23704 && epartIsReg(getUChar(delta))) {
23705 UChar modrm = getUChar(delta);
23706 UInt rG = gregOfRexRM(pfx, modrm);
23707 UInt rE = eregOfRexRM(pfx, modrm);
23708 UInt rV = getVexNvvvv(pfx);
23709 delta++;
23710 DIP("vmovhlps %s,%s,%s\n",
23711 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23712 IRTemp res = newTemp(Ity_V128);
23713 assign(res, binop(Iop_64HLtoV128,
23714 getXMMRegLane64(rV, 1),
23715 getXMMRegLane64(rE, 1)));
23716 putYMMRegLoAndZU(rG, mkexpr(res));
23717 *uses_vvvv = True;
23718 goto decode_success;
23719 }
sewardj82096922012-06-24 14:57:59 +000023720 /* VMOVLPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 12 /r */
23721 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000023722 /* VMOVLPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 12 /r */
23723 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000023724 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23725 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000023726 UChar modrm = getUChar(delta);
23727 UInt rG = gregOfRexRM(pfx, modrm);
23728 UInt rV = getVexNvvvv(pfx);
23729 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23730 delta += alen;
23731 DIP("vmovlpd %s,%s,%s\n",
23732 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23733 IRTemp res = newTemp(Ity_V128);
23734 assign(res, binop(Iop_64HLtoV128,
23735 getXMMRegLane64(rV, 1),
23736 loadLE(Ity_I64, mkexpr(addr))));
23737 putYMMRegLoAndZU(rG, mkexpr(res));
23738 *uses_vvvv = True;
23739 goto decode_success;
23740 }
sewardj15ad1942012-06-20 10:21:05 +000023741 /* VMOVSLDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 12 /r */
23742 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
23743 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
23744 True/*isL*/ );
23745 goto decode_success;
23746 }
23747 /* VMOVSLDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 12 /r */
23748 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
23749 delta = dis_MOVSxDUP_256( vbi, pfx, delta, True/*isL*/ );
23750 goto decode_success;
23751 }
sewardj21459cb2012-06-18 14:05:52 +000023752 break;
23753
23754 case 0x13:
sewardj82096922012-06-24 14:57:59 +000023755 /* VMOVLPS xmm1, m64 = VEX.128.0F.WIG 13 /r */
23756 /* Insn exists only in mem form, it appears. */
sewardj21459cb2012-06-18 14:05:52 +000023757 /* VMOVLPD xmm1, m64 = VEX.128.66.0F.WIG 13 /r */
23758 /* Insn exists only in mem form, it appears. */
sewardj82096922012-06-24 14:57:59 +000023759 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23760 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj21459cb2012-06-18 14:05:52 +000023761 UChar modrm = getUChar(delta);
23762 UInt rG = gregOfRexRM(pfx, modrm);
23763 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23764 delta += alen;
23765 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0));
23766 DIP("vmovlpd %s,%s\n", nameXMMReg(rG), dis_buf);
23767 goto decode_success;
23768 }
sewardjc4530ae2012-05-21 10:18:49 +000023769 break;
23770
23771 case 0x14:
sewardj56c30312012-06-12 08:45:39 +000023772 case 0x15:
sewardjc4530ae2012-05-21 10:18:49 +000023773 /* VUNPCKLPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 14 /r */
sewardj56c30312012-06-12 08:45:39 +000023774 /* VUNPCKHPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 15 /r */
sewardjc4530ae2012-05-21 10:18:49 +000023775 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23776 Bool hi = opc == 0x15;
23777 UChar modrm = getUChar(delta);
23778 UInt rG = gregOfRexRM(pfx,modrm);
23779 UInt rV = getVexNvvvv(pfx);
23780 IRTemp eV = newTemp(Ity_V128);
23781 IRTemp vV = newTemp(Ity_V128);
23782 assign( vV, getXMMReg(rV) );
23783 if (epartIsReg(modrm)) {
23784 UInt rE = eregOfRexRM(pfx,modrm);
23785 assign( eV, getXMMReg(rE) );
23786 delta += 1;
23787 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23788 nameXMMReg(rE), nameXMMReg(rG));
23789 } else {
23790 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23791 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
23792 delta += alen;
23793 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23794 dis_buf, nameXMMReg(rG));
23795 }
sewardj56c30312012-06-12 08:45:39 +000023796 IRTemp res = math_UNPCKxPS_128( eV, vV, hi );
23797 putYMMRegLoAndZU( rG, mkexpr(res) );
23798 *uses_vvvv = True;
23799 goto decode_success;
23800 }
sewardj4b1cc832012-06-13 11:10:20 +000023801 /* VUNPCKLPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 14 /r */
23802 /* VUNPCKHPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 15 /r */
23803 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23804 Bool hi = opc == 0x15;
23805 UChar modrm = getUChar(delta);
23806 UInt rG = gregOfRexRM(pfx,modrm);
23807 UInt rV = getVexNvvvv(pfx);
23808 IRTemp eV = newTemp(Ity_V256);
23809 IRTemp vV = newTemp(Ity_V256);
23810 assign( vV, getYMMReg(rV) );
23811 if (epartIsReg(modrm)) {
23812 UInt rE = eregOfRexRM(pfx,modrm);
23813 assign( eV, getYMMReg(rE) );
23814 delta += 1;
23815 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23816 nameYMMReg(rE), nameYMMReg(rG));
23817 } else {
23818 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23819 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
23820 delta += alen;
23821 DIP("vunpck%sps %s,%s\n", hi ? "h" : "l",
23822 dis_buf, nameYMMReg(rG));
23823 }
23824 IRTemp res = math_UNPCKxPS_256( eV, vV, hi );
23825 putYMMReg( rG, mkexpr(res) );
23826 *uses_vvvv = True;
23827 goto decode_success;
23828 }
sewardj56c30312012-06-12 08:45:39 +000023829 /* VUNPCKLPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 14 /r */
23830 /* VUNPCKHPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 15 /r */
23831 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23832 Bool hi = opc == 0x15;
23833 UChar modrm = getUChar(delta);
23834 UInt rG = gregOfRexRM(pfx,modrm);
23835 UInt rV = getVexNvvvv(pfx);
23836 IRTemp eV = newTemp(Ity_V128);
23837 IRTemp vV = newTemp(Ity_V128);
23838 assign( vV, getXMMReg(rV) );
23839 if (epartIsReg(modrm)) {
23840 UInt rE = eregOfRexRM(pfx,modrm);
23841 assign( eV, getXMMReg(rE) );
23842 delta += 1;
23843 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23844 nameXMMReg(rE), nameXMMReg(rG));
23845 } else {
23846 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23847 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
23848 delta += alen;
23849 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23850 dis_buf, nameXMMReg(rG));
23851 }
23852 IRTemp res = math_UNPCKxPD_128( eV, vV, hi );
sewardjc4530ae2012-05-21 10:18:49 +000023853 putYMMRegLoAndZU( rG, mkexpr(res) );
23854 *uses_vvvv = True;
23855 goto decode_success;
23856 }
sewardj4b1cc832012-06-13 11:10:20 +000023857 /* VUNPCKLPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 14 /r */
23858 /* VUNPCKHPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 15 /r */
23859 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23860 Bool hi = opc == 0x15;
23861 UChar modrm = getUChar(delta);
23862 UInt rG = gregOfRexRM(pfx,modrm);
23863 UInt rV = getVexNvvvv(pfx);
23864 IRTemp eV = newTemp(Ity_V256);
23865 IRTemp vV = newTemp(Ity_V256);
23866 assign( vV, getYMMReg(rV) );
23867 if (epartIsReg(modrm)) {
23868 UInt rE = eregOfRexRM(pfx,modrm);
23869 assign( eV, getYMMReg(rE) );
23870 delta += 1;
23871 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23872 nameYMMReg(rE), nameYMMReg(rG));
23873 } else {
23874 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23875 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
23876 delta += alen;
23877 DIP("vunpck%spd %s,%s\n", hi ? "h" : "l",
23878 dis_buf, nameYMMReg(rG));
23879 }
23880 IRTemp res = math_UNPCKxPD_256( eV, vV, hi );
23881 putYMMReg( rG, mkexpr(res) );
23882 *uses_vvvv = True;
23883 goto decode_success;
23884 }
sewardjc4530ae2012-05-21 10:18:49 +000023885 break;
23886
sewardj91fa9802012-05-23 06:16:26 +000023887 case 0x16:
23888 /* VMOVLHPS xmm3, xmm2, xmm1 = VEX.NDS.128.0F.WIG 16 /r */
23889 /* Insn only exists in reg form */
23890 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
23891 && epartIsReg(getUChar(delta))) {
23892 UChar modrm = getUChar(delta);
23893 UInt rG = gregOfRexRM(pfx, modrm);
23894 UInt rE = eregOfRexRM(pfx, modrm);
23895 UInt rV = getVexNvvvv(pfx);
23896 delta++;
23897 DIP("vmovlhps %s,%s,%s\n",
23898 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
23899 IRTemp res = newTemp(Ity_V128);
23900 assign(res, binop(Iop_64HLtoV128,
23901 getXMMRegLane64(rE, 0),
23902 getXMMRegLane64(rV, 0)));
23903 putYMMRegLoAndZU(rG, mkexpr(res));
23904 *uses_vvvv = True;
23905 goto decode_success;
23906 }
sewardj8eb7ae82012-06-24 14:00:27 +000023907 /* VMOVHPS m64, xmm1, xmm2 = VEX.NDS.128.0F.WIG 16 /r */
23908 /* Insn exists only in mem form, it appears. */
sewardj6fcd43e2012-06-14 08:51:35 +000023909 /* VMOVHPD m64, xmm1, xmm2 = VEX.NDS.128.66.0F.WIG 16 /r */
23910 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000023911 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23912 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj6fcd43e2012-06-14 08:51:35 +000023913 UChar modrm = getUChar(delta);
23914 UInt rG = gregOfRexRM(pfx, modrm);
23915 UInt rV = getVexNvvvv(pfx);
23916 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23917 delta += alen;
sewardj8eb7ae82012-06-24 14:00:27 +000023918 DIP("vmovhp%c %s,%s,%s\n", have66(pfx) ? 'd' : 's',
sewardj6fcd43e2012-06-14 08:51:35 +000023919 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
23920 IRTemp res = newTemp(Ity_V128);
23921 assign(res, binop(Iop_64HLtoV128,
23922 loadLE(Ity_I64, mkexpr(addr)),
23923 getXMMRegLane64(rV, 0)));
23924 putYMMRegLoAndZU(rG, mkexpr(res));
23925 *uses_vvvv = True;
23926 goto decode_success;
23927 }
sewardj15ad1942012-06-20 10:21:05 +000023928 /* VMOVSHDUP xmm2/m128, xmm1 = VEX.NDS.128.F3.0F.WIG 16 /r */
23929 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
23930 delta = dis_MOVSxDUP_128( vbi, pfx, delta, True/*isAvx*/,
23931 False/*!isL*/ );
23932 goto decode_success;
23933 }
23934 /* VMOVSHDUP ymm2/m256, ymm1 = VEX.NDS.256.F3.0F.WIG 16 /r */
23935 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
23936 delta = dis_MOVSxDUP_256( vbi, pfx, delta, False/*!isL*/ );
23937 goto decode_success;
23938 }
sewardj91fa9802012-05-23 06:16:26 +000023939 break;
23940
sewardj8ef22422012-05-24 16:29:18 +000023941 case 0x17:
sewardj8eb7ae82012-06-24 14:00:27 +000023942 /* VMOVHPS xmm1, m64 = VEX.128.0F.WIG 17 /r */
23943 /* Insn exists only in mem form, it appears. */
sewardj8ef22422012-05-24 16:29:18 +000023944 /* VMOVHPD xmm1, m64 = VEX.128.66.0F.WIG 17 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000023945 /* Insn exists only in mem form, it appears. */
sewardj8eb7ae82012-06-24 14:00:27 +000023946 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
23947 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
sewardj8ef22422012-05-24 16:29:18 +000023948 UChar modrm = getUChar(delta);
23949 UInt rG = gregOfRexRM(pfx, modrm);
23950 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23951 delta += alen;
23952 storeLE( mkexpr(addr), getXMMRegLane64( rG, 1));
sewardj8eb7ae82012-06-24 14:00:27 +000023953 DIP("vmovhp%c %s,%s\n", have66(pfx) ? 'd' : 's',
23954 nameXMMReg(rG), dis_buf);
sewardj8ef22422012-05-24 16:29:18 +000023955 goto decode_success;
23956 }
23957 break;
23958
sewardjc4530ae2012-05-21 10:18:49 +000023959 case 0x28:
23960 /* VMOVAPD xmm2/m128, xmm1 = VEX.128.66.0F.WIG 28 /r */
23961 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23962 UChar modrm = getUChar(delta);
23963 UInt rG = gregOfRexRM(pfx, modrm);
23964 if (epartIsReg(modrm)) {
23965 UInt rE = eregOfRexRM(pfx,modrm);
23966 putYMMRegLoAndZU( rG, getXMMReg( rE ));
23967 DIP("vmovapd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
23968 delta += 1;
23969 } else {
23970 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23971 gen_SEGV_if_not_16_aligned( addr );
23972 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
23973 DIP("vmovapd %s,%s\n", dis_buf, nameXMMReg(rG));
23974 delta += alen;
23975 }
23976 goto decode_success;
23977 }
23978 /* VMOVAPD ymm2/m256, ymm1 = VEX.256.66.0F.WIG 28 /r */
23979 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
23980 UChar modrm = getUChar(delta);
23981 UInt rG = gregOfRexRM(pfx, modrm);
23982 if (epartIsReg(modrm)) {
23983 UInt rE = eregOfRexRM(pfx,modrm);
23984 putYMMReg( rG, getYMMReg( rE ));
23985 DIP("vmovapd %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
23986 delta += 1;
23987 } else {
23988 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
23989 gen_SEGV_if_not_32_aligned( addr );
23990 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
23991 DIP("vmovapd %s,%s\n", dis_buf, nameYMMReg(rG));
23992 delta += alen;
23993 }
23994 goto decode_success;
23995 }
23996 /* VMOVAPS xmm2/m128, xmm1 = VEX.128.0F.WIG 28 /r */
23997 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
23998 UChar modrm = getUChar(delta);
23999 UInt rG = gregOfRexRM(pfx, modrm);
24000 if (epartIsReg(modrm)) {
24001 UInt rE = eregOfRexRM(pfx,modrm);
24002 putYMMRegLoAndZU( rG, getXMMReg( rE ));
24003 DIP("vmovaps %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
24004 delta += 1;
24005 } else {
24006 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24007 gen_SEGV_if_not_16_aligned( addr );
24008 putYMMRegLoAndZU( rG, loadLE(Ity_V128, mkexpr(addr)) );
24009 DIP("vmovaps %s,%s\n", dis_buf, nameXMMReg(rG));
24010 delta += alen;
24011 }
24012 goto decode_success;
24013 }
sewardj6fcd43e2012-06-14 08:51:35 +000024014 /* VMOVAPS ymm2/m256, ymm1 = VEX.256.0F.WIG 28 /r */
24015 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24016 UChar modrm = getUChar(delta);
24017 UInt rG = gregOfRexRM(pfx, modrm);
24018 if (epartIsReg(modrm)) {
24019 UInt rE = eregOfRexRM(pfx,modrm);
24020 putYMMReg( rG, getYMMReg( rE ));
24021 DIP("vmovaps %s,%s\n", nameYMMReg(rE), nameYMMReg(rG));
24022 delta += 1;
24023 } else {
24024 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24025 gen_SEGV_if_not_32_aligned( addr );
24026 putYMMReg( rG, loadLE(Ity_V256, mkexpr(addr)) );
24027 DIP("vmovaps %s,%s\n", dis_buf, nameYMMReg(rG));
24028 delta += alen;
24029 }
24030 goto decode_success;
24031 }
sewardjc4530ae2012-05-21 10:18:49 +000024032 break;
24033
24034 case 0x29:
sewardj98d02cc2012-06-02 11:55:25 +000024035 /* VMOVAPD xmm1, xmm2/m128 = VEX.128.66.0F.WIG 29 /r */
24036 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24037 UChar modrm = getUChar(delta);
24038 UInt rG = gregOfRexRM(pfx,modrm);
24039 if (epartIsReg(modrm)) {
24040 UInt rE = eregOfRexRM(pfx,modrm);
24041 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24042 DIP("vmovapd %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24043 delta += 1;
24044 } else {
24045 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24046 gen_SEGV_if_not_16_aligned( addr );
24047 storeLE( mkexpr(addr), getXMMReg(rG) );
24048 DIP("vmovapd %s,%s\n", nameXMMReg(rG), dis_buf );
24049 delta += alen;
24050 }
24051 goto decode_success;
24052 }
24053 /* VMOVAPD ymm1, ymm2/m256 = VEX.256.66.0F.WIG 29 /r */
24054 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24055 UChar modrm = getUChar(delta);
24056 UInt rG = gregOfRexRM(pfx,modrm);
24057 if (epartIsReg(modrm)) {
24058 UInt rE = eregOfRexRM(pfx,modrm);
24059 putYMMReg( rE, getYMMReg(rG) );
24060 DIP("vmovapd %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
24061 delta += 1;
24062 } else {
24063 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24064 gen_SEGV_if_not_32_aligned( addr );
24065 storeLE( mkexpr(addr), getYMMReg(rG) );
24066 DIP("vmovapd %s,%s\n", nameYMMReg(rG), dis_buf );
24067 delta += alen;
24068 }
24069 goto decode_success;
24070 }
sewardjc4530ae2012-05-21 10:18:49 +000024071 /* VMOVAPS xmm1, xmm2/m128 = VEX.128.0F.WIG 29 /r */
24072 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24073 UChar modrm = getUChar(delta);
24074 UInt rG = gregOfRexRM(pfx,modrm);
24075 if (epartIsReg(modrm)) {
24076 UInt rE = eregOfRexRM(pfx,modrm);
24077 putYMMRegLoAndZU( rE, getXMMReg(rG) );
24078 DIP("vmovaps %s,%s\n", nameXMMReg(rG), nameXMMReg(rE));
24079 delta += 1;
24080 goto decode_success;
24081 } else {
24082 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24083 gen_SEGV_if_not_16_aligned( addr );
24084 storeLE( mkexpr(addr), getXMMReg(rG) );
24085 DIP("vmovaps %s,%s\n", nameXMMReg(rG), dis_buf );
24086 delta += alen;
24087 goto decode_success;
24088 }
24089 }
sewardj98d02cc2012-06-02 11:55:25 +000024090 /* VMOVAPS ymm1, ymm2/m256 = VEX.256.0F.WIG 29 /r */
24091 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000024092 UChar modrm = getUChar(delta);
24093 UInt rG = gregOfRexRM(pfx,modrm);
24094 if (epartIsReg(modrm)) {
24095 UInt rE = eregOfRexRM(pfx,modrm);
sewardj98d02cc2012-06-02 11:55:25 +000024096 putYMMReg( rE, getYMMReg(rG) );
24097 DIP("vmovaps %s,%s\n", nameYMMReg(rG), nameYMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000024098 delta += 1;
sewardj98d02cc2012-06-02 11:55:25 +000024099 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000024100 } else {
24101 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardj98d02cc2012-06-02 11:55:25 +000024102 gen_SEGV_if_not_32_aligned( addr );
24103 storeLE( mkexpr(addr), getYMMReg(rG) );
24104 DIP("vmovaps %s,%s\n", nameYMMReg(rG), dis_buf );
sewardjc4530ae2012-05-21 10:18:49 +000024105 delta += alen;
sewardj98d02cc2012-06-02 11:55:25 +000024106 goto decode_success;
sewardjc4530ae2012-05-21 10:18:49 +000024107 }
sewardjc4530ae2012-05-21 10:18:49 +000024108 }
24109 break;
24110
24111 case 0x2A: {
24112 IRTemp rmode = newTemp(Ity_I32);
24113 assign( rmode, get_sse_roundingmode() );
24114 /* VCVTSI2SD r/m32, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W0 2A /r */
24115 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24116 UChar modrm = getUChar(delta);
24117 UInt rV = getVexNvvvv(pfx);
24118 UInt rD = gregOfRexRM(pfx, modrm);
24119 IRTemp arg32 = newTemp(Ity_I32);
24120 if (epartIsReg(modrm)) {
24121 UInt rS = eregOfRexRM(pfx,modrm);
24122 assign( arg32, getIReg32(rS) );
24123 delta += 1;
24124 DIP("vcvtsi2sdl %s,%s,%s\n",
24125 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24126 } else {
24127 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24128 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24129 delta += alen;
24130 DIP("vcvtsi2sdl %s,%s,%s\n",
24131 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24132 }
24133 putXMMRegLane64F( rD, 0,
24134 unop(Iop_I32StoF64, mkexpr(arg32)));
24135 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24136 putYMMRegLane128( rD, 1, mkV128(0) );
24137 *uses_vvvv = True;
24138 goto decode_success;
24139 }
24140 /* VCVTSI2SD r/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.W1 2A /r */
24141 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24142 UChar modrm = getUChar(delta);
24143 UInt rV = getVexNvvvv(pfx);
24144 UInt rD = gregOfRexRM(pfx, modrm);
24145 IRTemp arg64 = newTemp(Ity_I64);
24146 if (epartIsReg(modrm)) {
24147 UInt rS = eregOfRexRM(pfx,modrm);
24148 assign( arg64, getIReg64(rS) );
24149 delta += 1;
24150 DIP("vcvtsi2sdq %s,%s,%s\n",
24151 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24152 } else {
24153 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24154 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24155 delta += alen;
24156 DIP("vcvtsi2sdq %s,%s,%s\n",
24157 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24158 }
24159 putXMMRegLane64F( rD, 0,
24160 binop( Iop_I64StoF64,
24161 get_sse_roundingmode(),
24162 mkexpr(arg64)) );
24163 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24164 putYMMRegLane128( rD, 1, mkV128(0) );
24165 *uses_vvvv = True;
24166 goto decode_success;
24167 }
24168 /* VCVTSI2SS r/m64, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W1 2A /r */
24169 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24170 UChar modrm = getUChar(delta);
24171 UInt rV = getVexNvvvv(pfx);
24172 UInt rD = gregOfRexRM(pfx, modrm);
24173 IRTemp arg64 = newTemp(Ity_I64);
24174 if (epartIsReg(modrm)) {
24175 UInt rS = eregOfRexRM(pfx,modrm);
24176 assign( arg64, getIReg64(rS) );
24177 delta += 1;
24178 DIP("vcvtsi2ssq %s,%s,%s\n",
24179 nameIReg64(rS), nameXMMReg(rV), nameXMMReg(rD));
24180 } else {
24181 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24182 assign( arg64, loadLE(Ity_I64, mkexpr(addr)) );
24183 delta += alen;
24184 DIP("vcvtsi2ssq %s,%s,%s\n",
24185 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24186 }
24187 putXMMRegLane32F( rD, 0,
24188 binop(Iop_F64toF32,
24189 mkexpr(rmode),
24190 binop(Iop_I64StoF64, mkexpr(rmode),
24191 mkexpr(arg64)) ) );
24192 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24193 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24194 putYMMRegLane128( rD, 1, mkV128(0) );
24195 *uses_vvvv = True;
24196 goto decode_success;
24197 }
24198 /* VCVTSI2SS r/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.W0 2A /r */
24199 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24200 UChar modrm = getUChar(delta);
24201 UInt rV = getVexNvvvv(pfx);
24202 UInt rD = gregOfRexRM(pfx, modrm);
24203 IRTemp arg32 = newTemp(Ity_I32);
24204 if (epartIsReg(modrm)) {
24205 UInt rS = eregOfRexRM(pfx,modrm);
24206 assign( arg32, getIReg32(rS) );
24207 delta += 1;
24208 DIP("vcvtsi2ssl %s,%s,%s\n",
24209 nameIReg32(rS), nameXMMReg(rV), nameXMMReg(rD));
24210 } else {
24211 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24212 assign( arg32, loadLE(Ity_I32, mkexpr(addr)) );
24213 delta += alen;
24214 DIP("vcvtsi2ssl %s,%s,%s\n",
24215 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24216 }
24217 putXMMRegLane32F( rD, 0,
24218 binop(Iop_F64toF32,
24219 mkexpr(rmode),
24220 unop(Iop_I32StoF64, mkexpr(arg32)) ) );
24221 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24222 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24223 putYMMRegLane128( rD, 1, mkV128(0) );
24224 *uses_vvvv = True;
24225 goto decode_success;
24226 }
24227 break;
24228 }
24229
sewardj8eb7ae82012-06-24 14:00:27 +000024230 case 0x2B:
24231 /* VMOVNTPD xmm1, m128 = VEX.128.66.0F.WIG 2B /r */
24232 /* VMOVNTPS xmm1, m128 = VEX.128.0F.WIG 2B /r */
24233 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24234 && 0==getVexL(pfx)/*128*/ && !epartIsReg(getUChar(delta))) {
24235 UChar modrm = getUChar(delta);
24236 UInt rS = gregOfRexRM(pfx, modrm);
24237 IRTemp tS = newTemp(Ity_V128);
24238 assign(tS, getXMMReg(rS));
24239 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24240 delta += alen;
24241 gen_SEGV_if_not_16_aligned(addr);
24242 storeLE(mkexpr(addr), mkexpr(tS));
24243 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24244 nameXMMReg(rS), dis_buf);
24245 goto decode_success;
24246 }
24247 /* VMOVNTPD ymm1, m256 = VEX.256.66.0F.WIG 2B /r */
24248 /* VMOVNTPS ymm1, m256 = VEX.256.0F.WIG 2B /r */
24249 if ((have66noF2noF3(pfx) || haveNo66noF2noF3(pfx))
24250 && 1==getVexL(pfx)/*256*/ && !epartIsReg(getUChar(delta))) {
24251 UChar modrm = getUChar(delta);
24252 UInt rS = gregOfRexRM(pfx, modrm);
24253 IRTemp tS = newTemp(Ity_V256);
24254 assign(tS, getYMMReg(rS));
24255 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
24256 delta += alen;
24257 gen_SEGV_if_not_32_aligned(addr);
24258 storeLE(mkexpr(addr), mkexpr(tS));
24259 DIP("vmovntp%c %s,%s\n", have66(pfx) ? 'd' : 's',
24260 nameYMMReg(rS), dis_buf);
24261 goto decode_success;
24262 }
24263 break;
24264
sewardjc4530ae2012-05-21 10:18:49 +000024265 case 0x2C:
sewardj80804d12012-05-22 10:48:13 +000024266 /* VCVTTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2C /r */
sewardjc4530ae2012-05-21 10:18:49 +000024267 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24268 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24269 goto decode_success;
24270 }
24271 /* VCVTTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2C /r */
24272 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24273 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24274 goto decode_success;
24275 }
sewardj80804d12012-05-22 10:48:13 +000024276 /* VCVTTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2C /r */
24277 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24278 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24279 goto decode_success;
24280 }
sewardj8ef22422012-05-24 16:29:18 +000024281 /* VCVTTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2C /r */
24282 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24283 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24284 goto decode_success;
24285 }
sewardjc4530ae2012-05-21 10:18:49 +000024286 break;
24287
sewardjadf357c2012-06-24 13:44:17 +000024288 case 0x2D:
24289 /* VCVTSD2SI xmm1/m64, r32 = VEX.LIG.F2.0F.W0 2D /r */
24290 if (haveF2no66noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
24291 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24292 goto decode_success;
24293 }
24294 /* VCVTSD2SI xmm1/m64, r64 = VEX.LIG.F2.0F.W1 2D /r */
24295 if (haveF2no66noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
24296 delta = dis_CVTxSD2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24297 goto decode_success;
24298 }
24299 /* VCVTSS2SI xmm1/m32, r32 = VEX.LIG.F3.0F.W0 2D /r */
24300 if (haveF3no66noF2(pfx) && 0==getRexW(pfx)/*W0*/) {
24301 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 4);
24302 goto decode_success;
24303 }
24304 /* VCVTSS2SI xmm1/m32, r64 = VEX.LIG.F3.0F.W1 2D /r */
24305 if (haveF3no66noF2(pfx) && 1==getRexW(pfx)/*W1*/) {
24306 delta = dis_CVTxSS2SI( vbi, pfx, delta, True/*isAvx*/, opc, 8);
24307 goto decode_success;
24308 }
24309 break;
24310
sewardjc4530ae2012-05-21 10:18:49 +000024311 case 0x2E:
sewardj4ed05e02012-06-18 15:01:30 +000024312 case 0x2F:
sewardjc4530ae2012-05-21 10:18:49 +000024313 /* VUCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000024314 /* VCOMISD xmm2/m64, xmm1 = VEX.LIG.66.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000024315 if (have66noF2noF3(pfx)) {
24316 delta = dis_COMISD( vbi, pfx, delta, True/*isAvx*/, opc );
24317 goto decode_success;
24318 }
24319 /* VUCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2E /r */
sewardj4ed05e02012-06-18 15:01:30 +000024320 /* VCOMISS xmm2/m32, xmm1 = VEX.LIG.0F.WIG 2F /r */
sewardjc4530ae2012-05-21 10:18:49 +000024321 if (haveNo66noF2noF3(pfx)) {
24322 delta = dis_COMISS( vbi, pfx, delta, True/*isAvx*/, opc );
24323 goto decode_success;
24324 }
24325 break;
24326
sewardj8eb7ae82012-06-24 14:00:27 +000024327 case 0x50:
24328 /* VMOVMSKPD xmm2, r32 = VEX.128.66.0F.WIG 50 /r */
24329 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24330 delta = dis_MOVMSKPD_128( vbi, pfx, delta, True/*isAvx*/ );
24331 goto decode_success;
24332 }
24333 /* VMOVMSKPD ymm2, r32 = VEX.256.66.0F.WIG 50 /r */
24334 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24335 delta = dis_MOVMSKPD_256( vbi, pfx, delta );
24336 goto decode_success;
24337 }
24338 /* VMOVMSKPS xmm2, r32 = VEX.128.0F.WIG 50 /r */
24339 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24340 delta = dis_MOVMSKPS_128( vbi, pfx, delta, True/*isAvx*/ );
24341 goto decode_success;
24342 }
24343 /* VMOVMSKPS ymm2, r32 = VEX.256.0F.WIG 50 /r */
24344 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24345 delta = dis_MOVMSKPS_256( vbi, pfx, delta );
24346 goto decode_success;
24347 }
24348 break;
24349
sewardjc4530ae2012-05-21 10:18:49 +000024350 case 0x51:
sewardj66becf32012-06-18 23:15:16 +000024351 /* VSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 51 /r */
24352 if (haveF3no66noF2(pfx)) {
24353 delta = dis_AVX128_E_V_to_G_lo32_unary(
24354 uses_vvvv, vbi, pfx, delta, "vsqrtss", Iop_Sqrt32F0x4 );
24355 goto decode_success;
24356 }
24357 /* VSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 51 /r */
24358 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24359 delta = dis_AVX128_E_to_G_unary_all(
24360 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx4 );
24361 goto decode_success;
24362 }
24363 /* VSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 51 /r */
24364 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24365 delta = dis_AVX256_E_to_G_unary_all(
24366 uses_vvvv, vbi, pfx, delta, "vsqrtps", Iop_Sqrt32Fx8 );
24367 goto decode_success;
24368 }
sewardjc4530ae2012-05-21 10:18:49 +000024369 /* VSQRTSD xmm3/m64(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F2.0F.WIG 51 /r */
24370 if (haveF2no66noF3(pfx)) {
24371 delta = dis_AVX128_E_V_to_G_lo64_unary(
24372 uses_vvvv, vbi, pfx, delta, "vsqrtsd", Iop_Sqrt64F0x2 );
24373 goto decode_success;
sewardj66becf32012-06-18 23:15:16 +000024374 }
24375 /* VSQRTPD xmm2/m128(E), xmm1(G) = VEX.NDS.128.66.0F.WIG 51 /r */
24376 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24377 delta = dis_AVX128_E_to_G_unary_all(
24378 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx2 );
24379 goto decode_success;
24380 }
24381 /* VSQRTPD ymm2/m256(E), ymm1(G) = VEX.NDS.256.66.0F.WIG 51 /r */
24382 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24383 delta = dis_AVX256_E_to_G_unary_all(
24384 uses_vvvv, vbi, pfx, delta, "vsqrtpd", Iop_Sqrt64Fx4 );
24385 goto decode_success;
24386 }
24387 break;
24388
24389 case 0x52:
24390 /* VRSQRTSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 52 /r */
24391 if (haveF3no66noF2(pfx)) {
24392 delta = dis_AVX128_E_V_to_G_lo32_unary(
sewardj1ddee212014-08-24 14:00:19 +000024393 uses_vvvv, vbi, pfx, delta, "vrsqrtss",
24394 Iop_RSqrtEst32F0x4 );
sewardj66becf32012-06-18 23:15:16 +000024395 goto decode_success;
24396 }
24397 /* VRSQRTPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 52 /r */
24398 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24399 delta = dis_AVX128_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024400 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx4 );
sewardj66becf32012-06-18 23:15:16 +000024401 goto decode_success;
24402 }
24403 /* VRSQRTPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 52 /r */
24404 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24405 delta = dis_AVX256_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024406 uses_vvvv, vbi, pfx, delta, "vrsqrtps", Iop_RSqrtEst32Fx8 );
sewardj66becf32012-06-18 23:15:16 +000024407 goto decode_success;
24408 }
24409 break;
sewardjc4530ae2012-05-21 10:18:49 +000024410
sewardj82096922012-06-24 14:57:59 +000024411 case 0x53:
24412 /* VRCPSS xmm3/m32(E), xmm2(V), xmm1(G) = VEX.NDS.LIG.F3.0F.WIG 53 /r */
24413 if (haveF3no66noF2(pfx)) {
24414 delta = dis_AVX128_E_V_to_G_lo32_unary(
sewardj1ddee212014-08-24 14:00:19 +000024415 uses_vvvv, vbi, pfx, delta, "vrcpss", Iop_RecipEst32F0x4 );
sewardj82096922012-06-24 14:57:59 +000024416 goto decode_success;
24417 }
24418 /* VRCPPS xmm2/m128(E), xmm1(G) = VEX.NDS.128.0F.WIG 53 /r */
24419 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24420 delta = dis_AVX128_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024421 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx4 );
sewardj82096922012-06-24 14:57:59 +000024422 goto decode_success;
24423 }
24424 /* VRCPPS ymm2/m256(E), ymm1(G) = VEX.NDS.256.0F.WIG 53 /r */
24425 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24426 delta = dis_AVX256_E_to_G_unary_all(
sewardj1ddee212014-08-24 14:00:19 +000024427 uses_vvvv, vbi, pfx, delta, "vrcpps", Iop_RecipEst32Fx8 );
sewardj82096922012-06-24 14:57:59 +000024428 goto decode_success;
24429 }
24430 break;
24431
sewardjc4530ae2012-05-21 10:18:49 +000024432 case 0x54:
sewardj251b59e2012-05-25 13:51:07 +000024433 /* VANDPD r/m, rV, r ::: r = rV & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024434 /* VANDPD = VEX.NDS.128.66.0F.WIG 54 /r */
24435 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24436 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24437 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128 );
24438 goto decode_success;
24439 }
sewardj4b1cc832012-06-13 11:10:20 +000024440 /* VANDPD r/m, rV, r ::: r = rV & r/m */
24441 /* VANDPD = VEX.NDS.256.66.0F.WIG 54 /r */
24442 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24443 delta = dis_AVX256_E_V_to_G(
24444 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256 );
24445 goto decode_success;
24446 }
sewardjc4530ae2012-05-21 10:18:49 +000024447 /* VANDPS = VEX.NDS.128.0F.WIG 54 /r */
24448 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24449 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24450 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128 );
24451 goto decode_success;
24452 }
sewardj2a2bda92012-06-14 23:32:02 +000024453 /* VANDPS = VEX.NDS.256.0F.WIG 54 /r */
24454 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24455 delta = dis_AVX256_E_V_to_G(
24456 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256 );
24457 goto decode_success;
24458 }
sewardjc4530ae2012-05-21 10:18:49 +000024459 break;
24460
24461 case 0x55:
sewardj251b59e2012-05-25 13:51:07 +000024462 /* VANDNPD r/m, rV, r ::: r = (not rV) & r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024463 /* VANDNPD = VEX.NDS.128.66.0F.WIG 55 /r */
24464 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24465 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24466 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000024467 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000024468 goto decode_success;
24469 }
sewardj2a2bda92012-06-14 23:32:02 +000024470 /* VANDNPD = VEX.NDS.256.66.0F.WIG 55 /r */
24471 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24472 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
24473 uses_vvvv, vbi, pfx, delta, "vandpd", Iop_AndV256,
24474 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
24475 goto decode_success;
24476 }
sewardjc4530ae2012-05-21 10:18:49 +000024477 /* VANDNPS = VEX.NDS.128.0F.WIG 55 /r */
24478 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24479 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24480 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV128,
sewardj44565e82012-05-22 09:14:15 +000024481 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
sewardjc4530ae2012-05-21 10:18:49 +000024482 goto decode_success;
24483 }
sewardj2a2bda92012-06-14 23:32:02 +000024484 /* VANDNPS = VEX.NDS.256.0F.WIG 55 /r */
24485 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24486 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
24487 uses_vvvv, vbi, pfx, delta, "vandps", Iop_AndV256,
24488 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
24489 goto decode_success;
24490 }
sewardjc4530ae2012-05-21 10:18:49 +000024491 break;
24492
24493 case 0x56:
sewardj251b59e2012-05-25 13:51:07 +000024494 /* VORPD r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024495 /* VORPD = VEX.NDS.128.66.0F.WIG 56 /r */
24496 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24497 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24498 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV128 );
24499 goto decode_success;
24500 }
sewardj2a2bda92012-06-14 23:32:02 +000024501 /* VORPD r/m, rV, r ::: r = rV | r/m */
24502 /* VORPD = VEX.NDS.256.66.0F.WIG 56 /r */
24503 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24504 delta = dis_AVX256_E_V_to_G(
24505 uses_vvvv, vbi, pfx, delta, "vorpd", Iop_OrV256 );
24506 goto decode_success;
24507 }
sewardj251b59e2012-05-25 13:51:07 +000024508 /* VORPS r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024509 /* VORPS = VEX.NDS.128.0F.WIG 56 /r */
24510 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24511 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24512 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV128 );
24513 goto decode_success;
24514 }
sewardj2a2bda92012-06-14 23:32:02 +000024515 /* VORPS r/m, rV, r ::: r = rV | r/m */
24516 /* VORPS = VEX.NDS.256.0F.WIG 56 /r */
24517 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24518 delta = dis_AVX256_E_V_to_G(
24519 uses_vvvv, vbi, pfx, delta, "vorps", Iop_OrV256 );
24520 goto decode_success;
24521 }
sewardjc4530ae2012-05-21 10:18:49 +000024522 break;
24523
24524 case 0x57:
sewardj251b59e2012-05-25 13:51:07 +000024525 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024526 /* VXORPD = VEX.NDS.128.66.0F.WIG 57 /r */
24527 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24528 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24529 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV128 );
24530 goto decode_success;
24531 }
sewardj4b1cc832012-06-13 11:10:20 +000024532 /* VXORPD r/m, rV, r ::: r = rV ^ r/m */
24533 /* VXORPD = VEX.NDS.256.66.0F.WIG 57 /r */
24534 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24535 delta = dis_AVX256_E_V_to_G(
24536 uses_vvvv, vbi, pfx, delta, "vxorpd", Iop_XorV256 );
24537 goto decode_success;
24538 }
sewardj251b59e2012-05-25 13:51:07 +000024539 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000024540 /* VXORPS = VEX.NDS.128.0F.WIG 57 /r */
24541 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24542 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24543 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV128 );
24544 goto decode_success;
24545 }
sewardj2a2bda92012-06-14 23:32:02 +000024546 /* VXORPS r/m, rV, r ::: r = rV ^ r/m */
24547 /* VXORPS = VEX.NDS.256.0F.WIG 57 /r */
24548 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24549 delta = dis_AVX256_E_V_to_G(
24550 uses_vvvv, vbi, pfx, delta, "vxorps", Iop_XorV256 );
24551 goto decode_success;
24552 }
sewardjc4530ae2012-05-21 10:18:49 +000024553 break;
24554
24555 case 0x58:
24556 /* VADDSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 58 /r */
24557 if (haveF2no66noF3(pfx)) {
24558 delta = dis_AVX128_E_V_to_G_lo64(
24559 uses_vvvv, vbi, pfx, delta, "vaddsd", Iop_Add64F0x2 );
24560 goto decode_success;
24561 }
24562 /* VADDSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 58 /r */
24563 if (haveF3no66noF2(pfx)) {
24564 delta = dis_AVX128_E_V_to_G_lo32(
24565 uses_vvvv, vbi, pfx, delta, "vaddss", Iop_Add32F0x4 );
24566 goto decode_success;
24567 }
sewardj251b59e2012-05-25 13:51:07 +000024568 /* VADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 58 /r */
24569 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24570 delta = dis_AVX128_E_V_to_G(
24571 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx4 );
24572 goto decode_success;
24573 }
sewardj56c30312012-06-12 08:45:39 +000024574 /* VADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 58 /r */
24575 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24576 delta = dis_AVX256_E_V_to_G(
24577 uses_vvvv, vbi, pfx, delta, "vaddps", Iop_Add32Fx8 );
24578 goto decode_success;
24579 }
24580 /* VADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 58 /r */
24581 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24582 delta = dis_AVX128_E_V_to_G(
24583 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx2 );
24584 goto decode_success;
24585 }
24586 /* VADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 58 /r */
24587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24588 delta = dis_AVX256_E_V_to_G(
24589 uses_vvvv, vbi, pfx, delta, "vaddpd", Iop_Add64Fx4 );
24590 goto decode_success;
24591 }
sewardjc4530ae2012-05-21 10:18:49 +000024592 break;
24593
24594 case 0x59:
24595 /* VMULSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 59 /r */
24596 if (haveF2no66noF3(pfx)) {
24597 delta = dis_AVX128_E_V_to_G_lo64(
24598 uses_vvvv, vbi, pfx, delta, "vmulsd", Iop_Mul64F0x2 );
24599 goto decode_success;
24600 }
24601 /* VMULSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 59 /r */
24602 if (haveF3no66noF2(pfx)) {
24603 delta = dis_AVX128_E_V_to_G_lo32(
24604 uses_vvvv, vbi, pfx, delta, "vmulss", Iop_Mul32F0x4 );
24605 goto decode_success;
24606 }
sewardj251b59e2012-05-25 13:51:07 +000024607 /* VMULPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 59 /r */
24608 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24609 delta = dis_AVX128_E_V_to_G(
24610 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx4 );
24611 goto decode_success;
24612 }
sewardj56c30312012-06-12 08:45:39 +000024613 /* VMULPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 59 /r */
24614 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24615 delta = dis_AVX256_E_V_to_G(
24616 uses_vvvv, vbi, pfx, delta, "vmulps", Iop_Mul32Fx8 );
24617 goto decode_success;
24618 }
24619 /* VMULPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 59 /r */
24620 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24621 delta = dis_AVX128_E_V_to_G(
24622 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx2 );
24623 goto decode_success;
24624 }
24625 /* VMULPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 59 /r */
24626 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24627 delta = dis_AVX256_E_V_to_G(
24628 uses_vvvv, vbi, pfx, delta, "vmulpd", Iop_Mul64Fx4 );
24629 goto decode_success;
24630 }
sewardjc4530ae2012-05-21 10:18:49 +000024631 break;
24632
24633 case 0x5A:
24634 /* VCVTPS2PD xmm2/m64, xmm1 = VEX.128.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000024635 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000024636 delta = dis_CVTPS2PD_128( vbi, pfx, delta, True/*isAvx*/ );
24637 goto decode_success;
24638 }
24639 /* VCVTPS2PD xmm2/m128, ymm1 = VEX.256.0F.WIG 5A /r */
24640 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24641 delta = dis_CVTPS2PD_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000024642 goto decode_success;
24643 }
24644 /* VCVTPD2PS xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5A /r */
sewardj72df0682012-05-23 23:54:30 +000024645 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000024646 delta = dis_CVTPD2PS_128( vbi, pfx, delta, True/*isAvx*/ );
24647 goto decode_success;
24648 }
24649 /* VCVTPD2PS ymm2/m256, xmm1 = VEX.256.66.0F.WIG 5A /r */
24650 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24651 delta = dis_CVTPD2PS_256( vbi, pfx, delta );
sewardjc4530ae2012-05-21 10:18:49 +000024652 goto decode_success;
24653 }
sewardj72df0682012-05-23 23:54:30 +000024654 /* VCVTSD2SS xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5A /r */
24655 if (haveF2no66noF3(pfx)) {
24656 UChar modrm = getUChar(delta);
24657 UInt rV = getVexNvvvv(pfx);
24658 UInt rD = gregOfRexRM(pfx, modrm);
24659 IRTemp f64lo = newTemp(Ity_F64);
24660 IRTemp rmode = newTemp(Ity_I32);
24661 assign( rmode, get_sse_roundingmode() );
24662 if (epartIsReg(modrm)) {
24663 UInt rS = eregOfRexRM(pfx,modrm);
24664 assign(f64lo, getXMMRegLane64F(rS, 0));
24665 delta += 1;
24666 DIP("vcvtsd2ss %s,%s,%s\n",
24667 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
24668 } else {
24669 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24670 assign(f64lo, loadLE(Ity_F64, mkexpr(addr)) );
24671 delta += alen;
24672 DIP("vcvtsd2ss %s,%s,%s\n",
24673 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24674 }
24675 putXMMRegLane32F( rD, 0,
24676 binop( Iop_F64toF32, mkexpr(rmode),
24677 mkexpr(f64lo)) );
24678 putXMMRegLane32( rD, 1, getXMMRegLane32( rV, 1 ));
24679 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24680 putYMMRegLane128( rD, 1, mkV128(0) );
24681 *uses_vvvv = True;
24682 goto decode_success;
24683 }
24684 /* VCVTSS2SD xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5A /r */
24685 if (haveF3no66noF2(pfx)) {
24686 UChar modrm = getUChar(delta);
24687 UInt rV = getVexNvvvv(pfx);
24688 UInt rD = gregOfRexRM(pfx, modrm);
24689 IRTemp f32lo = newTemp(Ity_F32);
24690 if (epartIsReg(modrm)) {
24691 UInt rS = eregOfRexRM(pfx,modrm);
24692 assign(f32lo, getXMMRegLane32F(rS, 0));
24693 delta += 1;
24694 DIP("vcvtss2sd %s,%s,%s\n",
24695 nameXMMReg(rS), nameXMMReg(rV), nameXMMReg(rD));
24696 } else {
24697 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
24698 assign(f32lo, loadLE(Ity_F32, mkexpr(addr)) );
24699 delta += alen;
24700 DIP("vcvtss2sd %s,%s,%s\n",
24701 dis_buf, nameXMMReg(rV), nameXMMReg(rD));
24702 }
24703 putXMMRegLane64F( rD, 0,
24704 unop( Iop_F32toF64, mkexpr(f32lo)) );
24705 putXMMRegLane64( rD, 1, getXMMRegLane64( rV, 1 ));
24706 putYMMRegLane128( rD, 1, mkV128(0) );
24707 *uses_vvvv = True;
24708 goto decode_success;
24709 }
sewardjc4530ae2012-05-21 10:18:49 +000024710 break;
24711
sewardj251b59e2012-05-25 13:51:07 +000024712 case 0x5B:
24713 /* VCVTPS2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG 5B /r */
24714 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj66becf32012-06-18 23:15:16 +000024715 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
24716 True/*isAvx*/, False/*!r2zero*/ );
24717 goto decode_success;
24718 }
24719 /* VCVTPS2DQ ymm2/m256, ymm1 = VEX.256.66.0F.WIG 5B /r */
24720 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24721 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
24722 False/*!r2zero*/ );
24723 goto decode_success;
24724 }
24725 /* VCVTTPS2DQ xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 5B /r */
24726 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
24727 delta = dis_CVTxPS2DQ_128( vbi, pfx, delta,
24728 True/*isAvx*/, True/*r2zero*/ );
24729 goto decode_success;
24730 }
24731 /* VCVTTPS2DQ ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 5B /r */
24732 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
24733 delta = dis_CVTxPS2DQ_256( vbi, pfx, delta,
24734 True/*r2zero*/ );
24735 goto decode_success;
24736 }
24737 /* VCVTDQ2PS xmm2/m128, xmm1 = VEX.128.0F.WIG 5B /r */
24738 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24739 delta = dis_CVTDQ2PS_128 ( vbi, pfx, delta, True/*isAvx*/ );
24740 goto decode_success;
24741 }
24742 /* VCVTDQ2PS ymm2/m256, ymm1 = VEX.256.0F.WIG 5B /r */
24743 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24744 delta = dis_CVTDQ2PS_256 ( vbi, pfx, delta );
sewardj251b59e2012-05-25 13:51:07 +000024745 goto decode_success;
24746 }
24747 break;
24748
sewardjc4530ae2012-05-21 10:18:49 +000024749 case 0x5C:
24750 /* VSUBSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5C /r */
24751 if (haveF2no66noF3(pfx)) {
24752 delta = dis_AVX128_E_V_to_G_lo64(
24753 uses_vvvv, vbi, pfx, delta, "vsubsd", Iop_Sub64F0x2 );
24754 goto decode_success;
24755 }
24756 /* VSUBSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5C /r */
24757 if (haveF3no66noF2(pfx)) {
24758 delta = dis_AVX128_E_V_to_G_lo32(
24759 uses_vvvv, vbi, pfx, delta, "vsubss", Iop_Sub32F0x4 );
24760 goto decode_success;
24761 }
sewardj251b59e2012-05-25 13:51:07 +000024762 /* VSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5C /r */
24763 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24764 delta = dis_AVX128_E_V_to_G(
24765 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx4 );
24766 goto decode_success;
24767 }
sewardj56c30312012-06-12 08:45:39 +000024768 /* VSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5C /r */
24769 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24770 delta = dis_AVX256_E_V_to_G(
24771 uses_vvvv, vbi, pfx, delta, "vsubps", Iop_Sub32Fx8 );
24772 goto decode_success;
24773 }
24774 /* VSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5C /r */
24775 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24776 delta = dis_AVX128_E_V_to_G(
24777 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx2 );
24778 goto decode_success;
24779 }
24780 /* VSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5C /r */
24781 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24782 delta = dis_AVX256_E_V_to_G(
24783 uses_vvvv, vbi, pfx, delta, "vsubpd", Iop_Sub64Fx4 );
24784 goto decode_success;
24785 }
sewardjc4530ae2012-05-21 10:18:49 +000024786 break;
24787
24788 case 0x5D:
24789 /* VMINSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5D /r */
24790 if (haveF2no66noF3(pfx)) {
24791 delta = dis_AVX128_E_V_to_G_lo64(
24792 uses_vvvv, vbi, pfx, delta, "vminsd", Iop_Min64F0x2 );
24793 goto decode_success;
24794 }
24795 /* VMINSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5D /r */
24796 if (haveF3no66noF2(pfx)) {
24797 delta = dis_AVX128_E_V_to_G_lo32(
24798 uses_vvvv, vbi, pfx, delta, "vminss", Iop_Min32F0x4 );
24799 goto decode_success;
24800 }
sewardj251b59e2012-05-25 13:51:07 +000024801 /* VMINPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5D /r */
24802 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24803 delta = dis_AVX128_E_V_to_G(
24804 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx4 );
24805 goto decode_success;
24806 }
sewardj8eb7ae82012-06-24 14:00:27 +000024807 /* VMINPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5D /r */
24808 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24809 delta = dis_AVX256_E_V_to_G(
24810 uses_vvvv, vbi, pfx, delta, "vminps", Iop_Min32Fx8 );
24811 goto decode_success;
24812 }
24813 /* VMINPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5D /r */
24814 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24815 delta = dis_AVX128_E_V_to_G(
24816 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx2 );
24817 goto decode_success;
24818 }
24819 /* VMINPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5D /r */
24820 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24821 delta = dis_AVX256_E_V_to_G(
24822 uses_vvvv, vbi, pfx, delta, "vminpd", Iop_Min64Fx4 );
24823 goto decode_success;
24824 }
sewardjc4530ae2012-05-21 10:18:49 +000024825 break;
24826
24827 case 0x5E:
24828 /* VDIVSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5E /r */
24829 if (haveF2no66noF3(pfx)) {
24830 delta = dis_AVX128_E_V_to_G_lo64(
24831 uses_vvvv, vbi, pfx, delta, "vdivsd", Iop_Div64F0x2 );
24832 goto decode_success;
24833 }
24834 /* VDIVSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5E /r */
24835 if (haveF3no66noF2(pfx)) {
24836 delta = dis_AVX128_E_V_to_G_lo32(
24837 uses_vvvv, vbi, pfx, delta, "vdivss", Iop_Div32F0x4 );
24838 goto decode_success;
24839 }
sewardj2a2bda92012-06-14 23:32:02 +000024840 /* VDIVPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5E /r */
24841 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24842 delta = dis_AVX128_E_V_to_G(
24843 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx4 );
24844 goto decode_success;
24845 }
sewardj56c30312012-06-12 08:45:39 +000024846 /* VDIVPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5E /r */
24847 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24848 delta = dis_AVX256_E_V_to_G(
24849 uses_vvvv, vbi, pfx, delta, "vdivps", Iop_Div32Fx8 );
24850 goto decode_success;
24851 }
sewardj4b1cc832012-06-13 11:10:20 +000024852 /* VDIVPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5E /r */
24853 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24854 delta = dis_AVX128_E_V_to_G(
24855 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx2 );
24856 goto decode_success;
24857 }
sewardj56c30312012-06-12 08:45:39 +000024858 /* VDIVPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5E /r */
24859 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24860 delta = dis_AVX256_E_V_to_G(
24861 uses_vvvv, vbi, pfx, delta, "vdivpd", Iop_Div64Fx4 );
24862 goto decode_success;
24863 }
sewardjc4530ae2012-05-21 10:18:49 +000024864 break;
24865
24866 case 0x5F:
24867 /* VMAXSD xmm3/m64, xmm2, xmm1 = VEX.NDS.LIG.F2.0F.WIG 5F /r */
24868 if (haveF2no66noF3(pfx)) {
24869 delta = dis_AVX128_E_V_to_G_lo64(
24870 uses_vvvv, vbi, pfx, delta, "vmaxsd", Iop_Max64F0x2 );
24871 goto decode_success;
24872 }
24873 /* VMAXSS xmm3/m32, xmm2, xmm1 = VEX.NDS.LIG.F3.0F.WIG 5F /r */
24874 if (haveF3no66noF2(pfx)) {
24875 delta = dis_AVX128_E_V_to_G_lo32(
24876 uses_vvvv, vbi, pfx, delta, "vmaxss", Iop_Max32F0x4 );
24877 goto decode_success;
24878 }
sewardj251b59e2012-05-25 13:51:07 +000024879 /* VMAXPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.0F.WIG 5F /r */
24880 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24881 delta = dis_AVX128_E_V_to_G(
24882 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx4 );
24883 goto decode_success;
24884 }
sewardj8eb7ae82012-06-24 14:00:27 +000024885 /* VMAXPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.0F.WIG 5F /r */
24886 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24887 delta = dis_AVX256_E_V_to_G(
24888 uses_vvvv, vbi, pfx, delta, "vmaxps", Iop_Max32Fx8 );
24889 goto decode_success;
24890 }
24891 /* VMAXPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 5F /r */
24892 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24893 delta = dis_AVX128_E_V_to_G(
24894 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx2 );
24895 goto decode_success;
24896 }
24897 /* VMAXPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 5F /r */
24898 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24899 delta = dis_AVX256_E_V_to_G(
24900 uses_vvvv, vbi, pfx, delta, "vmaxpd", Iop_Max64Fx4 );
24901 goto decode_success;
24902 }
sewardjc4530ae2012-05-21 10:18:49 +000024903 break;
24904
sewardj44565e82012-05-22 09:14:15 +000024905 case 0x60:
sewardj251b59e2012-05-25 13:51:07 +000024906 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000024907 /* VPUNPCKLBW = VEX.NDS.128.66.0F.WIG 60 /r */
sewardj44565e82012-05-22 09:14:15 +000024908 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24909 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24910 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
24911 Iop_InterleaveLO8x16, NULL,
24912 False/*!invertLeftArg*/, True/*swapArgs*/ );
24913 goto decode_success;
24914 }
sewardjcc3d2192013-03-27 11:37:33 +000024915 /* VPUNPCKLBW r/m, rV, r ::: r = interleave-lo-bytes(rV, r/m) */
24916 /* VPUNPCKLBW = VEX.NDS.256.66.0F.WIG 60 /r */
24917 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24918 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24919 uses_vvvv, vbi, pfx, delta, "vpunpcklbw",
24920 math_VPUNPCKLBW_YMM );
24921 goto decode_success;
24922 }
sewardj44565e82012-05-22 09:14:15 +000024923 break;
24924
sewardj251b59e2012-05-25 13:51:07 +000024925 case 0x61:
24926 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000024927 /* VPUNPCKLWD = VEX.NDS.128.66.0F.WIG 61 /r */
sewardj251b59e2012-05-25 13:51:07 +000024928 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24929 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24930 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
24931 Iop_InterleaveLO16x8, NULL,
24932 False/*!invertLeftArg*/, True/*swapArgs*/ );
24933 goto decode_success;
24934 }
sewardjcc3d2192013-03-27 11:37:33 +000024935 /* VPUNPCKLWD r/m, rV, r ::: r = interleave-lo-words(rV, r/m) */
24936 /* VPUNPCKLWD = VEX.NDS.256.66.0F.WIG 61 /r */
24937 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24938 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24939 uses_vvvv, vbi, pfx, delta, "vpunpcklwd",
24940 math_VPUNPCKLWD_YMM );
24941 goto decode_success;
24942 }
sewardj251b59e2012-05-25 13:51:07 +000024943 break;
24944
sewardj6faf7cc2012-05-25 15:53:01 +000024945 case 0x62:
24946 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
24947 /* VPUNPCKLDQ = VEX.NDS.128.66.0F.WIG 62 /r */
24948 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24949 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24950 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
24951 Iop_InterleaveLO32x4, NULL,
24952 False/*!invertLeftArg*/, True/*swapArgs*/ );
24953 goto decode_success;
24954 }
sewardjcc3d2192013-03-27 11:37:33 +000024955 /* VPUNPCKLDQ r/m, rV, r ::: r = interleave-lo-dwords(rV, r/m) */
24956 /* VPUNPCKLDQ = VEX.NDS.256.66.0F.WIG 62 /r */
24957 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24958 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24959 uses_vvvv, vbi, pfx, delta, "vpunpckldq",
24960 math_VPUNPCKLDQ_YMM );
24961 goto decode_success;
24962 }
sewardj6faf7cc2012-05-25 15:53:01 +000024963 break;
24964
sewardj8516a1f2012-06-24 14:26:30 +000024965 case 0x63:
24966 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
24967 /* VPACKSSWB = VEX.NDS.128.66.0F.WIG 63 /r */
24968 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24969 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
24970 uses_vvvv, vbi, pfx, delta, "vpacksswb",
24971 Iop_QNarrowBin16Sto8Sx16, NULL,
24972 False/*!invertLeftArg*/, True/*swapArgs*/ );
24973 goto decode_success;
24974 }
sewardjcc3d2192013-03-27 11:37:33 +000024975 /* VPACKSSWB r/m, rV, r ::: r = QNarrowBin16Sto8Sx16(rV, r/m) */
24976 /* VPACKSSWB = VEX.NDS.256.66.0F.WIG 63 /r */
24977 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24978 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
24979 uses_vvvv, vbi, pfx, delta, "vpacksswb",
24980 math_VPACKSSWB_YMM );
24981 goto decode_success;
24982 }
sewardj8516a1f2012-06-24 14:26:30 +000024983 break;
24984
sewardj89378162012-06-24 12:12:20 +000024985 case 0x64:
24986 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
24987 /* VPCMPGTB = VEX.NDS.128.66.0F.WIG 64 /r */
24988 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
24989 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
24990 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx16 );
24991 goto decode_success;
24992 }
sewardjcc3d2192013-03-27 11:37:33 +000024993 /* VPCMPGTB r/m, rV, r ::: r = rV `>s-by-8s` r/m */
24994 /* VPCMPGTB = VEX.NDS.256.66.0F.WIG 64 /r */
24995 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
24996 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
24997 uses_vvvv, vbi, pfx, delta, "vpcmpgtb", Iop_CmpGT8Sx32 );
24998 goto decode_success;
24999 }
sewardj89378162012-06-24 12:12:20 +000025000 break;
25001
25002 case 0x65:
25003 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25004 /* VPCMPGTW = VEX.NDS.128.66.0F.WIG 65 /r */
25005 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25006 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25007 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx8 );
25008 goto decode_success;
25009 }
sewardjcc3d2192013-03-27 11:37:33 +000025010 /* VPCMPGTW r/m, rV, r ::: r = rV `>s-by-16s` r/m */
25011 /* VPCMPGTW = VEX.NDS.256.66.0F.WIG 65 /r */
25012 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25013 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25014 uses_vvvv, vbi, pfx, delta, "vpcmpgtw", Iop_CmpGT16Sx16 );
25015 goto decode_success;
25016 }
sewardj89378162012-06-24 12:12:20 +000025017 break;
25018
sewardj4ed05e02012-06-18 15:01:30 +000025019 case 0x66:
25020 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25021 /* VPCMPGTD = VEX.NDS.128.66.0F.WIG 66 /r */
25022 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25023 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25024 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx4 );
25025 goto decode_success;
25026 }
sewardjcc3d2192013-03-27 11:37:33 +000025027 /* VPCMPGTD r/m, rV, r ::: r = rV `>s-by-32s` r/m */
25028 /* VPCMPGTD = VEX.NDS.256.66.0F.WIG 66 /r */
25029 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25030 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25031 uses_vvvv, vbi, pfx, delta, "vpcmpgtd", Iop_CmpGT32Sx8 );
25032 goto decode_success;
25033 }
sewardj4ed05e02012-06-18 15:01:30 +000025034 break;
25035
sewardj6c4e45c2012-05-24 00:09:27 +000025036 case 0x67:
25037 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
sewardj6faf7cc2012-05-25 15:53:01 +000025038 /* VPACKUSWB = VEX.NDS.128.66.0F.WIG 67 /r */
sewardj6c4e45c2012-05-24 00:09:27 +000025039 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25040 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25041 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25042 Iop_QNarrowBin16Sto8Ux16, NULL,
25043 False/*!invertLeftArg*/, True/*swapArgs*/ );
25044 goto decode_success;
25045 }
sewardjcc3d2192013-03-27 11:37:33 +000025046 /* VPACKUSWB r/m, rV, r ::: r = QNarrowBin16Sto8Ux16(rV, r/m) */
25047 /* VPACKUSWB = VEX.NDS.256.66.0F.WIG 67 /r */
25048 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25049 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25050 uses_vvvv, vbi, pfx, delta, "vpackuswb",
25051 math_VPACKUSWB_YMM );
25052 goto decode_success;
25053 }
sewardj6c4e45c2012-05-24 00:09:27 +000025054 break;
25055
sewardj44565e82012-05-22 09:14:15 +000025056 case 0x68:
sewardj251b59e2012-05-25 13:51:07 +000025057 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.128.66.0F.WIG 68 /r */
25059 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25060 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25061 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25062 Iop_InterleaveHI8x16, NULL,
25063 False/*!invertLeftArg*/, True/*swapArgs*/ );
25064 goto decode_success;
25065 }
sewardjcc3d2192013-03-27 11:37:33 +000025066 /* VPUNPCKHBW r/m, rV, r ::: r = interleave-hi-bytes(rV, r/m) */
      /* VPUNPCKHBW = VEX.NDS.256.66.0F.WIG 68 /r */
25068 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25069 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25070 uses_vvvv, vbi, pfx, delta, "vpunpckhbw",
25071 math_VPUNPCKHBW_YMM );
25072 goto decode_success;
25073 }
sewardj44565e82012-05-22 09:14:15 +000025074 break;
25075
sewardj251b59e2012-05-25 13:51:07 +000025076 case 0x69:
25077 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.128.66.0F.WIG 69 /r */
25079 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25080 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25081 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25082 Iop_InterleaveHI16x8, NULL,
25083 False/*!invertLeftArg*/, True/*swapArgs*/ );
25084 goto decode_success;
25085 }
sewardjcc3d2192013-03-27 11:37:33 +000025086 /* VPUNPCKHWD r/m, rV, r ::: r = interleave-hi-words(rV, r/m) */
      /* VPUNPCKHWD = VEX.NDS.256.66.0F.WIG 69 /r */
25088 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25089 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25090 uses_vvvv, vbi, pfx, delta, "vpunpckhwd",
25091 math_VPUNPCKHWD_YMM );
25092 goto decode_success;
25093 }
sewardj251b59e2012-05-25 13:51:07 +000025094 break;
25095
sewardj6fcd43e2012-06-14 08:51:35 +000025096 case 0x6A:
25097 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25098 /* VPUNPCKHDQ = VEX.NDS.128.66.0F.WIG 6A /r */
25099 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25100 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25101 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25102 Iop_InterleaveHI32x4, NULL,
25103 False/*!invertLeftArg*/, True/*swapArgs*/ );
25104 goto decode_success;
25105 }
sewardjcc3d2192013-03-27 11:37:33 +000025106 /* VPUNPCKHDQ r/m, rV, r ::: r = interleave-hi-dwords(rV, r/m) */
25107 /* VPUNPCKHDQ = VEX.NDS.256.66.0F.WIG 6A /r */
25108 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25109 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25110 uses_vvvv, vbi, pfx, delta, "vpunpckhdq",
25111 math_VPUNPCKHDQ_YMM );
25112 goto decode_success;
25113 }
sewardj6fcd43e2012-06-14 08:51:35 +000025114 break;
25115
sewardj6faf7cc2012-05-25 15:53:01 +000025116 case 0x6B:
25117 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25118 /* VPACKSSDW = VEX.NDS.128.66.0F.WIG 6B /r */
25119 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25120 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25121 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25122 Iop_QNarrowBin32Sto16Sx8, NULL,
25123 False/*!invertLeftArg*/, True/*swapArgs*/ );
25124 goto decode_success;
25125 }
sewardjcc3d2192013-03-27 11:37:33 +000025126 /* VPACKSSDW r/m, rV, r ::: r = QNarrowBin32Sto16Sx8(rV, r/m) */
25127 /* VPACKSSDW = VEX.NDS.256.66.0F.WIG 6B /r */
25128 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25129 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25130 uses_vvvv, vbi, pfx, delta, "vpackssdw",
25131 math_VPACKSSDW_YMM );
25132 goto decode_success;
25133 }
sewardj6faf7cc2012-05-25 15:53:01 +000025134 break;
25135
25136 case 0x6C:
25137 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
      /* VPUNPCKLQDQ = VEX.NDS.128.66.0F.WIG 6C /r */
25139 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25140 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25141 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25142 Iop_InterleaveLO64x2, NULL,
25143 False/*!invertLeftArg*/, True/*swapArgs*/ );
25144 goto decode_success;
25145 }
sewardjcc3d2192013-03-27 11:37:33 +000025146 /* VPUNPCKLQDQ r/m, rV, r ::: r = interleave-lo-64bitses(rV, r/m) */
      /* VPUNPCKLQDQ = VEX.NDS.256.66.0F.WIG 6C /r */
25148 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25149 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25150 uses_vvvv, vbi, pfx, delta, "vpunpcklqdq",
25151 math_VPUNPCKLQDQ_YMM );
25152 goto decode_success;
25153 }
sewardj6faf7cc2012-05-25 15:53:01 +000025154 break;
25155
sewardjfe0c5e72012-06-15 15:48:07 +000025156 case 0x6D:
25157 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
      /* VPUNPCKHQDQ = VEX.NDS.128.66.0F.WIG 6D /r */
25159 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25160 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
25161 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25162 Iop_InterleaveHI64x2, NULL,
25163 False/*!invertLeftArg*/, True/*swapArgs*/ );
25164 goto decode_success;
25165 }
sewardjcc3d2192013-03-27 11:37:33 +000025166 /* VPUNPCKHQDQ r/m, rV, r ::: r = interleave-hi-64bitses(rV, r/m) */
      /* VPUNPCKHQDQ = VEX.NDS.256.66.0F.WIG 6D /r */
25168 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25169 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
25170 uses_vvvv, vbi, pfx, delta, "vpunpckhqdq",
25171 math_VPUNPCKHQDQ_YMM );
25172 goto decode_success;
25173 }
sewardjfe0c5e72012-06-15 15:48:07 +000025174 break;
25175
sewardjc4530ae2012-05-21 10:18:49 +000025176 case 0x6E:
25177 /* VMOVD r32/m32, xmm1 = VEX.128.66.0F.W0 6E */
25178 if (have66noF2noF3(pfx)
25179 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25180 vassert(sz == 2); /* even tho we are transferring 4, not 2. */
25181 UChar modrm = getUChar(delta);
25182 if (epartIsReg(modrm)) {
25183 delta += 1;
25184 putYMMRegLoAndZU(
25185 gregOfRexRM(pfx,modrm),
25186 unop( Iop_32UtoV128, getIReg32(eregOfRexRM(pfx,modrm)) )
25187 );
25188 DIP("vmovd %s, %s\n", nameIReg32(eregOfRexRM(pfx,modrm)),
25189 nameXMMReg(gregOfRexRM(pfx,modrm)));
25190 } else {
25191 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25192 delta += alen;
25193 putYMMRegLoAndZU(
25194 gregOfRexRM(pfx,modrm),
25195 unop( Iop_32UtoV128,loadLE(Ity_I32, mkexpr(addr)))
25196 );
25197 DIP("vmovd %s, %s\n", dis_buf,
25198 nameXMMReg(gregOfRexRM(pfx,modrm)));
25199 }
25200 goto decode_success;
25201 }
sewardj6eaf00c2012-05-23 11:33:56 +000025202 /* VMOVQ r64/m64, xmm1 = VEX.128.66.0F.W1 6E */
25203 if (have66noF2noF3(pfx)
25204 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
25205 vassert(sz == 2); /* even tho we are transferring 8, not 2. */
25206 UChar modrm = getUChar(delta);
25207 if (epartIsReg(modrm)) {
25208 delta += 1;
25209 putYMMRegLoAndZU(
25210 gregOfRexRM(pfx,modrm),
25211 unop( Iop_64UtoV128, getIReg64(eregOfRexRM(pfx,modrm)) )
25212 );
25213 DIP("vmovq %s, %s\n", nameIReg64(eregOfRexRM(pfx,modrm)),
25214 nameXMMReg(gregOfRexRM(pfx,modrm)));
25215 } else {
25216 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25217 delta += alen;
25218 putYMMRegLoAndZU(
25219 gregOfRexRM(pfx,modrm),
25220 unop( Iop_64UtoV128,loadLE(Ity_I64, mkexpr(addr)))
25221 );
25222 DIP("vmovq %s, %s\n", dis_buf,
25223 nameXMMReg(gregOfRexRM(pfx,modrm)));
25224 }
25225 goto decode_success;
25226 }
sewardjc4530ae2012-05-21 10:18:49 +000025227 break;
25228
25229 case 0x6F:
25230 /* VMOVDQA ymm2/m256, ymm1 = VEX.256.66.0F.WIG 6F */
25231 /* VMOVDQU ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 6F */
sewardj66becf32012-06-18 23:15:16 +000025232 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
sewardjc4530ae2012-05-21 10:18:49 +000025233 && 1==getVexL(pfx)/*256*/) {
25234 UChar modrm = getUChar(delta);
25235 UInt rD = gregOfRexRM(pfx, modrm);
25236 IRTemp tD = newTemp(Ity_V256);
25237 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025238 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025239 if (epartIsReg(modrm)) {
25240 UInt rS = eregOfRexRM(pfx, modrm);
25241 delta += 1;
25242 assign(tD, getYMMReg(rS));
25243 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
25244 } else {
25245 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25246 delta += alen;
25247 if (isA)
25248 gen_SEGV_if_not_32_aligned(addr);
25249 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
25250 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameYMMReg(rD));
25251 }
25252 putYMMReg(rD, mkexpr(tD));
25253 goto decode_success;
25254 }
25255 /* VMOVDQA xmm2/m128, xmm1 = VEX.128.66.0F.WIG 6F */
25256 /* VMOVDQU xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 6F */
25257 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25258 && 0==getVexL(pfx)/*128*/) {
25259 UChar modrm = getUChar(delta);
25260 UInt rD = gregOfRexRM(pfx, modrm);
25261 IRTemp tD = newTemp(Ity_V128);
25262 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025263 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025264 if (epartIsReg(modrm)) {
25265 UInt rS = eregOfRexRM(pfx, modrm);
25266 delta += 1;
25267 assign(tD, getXMMReg(rS));
25268 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25269 } else {
25270 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25271 delta += alen;
25272 if (isA)
25273 gen_SEGV_if_not_16_aligned(addr);
25274 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
25275 DIP("vmovdq%c %s,%s\n", ch, dis_buf, nameXMMReg(rD));
25276 }
25277 putYMMRegLoAndZU(rD, mkexpr(tD));
25278 goto decode_success;
25279 }
25280 break;
25281
25282 case 0x70:
25283 /* VPSHUFD imm8, xmm2/m128, xmm1 = VEX.128.66.0F.WIG 70 /r ib */
25284 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25285 delta = dis_PSHUFD_32x4( vbi, pfx, delta, True/*writesYmm*/);
25286 goto decode_success;
25287 }
sewardjcc3d2192013-03-27 11:37:33 +000025288 /* VPSHUFD imm8, ymm2/m256, ymm1 = VEX.256.66.0F.WIG 70 /r ib */
25289 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25290 delta = dis_PSHUFD_32x8( vbi, pfx, delta);
25291 goto decode_success;
25292 }
sewardj251b59e2012-05-25 13:51:07 +000025293 /* VPSHUFLW imm8, xmm2/m128, xmm1 = VEX.128.F2.0F.WIG 70 /r ib */
25294 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25295 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25296 True/*isAvx*/, False/*!xIsH*/ );
25297 goto decode_success;
25298 }
sewardjcc3d2192013-03-27 11:37:33 +000025299 /* VPSHUFLW imm8, ymm2/m256, ymm1 = VEX.256.F2.0F.WIG 70 /r ib */
25300 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25301 delta = dis_PSHUFxW_256( vbi, pfx, delta, False/*!xIsH*/ );
25302 goto decode_success;
25303 }
sewardj251b59e2012-05-25 13:51:07 +000025304 /* VPSHUFHW imm8, xmm2/m128, xmm1 = VEX.128.F3.0F.WIG 70 /r ib */
25305 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
25306 delta = dis_PSHUFxW_128( vbi, pfx, delta,
25307 True/*isAvx*/, True/*xIsH*/ );
25308 goto decode_success;
25309 }
sewardjcc3d2192013-03-27 11:37:33 +000025310 /* VPSHUFHW imm8, ymm2/m256, ymm1 = VEX.256.F3.0F.WIG 70 /r ib */
25311 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
25312 delta = dis_PSHUFxW_256( vbi, pfx, delta, True/*xIsH*/ );
25313 goto decode_success;
25314 }
sewardjc4530ae2012-05-21 10:18:49 +000025315 break;
25316
sewardj6faf7cc2012-05-25 15:53:01 +000025317 case 0x71:
25318 /* VPSRLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /2 ib */
sewardjfe0c5e72012-06-15 15:48:07 +000025319 /* VPSRAW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /4 ib */
sewardj15ad1942012-06-20 10:21:05 +000025320 /* VPSLLW imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 71 /6 ib */
sewardj6faf7cc2012-05-25 15:53:01 +000025321 if (have66noF2noF3(pfx)
25322 && 0==getVexL(pfx)/*128*/
25323 && epartIsReg(getUChar(delta))) {
25324 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25325 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25326 "vpsrlw", Iop_ShrN16x8 );
25327 *uses_vvvv = True;
25328 goto decode_success;
25329 }
sewardjfe0c5e72012-06-15 15:48:07 +000025330 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25331 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25332 "vpsraw", Iop_SarN16x8 );
25333 *uses_vvvv = True;
25334 goto decode_success;
25335 }
sewardj15ad1942012-06-20 10:21:05 +000025336 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25337 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25338 "vpsllw", Iop_ShlN16x8 );
25339 *uses_vvvv = True;
25340 goto decode_success;
25341 }
sewardj6faf7cc2012-05-25 15:53:01 +000025342 /* else fall through */
25343 }
sewardjcc3d2192013-03-27 11:37:33 +000025344 /* VPSRLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /2 ib */
25345 /* VPSRAW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /4 ib */
25346 /* VPSLLW imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 71 /6 ib */
25347 if (have66noF2noF3(pfx)
25348 && 1==getVexL(pfx)/*256*/
25349 && epartIsReg(getUChar(delta))) {
25350 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25351 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25352 "vpsrlw", Iop_ShrN16x16 );
25353 *uses_vvvv = True;
25354 goto decode_success;
25355 }
25356 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25357 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25358 "vpsraw", Iop_SarN16x16 );
25359 *uses_vvvv = True;
25360 goto decode_success;
25361 }
25362 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25363 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25364 "vpsllw", Iop_ShlN16x16 );
25365 *uses_vvvv = True;
25366 goto decode_success;
25367 }
25368 /* else fall through */
25369 }
sewardj6faf7cc2012-05-25 15:53:01 +000025370 break;
25371
sewardjc4530ae2012-05-21 10:18:49 +000025372 case 0x72:
sewardj251b59e2012-05-25 13:51:07 +000025373 /* VPSRLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /2 ib */
sewardj15ad1942012-06-20 10:21:05 +000025374 /* VPSRAD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /4 ib */
25375 /* VPSLLD imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 72 /6 ib */
sewardjc4530ae2012-05-21 10:18:49 +000025376 if (have66noF2noF3(pfx)
25377 && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000025378 && epartIsReg(getUChar(delta))) {
sewardj251b59e2012-05-25 13:51:07 +000025379 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25380 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25381 "vpsrld", Iop_ShrN32x4 );
25382 *uses_vvvv = True;
25383 goto decode_success;
25384 }
sewardj15ad1942012-06-20 10:21:05 +000025385 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25386 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25387 "vpsrad", Iop_SarN32x4 );
25388 *uses_vvvv = True;
25389 goto decode_success;
25390 }
25391 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25392 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25393 "vpslld", Iop_ShlN32x4 );
25394 *uses_vvvv = True;
25395 goto decode_success;
25396 }
sewardj251b59e2012-05-25 13:51:07 +000025397 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000025398 }
sewardjcc3d2192013-03-27 11:37:33 +000025399 /* VPSRLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /2 ib */
25400 /* VPSRAD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /4 ib */
25401 /* VPSLLD imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 72 /6 ib */
25402 if (have66noF2noF3(pfx)
25403 && 1==getVexL(pfx)/*256*/
25404 && epartIsReg(getUChar(delta))) {
25405 if (gregLO3ofRM(getUChar(delta)) == 2/*SRL*/) {
25406 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25407 "vpsrld", Iop_ShrN32x8 );
25408 *uses_vvvv = True;
25409 goto decode_success;
25410 }
25411 if (gregLO3ofRM(getUChar(delta)) == 4/*SRA*/) {
25412 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25413 "vpsrad", Iop_SarN32x8 );
25414 *uses_vvvv = True;
25415 goto decode_success;
25416 }
25417 if (gregLO3ofRM(getUChar(delta)) == 6/*SLL*/) {
25418 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25419 "vpslld", Iop_ShlN32x8 );
25420 *uses_vvvv = True;
25421 goto decode_success;
25422 }
25423 /* else fall through */
25424 }
sewardjc4530ae2012-05-21 10:18:49 +000025425 break;
25426
25427 case 0x73:
sewardj251b59e2012-05-25 13:51:07 +000025428 /* VPSRLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /3 ib */
25429 /* VPSLLDQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /7 ib */
sewardj56c30312012-06-12 08:45:39 +000025430 /* VPSRLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /2 ib */
sewardje8a7eb72012-06-12 14:59:17 +000025431 /* VPSLLQ imm8, xmm2, xmm1 = VEX.NDD.128.66.0F.WIG 73 /6 ib */
sewardjc4530ae2012-05-21 10:18:49 +000025432 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardj251b59e2012-05-25 13:51:07 +000025433 && epartIsReg(getUChar(delta))) {
25434 Int rS = eregOfRexRM(pfx,getUChar(delta));
25435 Int rD = getVexNvvvv(pfx);
sewardjc4530ae2012-05-21 10:18:49 +000025436 IRTemp vecS = newTemp(Ity_V128);
sewardj251b59e2012-05-25 13:51:07 +000025437 if (gregLO3ofRM(getUChar(delta)) == 3) {
25438 Int imm = (Int)getUChar(delta+1);
25439 DIP("vpsrldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
25440 delta += 2;
25441 assign( vecS, getXMMReg(rS) );
25442 putYMMRegLoAndZU(rD, mkexpr(math_PSRLDQ( vecS, imm )));
25443 *uses_vvvv = True;
25444 goto decode_success;
25445 }
25446 if (gregLO3ofRM(getUChar(delta)) == 7) {
25447 Int imm = (Int)getUChar(delta+1);
25448 DIP("vpslldq $%d,%s,%s\n", imm, nameXMMReg(rS), nameXMMReg(rD));
25449 delta += 2;
25450 assign( vecS, getXMMReg(rS) );
25451 putYMMRegLoAndZU(rD, mkexpr(math_PSLLDQ( vecS, imm )));
25452 *uses_vvvv = True;
25453 goto decode_success;
25454 }
sewardj56c30312012-06-12 08:45:39 +000025455 if (gregLO3ofRM(getUChar(delta)) == 2) {
25456 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25457 "vpsrlq", Iop_ShrN64x2 );
25458 *uses_vvvv = True;
25459 goto decode_success;
25460 }
sewardje8a7eb72012-06-12 14:59:17 +000025461 if (gregLO3ofRM(getUChar(delta)) == 6) {
25462 delta = dis_AVX128_shiftE_to_V_imm( pfx, delta,
25463 "vpsllq", Iop_ShlN64x2 );
25464 *uses_vvvv = True;
25465 goto decode_success;
25466 }
sewardj251b59e2012-05-25 13:51:07 +000025467 /* else fall through */
sewardjc4530ae2012-05-21 10:18:49 +000025468 }
sewardjcc3d2192013-03-27 11:37:33 +000025469 /* VPSRLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /3 ib */
25470 /* VPSLLDQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /7 ib */
25471 /* VPSRLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /2 ib */
25472 /* VPSLLQ imm8, ymm2, ymm1 = VEX.NDD.256.66.0F.WIG 73 /6 ib */
25473 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
25474 && epartIsReg(getUChar(delta))) {
25475 Int rS = eregOfRexRM(pfx,getUChar(delta));
25476 Int rD = getVexNvvvv(pfx);
25477 if (gregLO3ofRM(getUChar(delta)) == 3) {
25478 IRTemp vecS0 = newTemp(Ity_V128);
25479 IRTemp vecS1 = newTemp(Ity_V128);
25480 Int imm = (Int)getUChar(delta+1);
25481 DIP("vpsrldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
25482 delta += 2;
25483 assign( vecS0, getYMMRegLane128(rS, 0));
25484 assign( vecS1, getYMMRegLane128(rS, 1));
25485 putYMMRegLane128(rD, 0, mkexpr(math_PSRLDQ( vecS0, imm )));
25486 putYMMRegLane128(rD, 1, mkexpr(math_PSRLDQ( vecS1, imm )));
25487 *uses_vvvv = True;
25488 goto decode_success;
25489 }
25490 if (gregLO3ofRM(getUChar(delta)) == 7) {
25491 IRTemp vecS0 = newTemp(Ity_V128);
25492 IRTemp vecS1 = newTemp(Ity_V128);
25493 Int imm = (Int)getUChar(delta+1);
25494 DIP("vpslldq $%d,%s,%s\n", imm, nameYMMReg(rS), nameYMMReg(rD));
25495 delta += 2;
25496 assign( vecS0, getYMMRegLane128(rS, 0));
25497 assign( vecS1, getYMMRegLane128(rS, 1));
25498 putYMMRegLane128(rD, 0, mkexpr(math_PSLLDQ( vecS0, imm )));
25499 putYMMRegLane128(rD, 1, mkexpr(math_PSLLDQ( vecS1, imm )));
25500 *uses_vvvv = True;
25501 goto decode_success;
25502 }
25503 if (gregLO3ofRM(getUChar(delta)) == 2) {
25504 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25505 "vpsrlq", Iop_ShrN64x4 );
25506 *uses_vvvv = True;
25507 goto decode_success;
25508 }
25509 if (gregLO3ofRM(getUChar(delta)) == 6) {
25510 delta = dis_AVX256_shiftE_to_V_imm( pfx, delta,
25511 "vpsllq", Iop_ShlN64x4 );
25512 *uses_vvvv = True;
25513 goto decode_success;
25514 }
25515 /* else fall through */
25516 }
sewardjc4530ae2012-05-21 10:18:49 +000025517 break;
25518
sewardj8ef22422012-05-24 16:29:18 +000025519 case 0x74:
sewardj251b59e2012-05-25 13:51:07 +000025520 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
sewardj8ef22422012-05-24 16:29:18 +000025521 /* VPCMPEQB = VEX.NDS.128.66.0F.WIG 74 /r */
25522 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25523 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25524 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x16 );
25525 goto decode_success;
25526 }
sewardjcc3d2192013-03-27 11:37:33 +000025527 /* VPCMPEQB r/m, rV, r ::: r = rV `eq-by-8s` r/m */
25528 /* VPCMPEQB = VEX.NDS.256.66.0F.WIG 74 /r */
25529 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25530 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25531 uses_vvvv, vbi, pfx, delta, "vpcmpeqb", Iop_CmpEQ8x32 );
25532 goto decode_success;
25533 }
sewardj8ef22422012-05-24 16:29:18 +000025534 break;
25535
sewardj6fcd43e2012-06-14 08:51:35 +000025536 case 0x75:
25537 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
25538 /* VPCMPEQW = VEX.NDS.128.66.0F.WIG 75 /r */
25539 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25540 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25541 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x8 );
25542 goto decode_success;
25543 }
sewardjcc3d2192013-03-27 11:37:33 +000025544 /* VPCMPEQW r/m, rV, r ::: r = rV `eq-by-16s` r/m */
25545 /* VPCMPEQW = VEX.NDS.256.66.0F.WIG 75 /r */
25546 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25547 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25548 uses_vvvv, vbi, pfx, delta, "vpcmpeqw", Iop_CmpEQ16x16 );
25549 goto decode_success;
25550 }
sewardj6fcd43e2012-06-14 08:51:35 +000025551 break;
25552
sewardjc4530ae2012-05-21 10:18:49 +000025553 case 0x76:
sewardj251b59e2012-05-25 13:51:07 +000025554 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
sewardjc4530ae2012-05-21 10:18:49 +000025555 /* VPCMPEQD = VEX.NDS.128.66.0F.WIG 76 /r */
25556 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25557 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
25558 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x4 );
25559 goto decode_success;
25560 }
sewardjcc3d2192013-03-27 11:37:33 +000025561 /* VPCMPEQD r/m, rV, r ::: r = rV `eq-by-32s` r/m */
25562 /* VPCMPEQD = VEX.NDS.256.66.0F.WIG 76 /r */
25563 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25564 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
25565 uses_vvvv, vbi, pfx, delta, "vpcmpeqd", Iop_CmpEQ32x8 );
25566 goto decode_success;
25567 }
sewardjc4530ae2012-05-21 10:18:49 +000025568 break;
25569
25570 case 0x77:
25571 /* VZEROUPPER = VEX.128.0F.WIG 77 */
25572 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25573 Int i;
25574 IRTemp zero128 = newTemp(Ity_V128);
25575 assign(zero128, mkV128(0));
25576 for (i = 0; i < 16; i++) {
25577 putYMMRegLane128(i, 1, mkexpr(zero128));
25578 }
25579 DIP("vzeroupper\n");
25580 goto decode_success;
25581 }
sewardj66becf32012-06-18 23:15:16 +000025582 /* VZEROALL = VEX.256.0F.WIG 77 */
25583 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25584 Int i;
25585 IRTemp zero128 = newTemp(Ity_V128);
25586 assign(zero128, mkV128(0));
25587 for (i = 0; i < 16; i++) {
25588 putYMMRegLoAndZU(i, mkexpr(zero128));
25589 }
25590 DIP("vzeroall\n");
25591 goto decode_success;
25592 }
sewardjc4530ae2012-05-21 10:18:49 +000025593 break;
25594
sewardjadf357c2012-06-24 13:44:17 +000025595 case 0x7C:
25596 case 0x7D:
25597 /* VHADDPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7C /r */
25598 /* VHSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG 7D /r */
25599 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25600 IRTemp sV = newTemp(Ity_V128);
25601 IRTemp dV = newTemp(Ity_V128);
25602 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025603 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025604 UChar modrm = getUChar(delta);
25605 UInt rG = gregOfRexRM(pfx,modrm);
25606 UInt rV = getVexNvvvv(pfx);
25607 if (epartIsReg(modrm)) {
25608 UInt rE = eregOfRexRM(pfx,modrm);
25609 assign( sV, getXMMReg(rE) );
25610 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
25611 nameXMMReg(rV), nameXMMReg(rG));
25612 delta += 1;
25613 } else {
25614 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25615 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
25616 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25617 nameXMMReg(rV), nameXMMReg(rG));
25618 delta += alen;
25619 }
25620 assign( dV, getXMMReg(rV) );
25621 putYMMRegLoAndZU( rG, mkexpr( math_HADDPS_128 ( dV, sV, isAdd ) ) );
25622 *uses_vvvv = True;
25623 goto decode_success;
25624 }
25625 /* VHADDPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7C /r */
25626 /* VHSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG 7D /r */
25627 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25628 IRTemp sV = newTemp(Ity_V256);
25629 IRTemp dV = newTemp(Ity_V256);
25630 IRTemp s1, s0, d1, d0;
25631 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025632 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025633 UChar modrm = getUChar(delta);
25634 UInt rG = gregOfRexRM(pfx,modrm);
25635 UInt rV = getVexNvvvv(pfx);
25636 s1 = s0 = d1 = d0 = IRTemp_INVALID;
25637 if (epartIsReg(modrm)) {
25638 UInt rE = eregOfRexRM(pfx,modrm);
25639 assign( sV, getYMMReg(rE) );
25640 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
25641 nameYMMReg(rV), nameYMMReg(rG));
25642 delta += 1;
25643 } else {
25644 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25645 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
25646 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25647 nameYMMReg(rV), nameYMMReg(rG));
25648 delta += alen;
25649 }
25650 assign( dV, getYMMReg(rV) );
25651 breakupV256toV128s( dV, &d1, &d0 );
25652 breakupV256toV128s( sV, &s1, &s0 );
25653 putYMMReg( rG, binop(Iop_V128HLtoV256,
25654 mkexpr( math_HADDPS_128 ( d1, s1, isAdd ) ),
25655 mkexpr( math_HADDPS_128 ( d0, s0, isAdd ) ) ) );
25656 *uses_vvvv = True;
25657 goto decode_success;
25658 }
25659 /* VHADDPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7C /r */
25660 /* VHSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG 7D /r */
25661 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25662 IRTemp sV = newTemp(Ity_V128);
25663 IRTemp dV = newTemp(Ity_V128);
25664 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025665 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025666 UChar modrm = getUChar(delta);
25667 UInt rG = gregOfRexRM(pfx,modrm);
25668 UInt rV = getVexNvvvv(pfx);
25669 if (epartIsReg(modrm)) {
25670 UInt rE = eregOfRexRM(pfx,modrm);
25671 assign( sV, getXMMReg(rE) );
25672 DIP("vh%spd %s,%s,%s\n", str, nameXMMReg(rE),
25673 nameXMMReg(rV), nameXMMReg(rG));
25674 delta += 1;
25675 } else {
25676 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25677 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
25678 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25679 nameXMMReg(rV), nameXMMReg(rG));
25680 delta += alen;
25681 }
25682 assign( dV, getXMMReg(rV) );
25683 putYMMRegLoAndZU( rG, mkexpr( math_HADDPD_128 ( dV, sV, isAdd ) ) );
25684 *uses_vvvv = True;
25685 goto decode_success;
25686 }
25687 /* VHADDPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7C /r */
25688 /* VHSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG 7D /r */
25689 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25690 IRTemp sV = newTemp(Ity_V256);
25691 IRTemp dV = newTemp(Ity_V256);
25692 IRTemp s1, s0, d1, d0;
25693 Bool isAdd = opc == 0x7C;
florian55085f82012-11-21 00:36:55 +000025694 const HChar* str = isAdd ? "add" : "sub";
sewardjadf357c2012-06-24 13:44:17 +000025695 UChar modrm = getUChar(delta);
25696 UInt rG = gregOfRexRM(pfx,modrm);
25697 UInt rV = getVexNvvvv(pfx);
25698 s1 = s0 = d1 = d0 = IRTemp_INVALID;
25699 if (epartIsReg(modrm)) {
25700 UInt rE = eregOfRexRM(pfx,modrm);
25701 assign( sV, getYMMReg(rE) );
25702 DIP("vh%spd %s,%s,%s\n", str, nameYMMReg(rE),
25703 nameYMMReg(rV), nameYMMReg(rG));
25704 delta += 1;
25705 } else {
25706 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25707 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
25708 DIP("vh%spd %s,%s,%s\n", str, dis_buf,
25709 nameYMMReg(rV), nameYMMReg(rG));
25710 delta += alen;
25711 }
25712 assign( dV, getYMMReg(rV) );
25713 breakupV256toV128s( dV, &d1, &d0 );
25714 breakupV256toV128s( sV, &s1, &s0 );
25715 putYMMReg( rG, binop(Iop_V128HLtoV256,
25716 mkexpr( math_HADDPD_128 ( d1, s1, isAdd ) ),
25717 mkexpr( math_HADDPD_128 ( d0, s0, isAdd ) ) ) );
25718 *uses_vvvv = True;
25719 goto decode_success;
25720 }
25721 break;
25722
sewardjc4530ae2012-05-21 10:18:49 +000025723 case 0x7E:
25724 /* Note the Intel docs don't make sense for this. I think they
25725 are wrong. They seem to imply it is a store when in fact I
25726 think it is a load. Also it's unclear whether this is W0, W1
25727 or WIG. */
sewardj6be43242012-05-22 23:12:13 +000025728 /* VMOVQ xmm2/m64, xmm1 = VEX.128.F3.0F.W0 7E /r */
sewardjc4530ae2012-05-21 10:18:49 +000025729 if (haveF3no66noF2(pfx)
25730 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25731 vassert(sz == 4); /* even tho we are transferring 8, not 4. */
25732 UChar modrm = getUChar(delta);
25733 UInt rG = gregOfRexRM(pfx,modrm);
25734 if (epartIsReg(modrm)) {
25735 UInt rE = eregOfRexRM(pfx,modrm);
25736 putXMMRegLane64( rG, 0, getXMMRegLane64( rE, 0 ));
25737 DIP("vmovq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
25738 delta += 1;
25739 } else {
25740 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25741 putXMMRegLane64( rG, 0, loadLE(Ity_I64, mkexpr(addr)) );
25742 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
25743 delta += alen;
25744 }
25745 /* zero bits 255:64 */
25746 putXMMRegLane64( rG, 1, mkU64(0) );
25747 putYMMRegLane128( rG, 1, mkV128(0) );
25748 goto decode_success;
25749 }
sewardj6be43242012-05-22 23:12:13 +000025750 /* VMOVQ xmm1, r64 = VEX.128.66.0F.W1 7E /r (reg case only) */
25751 /* Moves from G to E, so is a store-form insn */
sewardj251b59e2012-05-25 13:51:07 +000025752 /* Intel docs list this in the VMOVD entry for some reason. */
sewardj6be43242012-05-22 23:12:13 +000025753 if (have66noF2noF3(pfx)
sewardj82096922012-06-24 14:57:59 +000025754 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
sewardj6be43242012-05-22 23:12:13 +000025755 UChar modrm = getUChar(delta);
25756 UInt rG = gregOfRexRM(pfx,modrm);
sewardj82096922012-06-24 14:57:59 +000025757 if (epartIsReg(modrm)) {
25758 UInt rE = eregOfRexRM(pfx,modrm);
25759 DIP("vmovq %s,%s\n", nameXMMReg(rG), nameIReg64(rE));
25760 putIReg64(rE, getXMMRegLane64(rG, 0));
25761 delta += 1;
25762 } else {
25763 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25764 storeLE( mkexpr(addr), getXMMRegLane64(rG, 0) );
25765 DIP("vmovq %s,%s\n", dis_buf, nameXMMReg(rG));
25766 delta += alen;
25767 }
sewardj6be43242012-05-22 23:12:13 +000025768 goto decode_success;
25769 }
sewardj6faf7cc2012-05-25 15:53:01 +000025770 /* VMOVD xmm1, m32/r32 = VEX.128.66.0F.W0 7E /r (reg case only) */
sewardj72df0682012-05-23 23:54:30 +000025771 /* Moves from G to E, so is a store-form insn */
25772 if (have66noF2noF3(pfx)
sewardj251b59e2012-05-25 13:51:07 +000025773 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
sewardj72df0682012-05-23 23:54:30 +000025774 UChar modrm = getUChar(delta);
25775 UInt rG = gregOfRexRM(pfx,modrm);
sewardj251b59e2012-05-25 13:51:07 +000025776 if (epartIsReg(modrm)) {
25777 UInt rE = eregOfRexRM(pfx,modrm);
25778 DIP("vmovd %s,%s\n", nameXMMReg(rG), nameIReg32(rE));
25779 putIReg32(rE, getXMMRegLane32(rG, 0));
25780 delta += 1;
25781 } else {
25782 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
25783 storeLE( mkexpr(addr), getXMMRegLane32(rG, 0) );
25784 DIP("vmovd %s,%s\n", dis_buf, nameXMMReg(rG));
25785 delta += alen;
25786 }
sewardj72df0682012-05-23 23:54:30 +000025787 goto decode_success;
25788 }
sewardjc4530ae2012-05-21 10:18:49 +000025789 break;
25790
25791 case 0x7F:
25792 /* VMOVDQA ymm1, ymm2/m256 = VEX.256.66.0F.WIG 7F */
sewardj66becf32012-06-18 23:15:16 +000025793 /* VMOVDQU ymm1, ymm2/m256 = VEX.256.F3.0F.WIG 7F */
25794 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25795 && 1==getVexL(pfx)/*256*/) {
sewardjc4530ae2012-05-21 10:18:49 +000025796 UChar modrm = getUChar(delta);
25797 UInt rS = gregOfRexRM(pfx, modrm);
25798 IRTemp tS = newTemp(Ity_V256);
sewardj66becf32012-06-18 23:15:16 +000025799 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025800 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025801 assign(tS, getYMMReg(rS));
25802 if (epartIsReg(modrm)) {
25803 UInt rD = eregOfRexRM(pfx, modrm);
25804 delta += 1;
25805 putYMMReg(rD, mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000025806 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), nameYMMReg(rD));
sewardjc4530ae2012-05-21 10:18:49 +000025807 } else {
25808 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25809 delta += alen;
sewardj66becf32012-06-18 23:15:16 +000025810 if (isA)
25811 gen_SEGV_if_not_32_aligned(addr);
sewardjc4530ae2012-05-21 10:18:49 +000025812 storeLE(mkexpr(addr), mkexpr(tS));
sewardj66becf32012-06-18 23:15:16 +000025813 DIP("vmovdq%c %s,%s\n", ch, nameYMMReg(rS), dis_buf);
sewardjc4530ae2012-05-21 10:18:49 +000025814 }
25815 goto decode_success;
25816 }
25817 /* VMOVDQA xmm1, xmm2/m128 = VEX.128.66.0F.WIG 7F */
25818 /* VMOVDQU xmm1, xmm2/m128 = VEX.128.F3.0F.WIG 7F */
25819 if ((have66noF2noF3(pfx) || haveF3no66noF2(pfx))
25820 && 0==getVexL(pfx)/*128*/) {
25821 UChar modrm = getUChar(delta);
25822 UInt rS = gregOfRexRM(pfx, modrm);
25823 IRTemp tS = newTemp(Ity_V128);
25824 Bool isA = have66noF2noF3(pfx);
florian5df8ab02012-10-13 19:34:19 +000025825 HChar ch = isA ? 'a' : 'u';
sewardjc4530ae2012-05-21 10:18:49 +000025826 assign(tS, getXMMReg(rS));
25827 if (epartIsReg(modrm)) {
25828 UInt rD = eregOfRexRM(pfx, modrm);
25829 delta += 1;
25830 putYMMRegLoAndZU(rD, mkexpr(tS));
25831 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), nameXMMReg(rD));
25832 } else {
25833 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
25834 delta += alen;
25835 if (isA)
25836 gen_SEGV_if_not_16_aligned(addr);
25837 storeLE(mkexpr(addr), mkexpr(tS));
25838 DIP("vmovdq%c %s,%s\n", ch, nameXMMReg(rS), dis_buf);
25839 }
25840 goto decode_success;
25841 }
25842 break;
25843
sewardjfe0c5e72012-06-15 15:48:07 +000025844 case 0xAE:
25845 /* VSTMXCSR m32 = VEX.LZ.0F.WIG AE /3 */
25846 if (haveNo66noF2noF3(pfx)
25847 && 0==getVexL(pfx)/*LZ*/
25848 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
25849 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 3
25850 && sz == 4) {
25851 delta = dis_STMXCSR(vbi, pfx, delta, True/*isAvx*/);
25852 goto decode_success;
25853 }
25854 /* VLDMXCSR m32 = VEX.LZ.0F.WIG AE /2 */
25855 if (haveNo66noF2noF3(pfx)
25856 && 0==getVexL(pfx)/*LZ*/
25857 && 0==getRexW(pfx) /* be paranoid -- Intel docs don't require this */
25858 && !epartIsReg(getUChar(delta)) && gregLO3ofRM(getUChar(delta)) == 2
25859 && sz == 4) {
25860 delta = dis_LDMXCSR(vbi, pfx, delta, True/*isAvx*/);
25861 goto decode_success;
25862 }
25863 break;
25864
sewardjc4530ae2012-05-21 10:18:49 +000025865 case 0xC2:
25866 /* VCMPSD xmm3/m64(E=argL), xmm2(V=argR), xmm1(G) */
25867 /* = VEX.NDS.LIG.F2.0F.WIG C2 /r ib */
25868 if (haveF2no66noF3(pfx)) {
25869 Long delta0 = delta;
25870 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25871 "vcmpsd", False/*!all_lanes*/,
25872 8/*sz*/);
25873 if (delta > delta0) goto decode_success;
25874 /* else fall through -- decoding has failed */
25875 }
25876 /* VCMPSS xmm3/m32(E=argL), xmm2(V=argR), xmm1(G) */
25877 /* = VEX.NDS.LIG.F3.0F.WIG C2 /r ib */
25878 if (haveF3no66noF2(pfx)) {
25879 Long delta0 = delta;
25880 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25881 "vcmpss", False/*!all_lanes*/,
25882 4/*sz*/);
25883 if (delta > delta0) goto decode_success;
25884 /* else fall through -- decoding has failed */
25885 }
sewardj89378162012-06-24 12:12:20 +000025886 /* VCMPPD xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
sewardj4b1cc832012-06-13 11:10:20 +000025887 /* = VEX.NDS.128.66.0F.WIG C2 /r ib */
25888 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25889 Long delta0 = delta;
25890 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25891 "vcmppd", True/*all_lanes*/,
25892 8/*sz*/);
25893 if (delta > delta0) goto decode_success;
25894 /* else fall through -- decoding has failed */
25895 }
sewardj89378162012-06-24 12:12:20 +000025896 /* VCMPPD ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
25897 /* = VEX.NDS.256.66.0F.WIG C2 /r ib */
25898 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25899 Long delta0 = delta;
25900 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25901 "vcmppd", 8/*sz*/);
25902 if (delta > delta0) goto decode_success;
25903 /* else fall through -- decoding has failed */
25904 }
25905 /* VCMPPS xmm3/m128(E=argL), xmm2(V=argR), xmm1(G) */
25906 /* = VEX.NDS.128.0F.WIG C2 /r ib */
25907 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25908 Long delta0 = delta;
25909 delta = dis_AVX128_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25910 "vcmpps", True/*all_lanes*/,
25911 4/*sz*/);
25912 if (delta > delta0) goto decode_success;
25913 /* else fall through -- decoding has failed */
25914 }
25915 /* VCMPPS ymm3/m256(E=argL), ymm2(V=argR), ymm1(G) */
25916 /* = VEX.NDS.256.0F.WIG C2 /r ib */
25917 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
25918 Long delta0 = delta;
25919 delta = dis_AVX256_cmp_V_E_to_G( uses_vvvv, vbi, pfx, delta,
25920 "vcmpps", 4/*sz*/);
25921 if (delta > delta0) goto decode_success;
25922 /* else fall through -- decoding has failed */
25923 }
sewardjc4530ae2012-05-21 10:18:49 +000025924 break;
25925
sewardj21459cb2012-06-18 14:05:52 +000025926 case 0xC4:
25927 /* VPINSRW r32/m16, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG C4 /r ib */
25928 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25929 UChar modrm = getUChar(delta);
25930 UInt rG = gregOfRexRM(pfx, modrm);
25931 UInt rV = getVexNvvvv(pfx);
25932 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000025933 IRTemp new16 = newTemp(Ity_I16);
sewardj21459cb2012-06-18 14:05:52 +000025934
25935 if ( epartIsReg( modrm ) ) {
25936 imm8 = (Int)(getUChar(delta+1) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000025937 assign( new16, unop(Iop_32to16,
25938 getIReg32(eregOfRexRM(pfx,modrm))) );
sewardj21459cb2012-06-18 14:05:52 +000025939 delta += 1+1;
25940 DIP( "vpinsrw $%d,%s,%s\n", imm8,
25941 nameIReg32( eregOfRexRM(pfx, modrm) ), nameXMMReg(rG) );
25942 } else {
25943 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
25944 imm8 = (Int)(getUChar(delta+alen) & 7);
sewardj4ed05e02012-06-18 15:01:30 +000025945 assign( new16, loadLE( Ity_I16, mkexpr(addr) ));
sewardj21459cb2012-06-18 14:05:52 +000025946 delta += alen+1;
25947 DIP( "vpinsrw $%d,%s,%s\n",
25948 imm8, dis_buf, nameXMMReg(rG) );
25949 }
25950
sewardj4ed05e02012-06-18 15:01:30 +000025951 IRTemp src_vec = newTemp(Ity_V128);
25952 assign(src_vec, getXMMReg( rV ));
25953 IRTemp res_vec = math_PINSRW_128( src_vec, new16, imm8 );
25954 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000025955 *uses_vvvv = True;
25956 goto decode_success;
25957 }
sewardj4ed05e02012-06-18 15:01:30 +000025958 break;
sewardj21459cb2012-06-18 14:05:52 +000025959
sewardje8a7eb72012-06-12 14:59:17 +000025960 case 0xC5:
25961 /* VPEXTRW imm8, xmm1, reg32 = VEX.128.66.0F.W0 C5 /r ib */
25962 if (have66noF2noF3(pfx)
25963 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
25964 Long delta0 = delta;
25965 delta = dis_PEXTRW_128_EregOnly_toG( vbi, pfx, delta,
25966 True/*isAvx*/ );
25967 if (delta > delta0) goto decode_success;
25968 /* else fall through -- decoding has failed */
25969 }
25970 break;
25971
sewardj251b59e2012-05-25 13:51:07 +000025972 case 0xC6:
25973 /* VSHUFPS imm8, xmm3/m128, xmm2, xmm1 */
25974 /* = VEX.NDS.128.0F.WIG C6 /r ib */
25975 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
25976 Int imm8 = 0;
25977 IRTemp eV = newTemp(Ity_V128);
25978 IRTemp vV = newTemp(Ity_V128);
25979 UInt modrm = getUChar(delta);
25980 UInt rG = gregOfRexRM(pfx,modrm);
25981 UInt rV = getVexNvvvv(pfx);
25982 assign( vV, getXMMReg(rV) );
25983 if (epartIsReg(modrm)) {
25984 UInt rE = eregOfRexRM(pfx,modrm);
25985 assign( eV, getXMMReg(rE) );
25986 imm8 = (Int)getUChar(delta+1);
25987 delta += 1+1;
25988 DIP("vshufps $%d,%s,%s,%s\n",
25989 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
25990 } else {
25991 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
25992 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
25993 imm8 = (Int)getUChar(delta+alen);
25994 delta += 1+alen;
25995 DIP("vshufps $%d,%s,%s,%s\n",
25996 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
25997 }
sewardj4b1cc832012-06-13 11:10:20 +000025998 IRTemp res = math_SHUFPS_128( eV, vV, imm8 );
25999 putYMMRegLoAndZU( rG, mkexpr(res) );
26000 *uses_vvvv = True;
26001 goto decode_success;
26002 }
26003 /* VSHUFPS imm8, ymm3/m256, ymm2, ymm1 */
26004 /* = VEX.NDS.256.0F.WIG C6 /r ib */
26005 if (haveNo66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26006 Int imm8 = 0;
26007 IRTemp eV = newTemp(Ity_V256);
26008 IRTemp vV = newTemp(Ity_V256);
26009 UInt modrm = getUChar(delta);
26010 UInt rG = gregOfRexRM(pfx,modrm);
26011 UInt rV = getVexNvvvv(pfx);
26012 assign( vV, getYMMReg(rV) );
26013 if (epartIsReg(modrm)) {
26014 UInt rE = eregOfRexRM(pfx,modrm);
26015 assign( eV, getYMMReg(rE) );
26016 imm8 = (Int)getUChar(delta+1);
26017 delta += 1+1;
26018 DIP("vshufps $%d,%s,%s,%s\n",
26019 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26020 } else {
26021 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26022 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
26023 imm8 = (Int)getUChar(delta+alen);
26024 delta += 1+alen;
26025 DIP("vshufps $%d,%s,%s,%s\n",
26026 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26027 }
26028 IRTemp res = math_SHUFPS_256( eV, vV, imm8 );
26029 putYMMReg( rG, mkexpr(res) );
sewardj251b59e2012-05-25 13:51:07 +000026030 *uses_vvvv = True;
26031 goto decode_success;
26032 }
sewardj21459cb2012-06-18 14:05:52 +000026033 /* VSHUFPD imm8, xmm3/m128, xmm2, xmm1 */
26034 /* = VEX.NDS.128.66.0F.WIG C6 /r ib */
26035 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26036 Int imm8 = 0;
26037 IRTemp eV = newTemp(Ity_V128);
26038 IRTemp vV = newTemp(Ity_V128);
26039 UInt modrm = getUChar(delta);
26040 UInt rG = gregOfRexRM(pfx,modrm);
26041 UInt rV = getVexNvvvv(pfx);
26042 assign( vV, getXMMReg(rV) );
26043 if (epartIsReg(modrm)) {
26044 UInt rE = eregOfRexRM(pfx,modrm);
26045 assign( eV, getXMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000026046 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000026047 delta += 1+1;
26048 DIP("vshufpd $%d,%s,%s,%s\n",
26049 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
26050 } else {
26051 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26052 assign( eV, loadLE(Ity_V128, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000026053 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000026054 delta += 1+alen;
26055 DIP("vshufpd $%d,%s,%s,%s\n",
26056 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
26057 }
26058 IRTemp res = math_SHUFPD_128( eV, vV, imm8 );
26059 putYMMRegLoAndZU( rG, mkexpr(res) );
26060 *uses_vvvv = True;
26061 goto decode_success;
26062 }
26063 /* VSHUFPD imm8, ymm3/m256, ymm2, ymm1 */
26064 /* = VEX.NDS.256.66.0F.WIG C6 /r ib */
26065 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26066 Int imm8 = 0;
26067 IRTemp eV = newTemp(Ity_V256);
26068 IRTemp vV = newTemp(Ity_V256);
26069 UInt modrm = getUChar(delta);
26070 UInt rG = gregOfRexRM(pfx,modrm);
26071 UInt rV = getVexNvvvv(pfx);
26072 assign( vV, getYMMReg(rV) );
26073 if (epartIsReg(modrm)) {
26074 UInt rE = eregOfRexRM(pfx,modrm);
26075 assign( eV, getYMMReg(rE) );
sewardj47933bc2012-06-18 22:09:33 +000026076 imm8 = (Int)getUChar(delta+1);
sewardj21459cb2012-06-18 14:05:52 +000026077 delta += 1+1;
26078 DIP("vshufpd $%d,%s,%s,%s\n",
26079 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
26080 } else {
26081 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 1 );
26082 assign( eV, loadLE(Ity_V256, mkexpr(addr)) );
sewardj47933bc2012-06-18 22:09:33 +000026083 imm8 = (Int)getUChar(delta+alen);
sewardj21459cb2012-06-18 14:05:52 +000026084 delta += 1+alen;
26085 DIP("vshufpd $%d,%s,%s,%s\n",
26086 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
26087 }
26088 IRTemp res = math_SHUFPD_256( eV, vV, imm8 );
26089 putYMMReg( rG, mkexpr(res) );
26090 *uses_vvvv = True;
26091 goto decode_success;
26092 }
sewardj251b59e2012-05-25 13:51:07 +000026093 break;
26094
sewardj89378162012-06-24 12:12:20 +000026095 case 0xD0:
26096 /* VADDSUBPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D0 /r */
26097 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26098 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26099 uses_vvvv, vbi, pfx, delta,
26100 "vaddsubpd", math_ADDSUBPD_128 );
26101 goto decode_success;
26102 }
26103 /* VADDSUBPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D0 /r */
26104 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26105 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26106 uses_vvvv, vbi, pfx, delta,
26107 "vaddsubpd", math_ADDSUBPD_256 );
26108 goto decode_success;
26109 }
26110 /* VADDSUBPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.F2.0F.WIG D0 /r */
26111 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26112 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26113 uses_vvvv, vbi, pfx, delta,
26114 "vaddsubps", math_ADDSUBPS_128 );
26115 goto decode_success;
26116 }
26117 /* VADDSUBPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.F2.0F.WIG D0 /r */
26118 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26119 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26120 uses_vvvv, vbi, pfx, delta,
26121 "vaddsubps", math_ADDSUBPS_256 );
26122 goto decode_success;
26123 }
26124 break;
26125
sewardj4c0a7ac2012-06-21 09:08:19 +000026126 case 0xD1:
26127 /* VPSRLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D1 /r */
26128 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26129 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26130 "vpsrlw", Iop_ShrN16x8 );
26131 *uses_vvvv = True;
26132 goto decode_success;
26133
26134 }
sewardjcc3d2192013-03-27 11:37:33 +000026135 /* VPSRLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D1 /r */
26136 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26137 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26138 "vpsrlw", Iop_ShrN16x16 );
26139 *uses_vvvv = True;
26140 goto decode_success;
26141
26142 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026143 break;
26144
26145 case 0xD2:
26146 /* VPSRLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D2 /r */
26147 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26148 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26149 "vpsrld", Iop_ShrN32x4 );
26150 *uses_vvvv = True;
26151 goto decode_success;
26152 }
sewardjcc3d2192013-03-27 11:37:33 +000026153 /* VPSRLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D2 /r */
26154 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26155 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26156 "vpsrld", Iop_ShrN32x8 );
26157 *uses_vvvv = True;
26158 goto decode_success;
26159 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026160 break;
26161
26162 case 0xD3:
26163 /* VPSRLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D3 /r */
26164 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26165 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26166 "vpsrlq", Iop_ShrN64x2 );
26167 *uses_vvvv = True;
26168 goto decode_success;
26169 }
sewardjcc3d2192013-03-27 11:37:33 +000026170 /* VPSRLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D3 /r */
26171 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26172 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26173 "vpsrlq", Iop_ShrN64x4 );
26174 *uses_vvvv = True;
26175 goto decode_success;
26176 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026177 break;
26178
sewardj98d02cc2012-06-02 11:55:25 +000026179 case 0xD4:
26180 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26181 /* VPADDQ = VEX.NDS.128.66.0F.WIG D4 /r */
26182 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26183 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26184 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x2 );
26185 goto decode_success;
26186 }
sewardjcc3d2192013-03-27 11:37:33 +000026187 /* VPADDQ r/m, rV, r ::: r = rV + r/m */
26188 /* VPADDQ = VEX.NDS.256.66.0F.WIG D4 /r */
26189 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26190 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26191 uses_vvvv, vbi, pfx, delta, "vpaddq", Iop_Add64x4 );
26192 goto decode_success;
26193 }
sewardj98d02cc2012-06-02 11:55:25 +000026194 break;
26195
sewardj251b59e2012-05-25 13:51:07 +000026196 case 0xD5:
26197 /* VPMULLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D5 /r */
26198 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26199 delta = dis_AVX128_E_V_to_G(
26200 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x8 );
26201 goto decode_success;
26202 }
sewardjcc3d2192013-03-27 11:37:33 +000026203 /* VPMULLW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D5 /r */
26204 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26205 delta = dis_AVX256_E_V_to_G(
26206 uses_vvvv, vbi, pfx, delta, "vpmullw", Iop_Mul16x16 );
26207 goto decode_success;
26208 }
sewardj251b59e2012-05-25 13:51:07 +000026209 break;
26210
sewardjc4530ae2012-05-21 10:18:49 +000026211 case 0xD6:
26212 /* I can't even find any Intel docs for this one. */
26213 /* Basically: 66 0F D6 = MOVQ -- move 64 bits from G (lo half
26214 xmm) to E (mem or lo half xmm). Looks like L==0(128), W==0
26215 (WIG, maybe?) */
sewardj6eaf00c2012-05-23 11:33:56 +000026216 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
sewardjc4530ae2012-05-21 10:18:49 +000026217 && 0==getRexW(pfx)/*this might be redundant, dunno*/) {
26218 UChar modrm = getUChar(delta);
26219 UInt rG = gregOfRexRM(pfx,modrm);
26220 if (epartIsReg(modrm)) {
26221 /* fall through, awaiting test case */
26222 /* dst: lo half copied, hi half zeroed */
26223 } else {
26224 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26225 storeLE( mkexpr(addr), getXMMRegLane64( rG, 0 ));
26226 DIP("vmovq %s,%s\n", nameXMMReg(rG), dis_buf );
26227 delta += alen;
26228 goto decode_success;
26229 }
26230 }
26231 break;
26232
sewardj8ef22422012-05-24 16:29:18 +000026233 case 0xD7:
26234 /* VEX.128.66.0F.WIG D7 /r = VPMOVMSKB xmm1, r32 */
26235 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26236 delta = dis_PMOVMSKB_128( vbi, pfx, delta, True/*isAvx*/ );
26237 goto decode_success;
26238 }
sewardjcc3d2192013-03-27 11:37:33 +000026239 /* VEX.256.66.0F.WIG D7 /r = VPMOVMSKB ymm1, r32 */
26240 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26241 delta = dis_PMOVMSKB_256( vbi, pfx, delta );
26242 goto decode_success;
26243 }
sewardj8ef22422012-05-24 16:29:18 +000026244 break;
26245
sewardj251b59e2012-05-25 13:51:07 +000026246 case 0xD8:
26247 /* VPSUBUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D8 /r */
26248 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26249 delta = dis_AVX128_E_V_to_G(
26250 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux16 );
26251 goto decode_success;
26252 }
sewardjcc3d2192013-03-27 11:37:33 +000026253 /* VPSUBUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D8 /r */
26254 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26255 delta = dis_AVX256_E_V_to_G(
26256 uses_vvvv, vbi, pfx, delta, "vpsubusb", Iop_QSub8Ux32 );
26257 goto decode_success;
26258 }
26259 break;
sewardj251b59e2012-05-25 13:51:07 +000026260
sewardj6fcd43e2012-06-14 08:51:35 +000026261 case 0xD9:
26262 /* VPSUBUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG D9 /r */
26263 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26264 delta = dis_AVX128_E_V_to_G(
26265 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux8 );
26266 goto decode_success;
26267 }
sewardjcc3d2192013-03-27 11:37:33 +000026268 /* VPSUBUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG D9 /r */
26269 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26270 delta = dis_AVX256_E_V_to_G(
26271 uses_vvvv, vbi, pfx, delta, "vpsubusw", Iop_QSub16Ux16 );
26272 goto decode_success;
26273 }
sewardj6fcd43e2012-06-14 08:51:35 +000026274 break;
26275
sewardje8a7eb72012-06-12 14:59:17 +000026276 case 0xDA:
26277 /* VPMINUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DA /r */
26278 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26279 delta = dis_AVX128_E_V_to_G(
26280 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux16 );
26281 goto decode_success;
26282 }
sewardjcc3d2192013-03-27 11:37:33 +000026283 /* VPMINUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DA /r */
26284 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26285 delta = dis_AVX256_E_V_to_G(
26286 uses_vvvv, vbi, pfx, delta, "vpminub", Iop_Min8Ux32 );
26287 goto decode_success;
26288 }
sewardje8a7eb72012-06-12 14:59:17 +000026289 break;
26290
sewardj8ef22422012-05-24 16:29:18 +000026291 case 0xDB:
sewardj251b59e2012-05-25 13:51:07 +000026292 /* VPAND r/m, rV, r ::: r = rV & r/m */
sewardj8ef22422012-05-24 16:29:18 +000026293 /* VEX.NDS.128.66.0F.WIG DB /r = VPAND xmm3/m128, xmm2, xmm1 */
26294 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26295 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26296 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV128 );
26297 goto decode_success;
26298 }
sewardjcc3d2192013-03-27 11:37:33 +000026299 /* VPAND r/m, rV, r ::: r = rV & r/m */
26300 /* VEX.NDS.256.66.0F.WIG DB /r = VPAND ymm3/m256, ymm2, ymm1 */
26301 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26302 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26303 uses_vvvv, vbi, pfx, delta, "vpand", Iop_AndV256 );
26304 goto decode_success;
26305 }
sewardj8ef22422012-05-24 16:29:18 +000026306 break;
26307
sewardj251b59e2012-05-25 13:51:07 +000026308 case 0xDC:
26309 /* VPADDUSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DC /r */
26310 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26311 delta = dis_AVX128_E_V_to_G(
26312 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux16 );
26313 goto decode_success;
26314 }
sewardjcc3d2192013-03-27 11:37:33 +000026315 /* VPADDUSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DC /r */
26316 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26317 delta = dis_AVX256_E_V_to_G(
26318 uses_vvvv, vbi, pfx, delta, "vpaddusb", Iop_QAdd8Ux32 );
26319 goto decode_success;
26320 }
sewardj251b59e2012-05-25 13:51:07 +000026321 break;
26322
26323 case 0xDD:
26324 /* VPADDUSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DD /r */
26325 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26326 delta = dis_AVX128_E_V_to_G(
26327 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux8 );
26328 goto decode_success;
26329 }
sewardjcc3d2192013-03-27 11:37:33 +000026330 /* VPADDUSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DD /r */
26331 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26332 delta = dis_AVX256_E_V_to_G(
26333 uses_vvvv, vbi, pfx, delta, "vpaddusw", Iop_QAdd16Ux16 );
26334 goto decode_success;
26335 }
sewardj251b59e2012-05-25 13:51:07 +000026336 break;
26337
sewardje8a7eb72012-06-12 14:59:17 +000026338 case 0xDE:
26339 /* VPMAXUB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG DE /r */
26340 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26341 delta = dis_AVX128_E_V_to_G(
26342 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux16 );
26343 goto decode_success;
26344 }
sewardjcc3d2192013-03-27 11:37:33 +000026345 /* VPMAXUB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG DE /r */
26346 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26347 delta = dis_AVX256_E_V_to_G(
26348 uses_vvvv, vbi, pfx, delta, "vpmaxub", Iop_Max8Ux32 );
26349 goto decode_success;
26350 }
sewardje8a7eb72012-06-12 14:59:17 +000026351 break;
26352
26353 case 0xDF:
26354 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the V operand is inverted) */
26355 /* VEX.NDS.128.66.0F.WIG DF /r = VPANDN xmm3/m128, xmm2, xmm1 */
26356 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26357 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
26358 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV128,
26359 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
26360 goto decode_success;
26361 }
sewardjcc3d2192013-03-27 11:37:33 +000026362 /* VPANDN r/m, rV, r ::: r = ~rV & r/m (the V operand is inverted) */
26363 /* VEX.NDS.256.66.0F.WIG DF /r = VPANDN ymm3/m256, ymm2, ymm1 */
26364 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26365 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG(
26366 uses_vvvv, vbi, pfx, delta, "vpandn", Iop_AndV256,
26367 NULL, True/*invertLeftArg*/, False/*swapArgs*/ );
26368 goto decode_success;
26369 }
sewardje8a7eb72012-06-12 14:59:17 +000026370 break;
26371
sewardj8516a1f2012-06-24 14:26:30 +000026372 case 0xE0:
26373 /* VPAVGB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E0 /r */
26374 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26375 delta = dis_AVX128_E_V_to_G(
26376 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux16 );
26377 goto decode_success;
26378 }
sewardjcc3d2192013-03-27 11:37:33 +000026379 /* VPAVGB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E0 /r */
26380 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26381 delta = dis_AVX256_E_V_to_G(
26382 uses_vvvv, vbi, pfx, delta, "vpavgb", Iop_Avg8Ux32 );
26383 goto decode_success;
26384 }
sewardj8516a1f2012-06-24 14:26:30 +000026385 break;
26386
sewardj4c0a7ac2012-06-21 09:08:19 +000026387 case 0xE1:
26388 /* VPSRAW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E1 /r */
26389 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26390 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26391 "vpsraw", Iop_SarN16x8 );
26392 *uses_vvvv = True;
26393 goto decode_success;
26394 }
sewardjcc3d2192013-03-27 11:37:33 +000026395 /* VPSRAW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E1 /r */
26396 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26397 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26398 "vpsraw", Iop_SarN16x16 );
26399 *uses_vvvv = True;
26400 goto decode_success;
26401 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026402 break;
26403
26404 case 0xE2:
26405 /* VPSRAD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E2 /r */
26406 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26407 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26408 "vpsrad", Iop_SarN32x4 );
26409 *uses_vvvv = True;
26410 goto decode_success;
26411 }
sewardjcc3d2192013-03-27 11:37:33 +000026412 /* VPSRAD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E2 /r */
26413 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26414 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26415 "vpsrad", Iop_SarN32x8 );
26416 *uses_vvvv = True;
26417 goto decode_success;
26418 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026419 break;
26420
sewardj8516a1f2012-06-24 14:26:30 +000026421 case 0xE3:
26422 /* VPAVGW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E3 /r */
26423 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26424 delta = dis_AVX128_E_V_to_G(
26425 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux8 );
26426 goto decode_success;
26427 }
sewardjcc3d2192013-03-27 11:37:33 +000026428 /* VPAVGW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E3 /r */
26429 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26430 delta = dis_AVX256_E_V_to_G(
26431 uses_vvvv, vbi, pfx, delta, "vpavgw", Iop_Avg16Ux16 );
26432 goto decode_success;
26433 }
sewardj8516a1f2012-06-24 14:26:30 +000026434 break;
26435
sewardj251b59e2012-05-25 13:51:07 +000026436 case 0xE4:
26437 /* VPMULHUW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E4 /r */
26438 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26439 delta = dis_AVX128_E_V_to_G(
26440 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux8 );
26441 goto decode_success;
26442 }
sewardjcc3d2192013-03-27 11:37:33 +000026443 /* VPMULHUW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E4 /r */
26444 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26445 delta = dis_AVX256_E_V_to_G(
26446 uses_vvvv, vbi, pfx, delta, "vpmulhuw", Iop_MulHi16Ux16 );
26447 goto decode_success;
26448 }
sewardj251b59e2012-05-25 13:51:07 +000026449 break;
26450
sewardjfe0c5e72012-06-15 15:48:07 +000026451 case 0xE5:
26452 /* VPMULHW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E5 /r */
26453 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26454 delta = dis_AVX128_E_V_to_G(
26455 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx8 );
26456 goto decode_success;
26457 }
sewardjcc3d2192013-03-27 11:37:33 +000026458 /* VPMULHW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E5 /r */
26459 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26460 delta = dis_AVX256_E_V_to_G(
26461 uses_vvvv, vbi, pfx, delta, "vpmulhw", Iop_MulHi16Sx16 );
26462 goto decode_success;
26463 }
sewardjfe0c5e72012-06-15 15:48:07 +000026464 break;
26465
sewardj4b1cc832012-06-13 11:10:20 +000026466 case 0xE6:
26467 /* VCVTDQ2PD xmm2/m64, xmm1 = VEX.128.F3.0F.WIG E6 /r */
26468 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*128*/) {
26469 delta = dis_CVTDQ2PD_128(vbi, pfx, delta, True/*isAvx*/);
26470 goto decode_success;
26471 }
sewardj6fcd43e2012-06-14 08:51:35 +000026472 /* VCVTDQ2PD xmm2/m128, ymm1 = VEX.256.F3.0F.WIG E6 /r */
26473 if (haveF3no66noF2(pfx) && 1==getVexL(pfx)/*256*/) {
26474 delta = dis_CVTDQ2PD_256(vbi, pfx, delta);
26475 goto decode_success;
26476 }
sewardj66becf32012-06-18 23:15:16 +000026477 /* VCVTTPD2DQ xmm2/m128, xmm1 = VEX.128.66.0F.WIG E6 /r */
26478 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26479 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
26480 True/*r2zero*/);
26481 goto decode_success;
26482 }
26483 /* VCVTTPD2DQ ymm2/m256, xmm1 = VEX.256.66.0F.WIG E6 /r */
26484 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26485 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, True/*r2zero*/);
26486 goto decode_success;
26487 }
26488 /* VCVTPD2DQ xmm2/m128, xmm1 = VEX.128.F2.0F.WIG E6 /r */
26489 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26490 delta = dis_CVTxPD2DQ_128(vbi, pfx, delta, True/*isAvx*/,
26491 False/*!r2zero*/);
26492 goto decode_success;
26493 }
26494 /* VCVTPD2DQ ymm2/m256, xmm1 = VEX.256.F2.0F.WIG E6 /r */
26495 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26496 delta = dis_CVTxPD2DQ_256(vbi, pfx, delta, False/*!r2zero*/);
26497 goto decode_success;
26498 }
sewardj4b1cc832012-06-13 11:10:20 +000026499 break;
26500
sewardj6eaf00c2012-05-23 11:33:56 +000026501 case 0xE7:
sewardj8eb7ae82012-06-24 14:00:27 +000026502 /* VMOVNTDQ xmm1, m128 = VEX.128.66.0F.WIG E7 /r */
sewardj6eaf00c2012-05-23 11:33:56 +000026503 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26504 UChar modrm = getUChar(delta);
26505 UInt rG = gregOfRexRM(pfx,modrm);
26506 if (!epartIsReg(modrm)) {
26507 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26508 gen_SEGV_if_not_16_aligned( addr );
26509 storeLE( mkexpr(addr), getXMMReg(rG) );
26510 DIP("vmovntdq %s,%s\n", dis_buf, nameXMMReg(rG));
26511 delta += alen;
26512 goto decode_success;
26513 }
26514 /* else fall through */
26515 }
sewardj8eb7ae82012-06-24 14:00:27 +000026516 /* VMOVNTDQ ymm1, m256 = VEX.256.66.0F.WIG E7 /r */
26517 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26518 UChar modrm = getUChar(delta);
26519 UInt rG = gregOfRexRM(pfx,modrm);
26520 if (!epartIsReg(modrm)) {
26521 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
26522 gen_SEGV_if_not_32_aligned( addr );
26523 storeLE( mkexpr(addr), getYMMReg(rG) );
26524 DIP("vmovntdq %s,%s\n", dis_buf, nameYMMReg(rG));
26525 delta += alen;
26526 goto decode_success;
26527 }
26528 /* else fall through */
26529 }
sewardj6eaf00c2012-05-23 11:33:56 +000026530 break;
26531
sewardj4f228902012-06-21 09:17:58 +000026532 case 0xE8:
26533 /* VPSUBSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E8 /r */
26534 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26535 delta = dis_AVX128_E_V_to_G(
26536 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx16 );
26537 goto decode_success;
26538 }
sewardjcc3d2192013-03-27 11:37:33 +000026539 /* VPSUBSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E8 /r */
26540 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26541 delta = dis_AVX256_E_V_to_G(
26542 uses_vvvv, vbi, pfx, delta, "vpsubsb", Iop_QSub8Sx32 );
26543 goto decode_success;
26544 }
sewardj8516a1f2012-06-24 14:26:30 +000026545 break;
sewardj4f228902012-06-21 09:17:58 +000026546
26547 case 0xE9:
26548 /* VPSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG E9 /r */
26549 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26550 delta = dis_AVX128_E_V_to_G(
26551 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx8 );
26552 goto decode_success;
26553 }
sewardjcc3d2192013-03-27 11:37:33 +000026554 /* VPSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG E9 /r */
26555 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26556 delta = dis_AVX256_E_V_to_G(
26557 uses_vvvv, vbi, pfx, delta, "vpsubsw", Iop_QSub16Sx16 );
26558 goto decode_success;
26559 }
sewardj4f228902012-06-21 09:17:58 +000026560 break;
26561
sewardje8a7eb72012-06-12 14:59:17 +000026562 case 0xEA:
26563 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
26564 /* VPMINSW = VEX.NDS.128.66.0F.WIG EA /r */
26565 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26566 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26567 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx8 );
26568 goto decode_success;
26569 }
sewardjcc3d2192013-03-27 11:37:33 +000026570 /* VPMINSW r/m, rV, r ::: r = min-signed16s(rV, r/m) */
26571 /* VPMINSW = VEX.NDS.256.66.0F.WIG EA /r */
26572 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26573 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26574 uses_vvvv, vbi, pfx, delta, "vpminsw", Iop_Min16Sx16 );
26575 goto decode_success;
26576 }
sewardje8a7eb72012-06-12 14:59:17 +000026577 break;
26578
sewardjc4530ae2012-05-21 10:18:49 +000026579 case 0xEB:
sewardj251b59e2012-05-25 13:51:07 +000026580 /* VPOR r/m, rV, r ::: r = rV | r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026581 /* VPOR = VEX.NDS.128.66.0F.WIG EB /r */
26582 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26583 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26584 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV128 );
26585 goto decode_success;
26586 }
sewardjcc3d2192013-03-27 11:37:33 +000026587 /* VPOR r/m, rV, r ::: r = rV | r/m */
26588 /* VPOR = VEX.NDS.256.66.0F.WIG EB /r */
26589 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26590 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26591 uses_vvvv, vbi, pfx, delta, "vpor", Iop_OrV256 );
26592 goto decode_success;
26593 }
sewardjc4530ae2012-05-21 10:18:49 +000026594 break;
26595
sewardj8516a1f2012-06-24 14:26:30 +000026596 case 0xEC:
26597 /* VPADDSB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG EC /r */
26598 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26599 delta = dis_AVX128_E_V_to_G(
26600 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx16 );
26601 goto decode_success;
26602 }
sewardjcc3d2192013-03-27 11:37:33 +000026603 /* VPADDSB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG EC /r */
26604 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26605 delta = dis_AVX256_E_V_to_G(
26606 uses_vvvv, vbi, pfx, delta, "vpaddsb", Iop_QAdd8Sx32 );
26607 goto decode_success;
26608 }
sewardj8516a1f2012-06-24 14:26:30 +000026609 break;
26610
26611 case 0xED:
26612 /* VPADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG ED /r */
26613 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26614 delta = dis_AVX128_E_V_to_G(
26615 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx8 );
26616 goto decode_success;
26617 }
sewardjcc3d2192013-03-27 11:37:33 +000026618 /* VPADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG ED /r */
26619 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26620 delta = dis_AVX256_E_V_to_G(
26621 uses_vvvv, vbi, pfx, delta, "vpaddsw", Iop_QAdd16Sx16 );
26622 goto decode_success;
26623 }
sewardj8516a1f2012-06-24 14:26:30 +000026624 break;
26625
sewardje8a7eb72012-06-12 14:59:17 +000026626 case 0xEE:
26627 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
26628 /* VPMAXSW = VEX.NDS.128.66.0F.WIG EE /r */
26629 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26630 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26631 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx8 );
26632 goto decode_success;
26633 }
sewardjcc3d2192013-03-27 11:37:33 +000026634 /* VPMAXSW r/m, rV, r ::: r = max-signed16s(rV, r/m) */
26635 /* VPMAXSW = VEX.NDS.256.66.0F.WIG EE /r */
26636 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26637 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26638 uses_vvvv, vbi, pfx, delta, "vpmaxsw", Iop_Max16Sx16 );
26639 goto decode_success;
26640 }
sewardje8a7eb72012-06-12 14:59:17 +000026641 break;
26642
sewardjc4530ae2012-05-21 10:18:49 +000026643 case 0xEF:
sewardj251b59e2012-05-25 13:51:07 +000026644 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026645 /* VPXOR = VEX.NDS.128.66.0F.WIG EF /r */
26646 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26647 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26648 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV128 );
26649 goto decode_success;
26650 }
sewardjcc3d2192013-03-27 11:37:33 +000026651 /* VPXOR r/m, rV, r ::: r = rV ^ r/m */
26652 /* VPXOR = VEX.NDS.256.66.0F.WIG EF /r */
26653 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26654 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26655 uses_vvvv, vbi, pfx, delta, "vpxor", Iop_XorV256 );
26656 goto decode_success;
26657 }
sewardjc4530ae2012-05-21 10:18:49 +000026658 break;
26659
sewardjadf357c2012-06-24 13:44:17 +000026660 case 0xF0:
26661 /* VLDDQU m256, ymm1 = VEX.256.F2.0F.WIG F0 /r */
26662 if (haveF2no66noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26663 UChar modrm = getUChar(delta);
26664 UInt rD = gregOfRexRM(pfx, modrm);
26665 IRTemp tD = newTemp(Ity_V256);
26666 if (epartIsReg(modrm)) break;
26667 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26668 delta += alen;
26669 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
26670 DIP("vlddqu %s,%s\n", dis_buf, nameYMMReg(rD));
26671 putYMMReg(rD, mkexpr(tD));
26672 goto decode_success;
26673 }
26674 /* VLDDQU m128, xmm1 = VEX.128.F2.0F.WIG F0 /r */
26675 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26676 UChar modrm = getUChar(delta);
26677 UInt rD = gregOfRexRM(pfx, modrm);
26678 IRTemp tD = newTemp(Ity_V128);
26679 if (epartIsReg(modrm)) break;
26680 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
26681 delta += alen;
26682 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
26683 DIP("vlddqu %s,%s\n", dis_buf, nameXMMReg(rD));
26684 putYMMRegLoAndZU(rD, mkexpr(tD));
26685 goto decode_success;
26686 }
26687 break;
26688
sewardj4c0a7ac2012-06-21 09:08:19 +000026689 case 0xF1:
26690 /* VPSLLW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F1 /r */
26691 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26692 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26693 "vpsllw", Iop_ShlN16x8 );
26694 *uses_vvvv = True;
26695 goto decode_success;
26696
26697 }
sewardjcc3d2192013-03-27 11:37:33 +000026698 /* VPSLLW xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F1 /r */
26699 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26700 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26701 "vpsllw", Iop_ShlN16x16 );
26702 *uses_vvvv = True;
26703 goto decode_success;
26704
26705 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026706 break;
26707
26708 case 0xF2:
26709 /* VPSLLD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F2 /r */
26710 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26711 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26712 "vpslld", Iop_ShlN32x4 );
26713 *uses_vvvv = True;
26714 goto decode_success;
26715 }
sewardjcc3d2192013-03-27 11:37:33 +000026716 /* VPSLLD xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F2 /r */
26717 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26718 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26719 "vpslld", Iop_ShlN32x8 );
26720 *uses_vvvv = True;
26721 goto decode_success;
26722 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026723 break;
26724
26725 case 0xF3:
26726 /* VPSLLQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F3 /r */
26727 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26728 delta = dis_AVX128_shiftV_byE( vbi, pfx, delta,
26729 "vpsllq", Iop_ShlN64x2 );
26730 *uses_vvvv = True;
26731 goto decode_success;
26732 }
sewardjcc3d2192013-03-27 11:37:33 +000026733 /* VPSLLQ xmm3/m128, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F3 /r */
26734 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26735 delta = dis_AVX256_shiftV_byE( vbi, pfx, delta,
26736 "vpsllq", Iop_ShlN64x4 );
26737 *uses_vvvv = True;
26738 goto decode_success;
26739 }
sewardj4c0a7ac2012-06-21 09:08:19 +000026740 break;
26741
sewardje8a7eb72012-06-12 14:59:17 +000026742 case 0xF4:
26743 /* VPMULUDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F4 /r */
26744 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26745 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26746 uses_vvvv, vbi, pfx, delta,
26747 "vpmuludq", math_PMULUDQ_128 );
sewardj89378162012-06-24 12:12:20 +000026748 goto decode_success;
26749 }
sewardjcc3d2192013-03-27 11:37:33 +000026750 /* VPMULUDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F4 /r */
26751 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26752 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26753 uses_vvvv, vbi, pfx, delta,
26754 "vpmuludq", math_PMULUDQ_256 );
26755 goto decode_success;
26756 }
sewardj89378162012-06-24 12:12:20 +000026757 break;
26758
26759 case 0xF5:
26760 /* VPMADDWD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F5 /r */
26761 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26762 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26763 uses_vvvv, vbi, pfx, delta,
26764 "vpmaddwd", math_PMADDWD_128 );
26765 goto decode_success;
sewardje8a7eb72012-06-12 14:59:17 +000026766 }
sewardjcc3d2192013-03-27 11:37:33 +000026767 /* VPMADDWD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F5 /r */
26768 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26769 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26770 uses_vvvv, vbi, pfx, delta,
26771 "vpmaddwd", math_PMADDWD_256 );
26772 goto decode_success;
26773 }
sewardje8a7eb72012-06-12 14:59:17 +000026774 break;
26775
sewardj82096922012-06-24 14:57:59 +000026776 case 0xF6:
26777 /* VPSADBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F.WIG F6 /r */
26778 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26779 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
26780 uses_vvvv, vbi, pfx, delta,
26781 "vpsadbw", math_PSADBW_128 );
26782 goto decode_success;
26783 }
sewardjcc3d2192013-03-27 11:37:33 +000026784 /* VPSADBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F.WIG F6 /r */
26785 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26786 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
26787 uses_vvvv, vbi, pfx, delta,
26788 "vpsadbw", math_PSADBW_256 );
26789 goto decode_success;
26790 }
sewardj82096922012-06-24 14:57:59 +000026791 break;
26792
sewardj8eb7ae82012-06-24 14:00:27 +000026793 case 0xF7:
26794 /* VMASKMOVDQU xmm2, xmm1 = VEX.128.66.0F.WIG F7 /r */
26795 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
26796 && epartIsReg(getUChar(delta))) {
26797 delta = dis_MASKMOVDQU( vbi, pfx, delta, True/*isAvx*/ );
26798 goto decode_success;
26799 }
26800 break;
26801
sewardjc4530ae2012-05-21 10:18:49 +000026802 case 0xF8:
sewardj251b59e2012-05-25 13:51:07 +000026803 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000026804 /* VPSUBB = VEX.NDS.128.66.0F.WIG F8 /r */
sewardjc4530ae2012-05-21 10:18:49 +000026805 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26806 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26807 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x16 );
26808 goto decode_success;
26809 }
sewardjcc3d2192013-03-27 11:37:33 +000026810 /* VPSUBB r/m, rV, r ::: r = rV - r/m */
26811 /* VPSUBB = VEX.NDS.256.66.0F.WIG F8 /r */
26812 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26813 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26814 uses_vvvv, vbi, pfx, delta, "vpsubb", Iop_Sub8x32 );
26815 goto decode_success;
26816 }
sewardjc4530ae2012-05-21 10:18:49 +000026817 break;
26818
sewardj98d02cc2012-06-02 11:55:25 +000026819 case 0xF9:
26820 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
26821 /* VPSUBW = VEX.NDS.128.66.0F.WIG F9 /r */
26822 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26823 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26824 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x8 );
26825 goto decode_success;
26826 }
sewardjcc3d2192013-03-27 11:37:33 +000026827 /* VPSUBW r/m, rV, r ::: r = rV - r/m */
26828 /* VPSUBW = VEX.NDS.256.66.0F.WIG F9 /r */
26829 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26830 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26831 uses_vvvv, vbi, pfx, delta, "vpsubw", Iop_Sub16x16 );
26832 goto decode_success;
26833 }
sewardj98d02cc2012-06-02 11:55:25 +000026834 break;
26835
sewardjc4530ae2012-05-21 10:18:49 +000026836 case 0xFA:
sewardj251b59e2012-05-25 13:51:07 +000026837 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
sewardj98d02cc2012-06-02 11:55:25 +000026838 /* VPSUBD = VEX.NDS.128.66.0F.WIG FA /r */
sewardjc4530ae2012-05-21 10:18:49 +000026839 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26840 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26841 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x4 );
26842 goto decode_success;
26843 }
sewardjcc3d2192013-03-27 11:37:33 +000026844 /* VPSUBD r/m, rV, r ::: r = rV - r/m */
26845 /* VPSUBD = VEX.NDS.256.66.0F.WIG FA /r */
26846 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26847 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26848 uses_vvvv, vbi, pfx, delta, "vpsubd", Iop_Sub32x8 );
26849 goto decode_success;
26850 }
sewardjc4530ae2012-05-21 10:18:49 +000026851 break;
26852
sewardj98d02cc2012-06-02 11:55:25 +000026853 case 0xFB:
26854 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
26855 /* VPSUBQ = VEX.NDS.128.66.0F.WIG FB /r */
26856 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26857 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26858 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x2 );
26859 goto decode_success;
26860 }
sewardjcc3d2192013-03-27 11:37:33 +000026861 /* VPSUBQ r/m, rV, r ::: r = rV - r/m */
26862 /* VPSUBQ = VEX.NDS.256.66.0F.WIG FB /r */
26863 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26864 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26865 uses_vvvv, vbi, pfx, delta, "vpsubq", Iop_Sub64x4 );
26866 goto decode_success;
26867 }
sewardj98d02cc2012-06-02 11:55:25 +000026868 break;
26869
sewardj6fcd43e2012-06-14 08:51:35 +000026870 case 0xFC:
26871 /* VPADDB r/m, rV, r ::: r = rV + r/m */
26872 /* VPADDB = VEX.NDS.128.66.0F.WIG FC /r */
26873 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26874 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26875 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x16 );
26876 goto decode_success;
26877 }
sewardjcc3d2192013-03-27 11:37:33 +000026878 /* VPADDB r/m, rV, r ::: r = rV + r/m */
26879 /* VPADDB = VEX.NDS.256.66.0F.WIG FC /r */
26880 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26881 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26882 uses_vvvv, vbi, pfx, delta, "vpaddb", Iop_Add8x32 );
26883 goto decode_success;
26884 }
sewardj6fcd43e2012-06-14 08:51:35 +000026885 break;
26886
sewardj6faf7cc2012-05-25 15:53:01 +000026887 case 0xFD:
26888 /* VPADDW r/m, rV, r ::: r = rV + r/m */
26889 /* VPADDW = VEX.NDS.128.66.0F.WIG FD /r */
26890 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26891 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26892 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x8 );
26893 goto decode_success;
26894 }
sewardjcc3d2192013-03-27 11:37:33 +000026895 /* VPADDW r/m, rV, r ::: r = rV + r/m */
26896 /* VPADDW = VEX.NDS.256.66.0F.WIG FD /r */
26897 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26898 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26899 uses_vvvv, vbi, pfx, delta, "vpaddw", Iop_Add16x16 );
26900 goto decode_success;
26901 }
sewardj6faf7cc2012-05-25 15:53:01 +000026902 break;
26903
sewardjc4530ae2012-05-21 10:18:49 +000026904 case 0xFE:
sewardj251b59e2012-05-25 13:51:07 +000026905 /* VPADDD r/m, rV, r ::: r = rV + r/m */
sewardjc4530ae2012-05-21 10:18:49 +000026906 /* VPADDD = VEX.NDS.128.66.0F.WIG FE /r */
26907 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
26908 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
26909 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x4 );
26910 goto decode_success;
26911 }
sewardjcc3d2192013-03-27 11:37:33 +000026912 /* VPADDD r/m, rV, r ::: r = rV + r/m */
26913 /* VPADDD = VEX.NDS.256.66.0F.WIG FE /r */
26914 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
26915 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
26916 uses_vvvv, vbi, pfx, delta, "vpaddd", Iop_Add32x8 );
26917 goto decode_success;
26918 }
sewardjc4530ae2012-05-21 10:18:49 +000026919 break;
26920
26921 default:
26922 break;
26923
26924 }
26925
26926 //decode_failure:
26927 return deltaIN;
26928
26929 decode_success:
26930 return delta;
26931}
26932
26933
26934/*------------------------------------------------------------*/
26935/*--- ---*/
26936/*--- Top-level post-escape decoders: dis_ESC_0F38__VEX ---*/
26937/*--- ---*/
26938/*------------------------------------------------------------*/
26939
sewardjd8bca7e2012-06-20 11:46:19 +000026940static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
26941{
26942 /* In the control vector, zero out all but the bottom two bits of
26943 each 32-bit lane. */
26944 IRExpr* cv1 = binop(Iop_ShrN32x4,
26945 binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30)),
26946 mkU8(30));
26947 /* And use the resulting cleaned-up control vector as steering
26948 in a Perm operation. */
26949 IRTemp res = newTemp(Ity_V128);
26950 assign(res, binop(Iop_Perm32x4, mkexpr(dataV), cv1));
26951 return res;
26952}
26953
26954static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
26955{
26956 IRTemp dHi, dLo, cHi, cLo;
26957 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26958 breakupV256toV128s( dataV, &dHi, &dLo );
26959 breakupV256toV128s( ctrlV, &cHi, &cLo );
26960 IRTemp rHi = math_PERMILPS_VAR_128( dHi, cHi );
26961 IRTemp rLo = math_PERMILPS_VAR_128( dLo, cLo );
26962 IRTemp res = newTemp(Ity_V256);
26963 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
26964 return res;
26965}
26966
26967static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
26968{
26969 /* No cleverness here .. */
26970 IRTemp dHi, dLo, cHi, cLo;
26971 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26972 breakupV128to64s( dataV, &dHi, &dLo );
26973 breakupV128to64s( ctrlV, &cHi, &cLo );
26974 IRExpr* rHi
florian99dd03e2013-01-29 03:56:06 +000026975 = IRExpr_ITE( unop(Iop_64to1,
26976 binop(Iop_Shr64, mkexpr(cHi), mkU8(1))),
26977 mkexpr(dHi), mkexpr(dLo) );
sewardjd8bca7e2012-06-20 11:46:19 +000026978 IRExpr* rLo
florian99dd03e2013-01-29 03:56:06 +000026979 = IRExpr_ITE( unop(Iop_64to1,
26980 binop(Iop_Shr64, mkexpr(cLo), mkU8(1))),
26981 mkexpr(dHi), mkexpr(dLo) );
sewardjd8bca7e2012-06-20 11:46:19 +000026982 IRTemp res = newTemp(Ity_V128);
26983 assign(res, binop(Iop_64HLtoV128, rHi, rLo));
26984 return res;
26985}
26986
26987static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
26988{
26989 IRTemp dHi, dLo, cHi, cLo;
26990 dHi = dLo = cHi = cLo = IRTemp_INVALID;
26991 breakupV256toV128s( dataV, &dHi, &dLo );
26992 breakupV256toV128s( ctrlV, &cHi, &cLo );
26993 IRTemp rHi = math_PERMILPD_VAR_128( dHi, cHi );
26994 IRTemp rLo = math_PERMILPD_VAR_128( dLo, cLo );
26995 IRTemp res = newTemp(Ity_V256);
26996 assign(res, binop(Iop_V128HLtoV256, mkexpr(rHi), mkexpr(rLo)));
26997 return res;
26998}
26999
sewardjcc3d2192013-03-27 11:37:33 +000027000static IRTemp math_VPERMD ( IRTemp ctrlV, IRTemp dataV )
27001{
27002 /* In the control vector, zero out all but the bottom three bits of
27003 each 32-bit lane. */
27004 IRExpr* cv1 = binop(Iop_ShrN32x8,
27005 binop(Iop_ShlN32x8, mkexpr(ctrlV), mkU8(29)),
27006 mkU8(29));
27007 /* And use the resulting cleaned-up control vector as steering
27008 in a Perm operation. */
27009 IRTemp res = newTemp(Ity_V256);
27010 assign(res, binop(Iop_Perm32x8, mkexpr(dataV), cv1));
27011 return res;
27012}
27013
27014static Long dis_SHIFTX ( /*OUT*/Bool* uses_vvvv,
27015 VexAbiInfo* vbi, Prefix pfx, Long delta,
27016 const HChar* opname, IROp op8 )
27017{
27018 HChar dis_buf[50];
27019 Int alen;
27020 Int size = getRexW(pfx) ? 8 : 4;
27021 IRType ty = szToITy(size);
27022 IRTemp src = newTemp(ty);
27023 IRTemp amt = newTemp(ty);
27024 UChar rm = getUChar(delta);
27025
27026 assign( amt, getIRegV(size,pfx) );
27027 if (epartIsReg(rm)) {
27028 assign( src, getIRegE(size,pfx,rm) );
27029 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx),
27030 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
27031 delta++;
27032 } else {
27033 IRTemp addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27034 assign( src, loadLE(ty, mkexpr(addr)) );
27035 DIP("%s %s,%s,%s\n", opname, nameIRegV(size,pfx), dis_buf,
27036 nameIRegG(size,pfx,rm));
27037 delta += alen;
27038 }
27039
27040 putIRegG( size, pfx, rm,
27041 binop(mkSizedOp(ty,op8), mkexpr(src),
27042 narrowTo(Ity_I8, binop(mkSizedOp(ty,Iop_And8), mkexpr(amt),
27043 mkU(ty,8*size-1)))) );
27044 /* Flags aren't modified. */
27045 *uses_vvvv = True;
27046 return delta;
27047}
27048
27049
27050static Long dis_FMA ( VexAbiInfo* vbi, Prefix pfx, Long delta, UChar opc )
27051{
27052 UChar modrm = getUChar(delta);
27053 UInt rG = gregOfRexRM(pfx, modrm);
27054 UInt rV = getVexNvvvv(pfx);
27055 Bool scalar = (opc & 0xF) > 7 && (opc & 1);
27056 IRType ty = getRexW(pfx) ? Ity_F64 : Ity_F32;
27057 IRType vty = scalar ? ty : getVexL(pfx) ? Ity_V256 : Ity_V128;
27058 IRTemp vX = newTemp(vty);
27059 IRTemp vY = newTemp(vty);
27060 IRTemp vZ = newTemp(vty);
27061 IRExpr *x[8], *y[8], *z[8];
27062 IRTemp addr = IRTemp_INVALID;
27063 HChar dis_buf[50];
27064 Int alen = 0;
27065 const HChar *name;
27066 const HChar *suffix;
27067 const HChar *order;
27068 Bool negateRes = False;
27069 Bool negateZeven = False;
27070 Bool negateZodd = False;
27071 Int i, j;
27072 Int count;
27073 static IROp ops[] = { Iop_V256to64_0, Iop_V256to64_1,
27074 Iop_V256to64_2, Iop_V256to64_3,
27075 Iop_V128to64, Iop_V128HIto64 };
27076
27077 switch (opc & 0xF) {
27078 case 0x6:
27079 name = "addsub";
27080 negateZeven = True;
27081 break;
27082 case 0x7:
27083 name = "subadd";
27084 negateZodd = True;
27085 break;
27086 case 0x8:
27087 case 0x9:
27088 name = "add";
27089 break;
27090 case 0xA:
27091 case 0xB:
27092 name = "sub";
27093 negateZeven = True;
27094 negateZodd = True;
27095 break;
27096 case 0xC:
27097 case 0xD:
27098 name = "add";
27099 negateRes = True;
27100 negateZeven = True;
27101 negateZodd = True;
27102 break;
27103 case 0xE:
27104 case 0xF:
27105 name = "sub";
27106 negateRes = True;
27107 break;
27108 default:
27109 vpanic("dis_FMA(amd64)");
27110 break;
27111 }
27112 switch (opc & 0xF0) {
27113 case 0x90: order = "132"; break;
27114 case 0xA0: order = "213"; break;
27115 case 0xB0: order = "231"; break;
27116 default: vpanic("dis_FMA(amd64)"); break;
27117 }
27118 if (scalar)
27119 suffix = ty == Ity_F64 ? "sd" : "ss";
27120 else
27121 suffix = ty == Ity_F64 ? "pd" : "ps";
27122
27123 if (scalar) {
27124 assign( vX, ty == Ity_F64
27125 ? getXMMRegLane64F(rG, 0) : getXMMRegLane32F(rG, 0) );
27126 assign( vZ, ty == Ity_F64
27127 ? getXMMRegLane64F(rV, 0) : getXMMRegLane32F(rV, 0) );
27128 } else {
27129 assign( vX, vty == Ity_V256 ? getYMMReg(rG) : getXMMReg(rG) );
27130 assign( vZ, vty == Ity_V256 ? getYMMReg(rV) : getXMMReg(rV) );
27131 }
27132
27133 if (epartIsReg(modrm)) {
27134 UInt rE = eregOfRexRM(pfx, modrm);
27135 delta += 1;
27136 if (scalar)
27137 assign( vY, ty == Ity_F64
27138 ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
27139 else
27140 assign( vY, vty == Ity_V256 ? getYMMReg(rE) : getXMMReg(rE) );
27141 if (vty == Ity_V256) {
27142 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27143 name, order, suffix, nameYMMReg(rE), nameYMMReg(rV),
27144 nameYMMReg(rG));
27145 } else {
27146 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27147 name, order, suffix, nameXMMReg(rE), nameXMMReg(rV),
27148 nameXMMReg(rG));
27149 }
27150 } else {
27151 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27152 delta += alen;
27153 assign(vY, loadLE(vty, mkexpr(addr)));
27154 if (vty == Ity_V256) {
27155 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27156 name, order, suffix, dis_buf, nameYMMReg(rV),
27157 nameYMMReg(rG));
27158 } else {
27159 DIP("vf%sm%s%s%s %s,%s,%s\n", negateRes ? "n" : "",
27160 name, order, suffix, dis_buf, nameXMMReg(rV),
27161 nameXMMReg(rG));
27162 }
27163 }
27164
27165 /* vX/vY/vZ now in 132 order. If it is different order, swap the
27166 arguments. */
27167 if ((opc & 0xF0) != 0x90) {
27168 IRTemp tem = vX;
27169 if ((opc & 0xF0) == 0xA0) {
27170 vX = vZ;
27171 vZ = vY;
27172 vY = tem;
27173 } else {
27174 vX = vZ;
27175 vZ = tem;
27176 }
27177 }
27178
27179 if (scalar) {
27180 count = 1;
27181 x[0] = mkexpr(vX);
27182 y[0] = mkexpr(vY);
27183 z[0] = mkexpr(vZ);
27184 } else if (ty == Ity_F32) {
27185 count = vty == Ity_V256 ? 8 : 4;
27186 j = vty == Ity_V256 ? 0 : 4;
27187 for (i = 0; i < count; i += 2) {
27188 IRTemp tem = newTemp(Ity_I64);
27189 assign(tem, unop(ops[i / 2 + j], mkexpr(vX)));
27190 x[i] = unop(Iop_64to32, mkexpr(tem));
27191 x[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27192 tem = newTemp(Ity_I64);
27193 assign(tem, unop(ops[i / 2 + j], mkexpr(vY)));
27194 y[i] = unop(Iop_64to32, mkexpr(tem));
27195 y[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27196 tem = newTemp(Ity_I64);
27197 assign(tem, unop(ops[i / 2 + j], mkexpr(vZ)));
27198 z[i] = unop(Iop_64to32, mkexpr(tem));
27199 z[i + 1] = unop(Iop_64HIto32, mkexpr(tem));
27200 }
27201 } else {
27202 count = vty == Ity_V256 ? 4 : 2;
27203 j = vty == Ity_V256 ? 0 : 4;
27204 for (i = 0; i < count; i++) {
27205 x[i] = unop(ops[i + j], mkexpr(vX));
27206 y[i] = unop(ops[i + j], mkexpr(vY));
27207 z[i] = unop(ops[i + j], mkexpr(vZ));
27208 }
27209 }
27210 if (!scalar)
27211 for (i = 0; i < count; i++) {
27212 IROp op = ty == Ity_F64
27213 ? Iop_ReinterpI64asF64 : Iop_ReinterpI32asF32;
27214 x[i] = unop(op, x[i]);
27215 y[i] = unop(op, y[i]);
27216 z[i] = unop(op, z[i]);
27217 }
27218 for (i = 0; i < count; i++) {
27219 if ((i & 1) ? negateZodd : negateZeven)
27220 z[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, z[i]);
27221 x[i] = IRExpr_Qop(ty == Ity_F64 ? Iop_MAddF64 : Iop_MAddF32,
27222 get_FAKE_roundingmode(), x[i], y[i], z[i]);
27223 if (negateRes)
27224 x[i] = unop(ty == Ity_F64 ? Iop_NegF64 : Iop_NegF32, x[i]);
27225 if (ty == Ity_F64)
27226 putYMMRegLane64F( rG, i, x[i] );
27227 else
27228 putYMMRegLane32F( rG, i, x[i] );
27229 }
27230 if (vty != Ity_V256)
27231 putYMMRegLane128( rG, 1, mkV128(0) );
27232
27233 return delta;
27234}
27235
27236
27237/* Masked load. */
27238static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, VexAbiInfo* vbi,
27239 Prefix pfx, Long delta,
27240 const HChar* opname, Bool isYMM, IRType ty )
27241{
27242 HChar dis_buf[50];
27243 Int alen, i;
27244 IRTemp addr;
27245 UChar modrm = getUChar(delta);
27246 UInt rG = gregOfRexRM(pfx,modrm);
27247 UInt rV = getVexNvvvv(pfx);
27248 IRTemp res[8], cond;
27249 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27250 if (isYMM) {
27251 DIP("%s %s,%s,%s\n", opname, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
27252 } else {
27253 DIP("%s %s,%s,%s\n", opname, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
27254 }
27255 delta += alen;
27256
27257 for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
27258 res[i] = newTemp(ty);
27259 cond = newTemp(Ity_I1);
27260 assign( cond,
27261 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27262 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27263 : getYMMRegLane64( rV, i ),
27264 mkU(ty, 0) ));
27265 assign( res[i],
27266 IRExpr_ITE(
27267 mkexpr(cond),
27268 loadLE(ty, IRExpr_ITE(
27269 mkexpr(cond),
27270 binop(Iop_Add64, mkexpr(addr),
27271 mkU64(i*(ty == Ity_I32 ? 4 : 8))),
27272 getIReg64(R_RSP)
27273 )
27274 ),
27275 mkU(ty, 0)
27276 )
27277 );
27278 }
27279 switch (ty) {
27280 case Ity_I32:
27281 for (i = 0; i < 8; i++)
27282 putYMMRegLane32( rG, i, (i < 4 || isYMM)
27283 ? mkexpr(res[i]) : mkU32(0) );
27284 break;
27285 case Ity_I64:
27286 for (i = 0; i < 4; i++)
27287 putYMMRegLane64( rG, i, (i < 2 || isYMM)
27288 ? mkexpr(res[i]) : mkU64(0) );
27289 break;
27290 default: vassert(0);
27291 }
27292
27293 *uses_vvvv = True;
27294 return delta;
27295}
27296
27297
27298/* Gather. */
27299static ULong dis_VGATHER ( Bool *uses_vvvv, VexAbiInfo* vbi,
27300 Prefix pfx, Long delta,
27301 const HChar* opname, Bool isYMM,
27302 Bool isVM64x, IRType ty )
27303{
27304 HChar dis_buf[50];
27305 Int alen, i, vscale, count1, count2;
27306 IRTemp addr;
27307 UChar modrm = getUChar(delta);
27308 UInt rG = gregOfRexRM(pfx,modrm);
27309 UInt rV = getVexNvvvv(pfx);
27310 UInt rI;
27311 IRType dstTy = (isYMM && (ty == Ity_I64 || !isVM64x)) ? Ity_V256 : Ity_V128;
27312 IRType idxTy = (isYMM && (ty == Ity_I32 || isVM64x)) ? Ity_V256 : Ity_V128;
27313 IRTemp cond;
27314 addr = disAVSIBMode ( &alen, vbi, pfx, delta, dis_buf, &rI,
27315 idxTy, &vscale );
27316 if (addr == IRTemp_INVALID || rI == rG || rI == rV || rG == rV)
27317 return delta;
27318 if (dstTy == Ity_V256) {
27319 DIP("%s %s,%s,%s\n", opname, nameYMMReg(rV), dis_buf, nameYMMReg(rG) );
27320 } else {
27321 DIP("%s %s,%s,%s\n", opname, nameXMMReg(rV), dis_buf, nameXMMReg(rG) );
27322 }
27323 delta += alen;
27324
27325 if (ty == Ity_I32) {
27326 count1 = isYMM ? 8 : 4;
27327 count2 = isVM64x ? count1 / 2 : count1;
27328 } else {
27329 count1 = count2 = isYMM ? 4 : 2;
27330 }
27331
27332 /* First update the mask register to copies of the sign bit. */
27333 if (ty == Ity_I32) {
27334 if (isYMM)
27335 putYMMReg( rV, binop(Iop_SarN32x8, getYMMReg( rV ), mkU8(31)) );
27336 else
27337 putYMMRegLoAndZU( rV, binop(Iop_SarN32x4, getXMMReg( rV ), mkU8(31)) );
27338 } else {
27339 for (i = 0; i < count1; i++) {
27340 putYMMRegLane64( rV, i, binop(Iop_Sar64, getYMMRegLane64( rV, i ),
27341 mkU8(63)) );
27342 }
27343 }
27344
27345 /* Next gather the individual elements. If any fault occurs, the
27346 corresponding mask element will be set and the loop stops. */
27347 for (i = 0; i < count2; i++) {
27348 IRExpr *expr, *addr_expr;
27349 cond = newTemp(Ity_I1);
27350 assign( cond,
27351 binop(ty == Ity_I32 ? Iop_CmpLT32S : Iop_CmpLT64S,
27352 ty == Ity_I32 ? getYMMRegLane32( rV, i )
27353 : getYMMRegLane64( rV, i ),
27354 mkU(ty, 0)) );
27355 expr = ty == Ity_I32 ? getYMMRegLane32( rG, i )
27356 : getYMMRegLane64( rG, i );
27357 addr_expr = isVM64x ? getYMMRegLane64( rI, i )
27358 : unop(Iop_32Sto64, getYMMRegLane32( rI, i ));
27359 switch (vscale) {
27360 case 2: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(1)); break;
27361 case 4: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(2)); break;
27362 case 8: addr_expr = binop(Iop_Shl64, addr_expr, mkU8(3)); break;
27363 default: break;
27364 }
27365 addr_expr = binop(Iop_Add64, mkexpr(addr), addr_expr);
27366 addr_expr = handleAddrOverrides(vbi, pfx, addr_expr);
27367 addr_expr = IRExpr_ITE(mkexpr(cond), addr_expr, getIReg64(R_RSP));
27368 expr = IRExpr_ITE(mkexpr(cond), loadLE(ty, addr_expr), expr);
27369 if (ty == Ity_I32) {
27370 putYMMRegLane32( rG, i, expr );
27371 putYMMRegLane32( rV, i, mkU32(0) );
27372 } else {
27373 putYMMRegLane64( rG, i, expr);
27374 putYMMRegLane64( rV, i, mkU64(0) );
27375 }
27376 }
27377
27378 if (!isYMM || (ty == Ity_I32 && isVM64x)) {
27379 if (ty == Ity_I64 || isYMM)
27380 putYMMRegLane128( rV, 1, mkV128(0) );
27381 else if (ty == Ity_I32 && count2 == 2) {
27382 putYMMRegLane64( rV, 1, mkU64(0) );
27383 putYMMRegLane64( rG, 1, mkU64(0) );
27384 }
27385 putYMMRegLane128( rG, 1, mkV128(0) );
27386 }
27387
27388 *uses_vvvv = True;
27389 return delta;
27390}
27391
27392
sewardjc4530ae2012-05-21 10:18:49 +000027393__attribute__((noinline))
27394static
27395Long dis_ESC_0F38__VEX (
27396 /*MB_OUT*/DisResult* dres,
27397 /*OUT*/ Bool* uses_vvvv,
27398 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
27399 Bool resteerCisOk,
27400 void* callback_opaque,
27401 VexArchInfo* archinfo,
27402 VexAbiInfo* vbi,
27403 Prefix pfx, Int sz, Long deltaIN
27404 )
27405{
sewardj4b1cc832012-06-13 11:10:20 +000027406 IRTemp addr = IRTemp_INVALID;
27407 Int alen = 0;
27408 HChar dis_buf[50];
sewardjc4530ae2012-05-21 10:18:49 +000027409 Long delta = deltaIN;
27410 UChar opc = getUChar(delta);
27411 delta++;
27412 *uses_vvvv = False;
27413
27414 switch (opc) {
27415
27416 case 0x00:
sewardj251b59e2012-05-25 13:51:07 +000027417 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000027418 /* VPSHUFB = VEX.NDS.128.66.0F38.WIG 00 /r */
27419 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27420 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27421 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_XMM );
27422 goto decode_success;
27423 }
sewardjcc3d2192013-03-27 11:37:33 +000027424 /* VPSHUFB r/m, rV, r ::: r = shuf(rV, r/m) */
27425 /* VPSHUFB = VEX.NDS.256.66.0F38.WIG 00 /r */
27426 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27427 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27428 uses_vvvv, vbi, pfx, delta, "vpshufb", math_PSHUFB_YMM );
27429 goto decode_success;
27430 }
sewardjc4530ae2012-05-21 10:18:49 +000027431 break;
27432
sewardj8516a1f2012-06-24 14:26:30 +000027433 case 0x01:
27434 case 0x02:
27435 case 0x03:
27436 /* VPHADDW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 01 /r */
27437 /* VPHADDD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 02 /r */
27438 /* VPHADDSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 03 /r */
27439 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27440 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
27441 *uses_vvvv = True;
27442 goto decode_success;
27443 }
sewardjcc3d2192013-03-27 11:37:33 +000027444 /* VPHADDW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 01 /r */
27445 /* VPHADDD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 02 /r */
27446 /* VPHADDSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 03 /r */
27447 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27448 delta = dis_PHADD_256( vbi, pfx, delta, opc );
27449 *uses_vvvv = True;
27450 goto decode_success;
27451 }
sewardj8516a1f2012-06-24 14:26:30 +000027452 break;
27453
27454 case 0x04:
27455 /* VPMADDUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 04 /r */
27456 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27457 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
27458 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
27459 math_PMADDUBSW_128 );
27460 goto decode_success;
27461 }
sewardjcc3d2192013-03-27 11:37:33 +000027462 /* VPMADDUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 04 /r */
27463 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27464 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27465 uses_vvvv, vbi, pfx, delta, "vpmaddubsw",
27466 math_PMADDUBSW_256 );
27467 goto decode_success;
27468 }
sewardj8516a1f2012-06-24 14:26:30 +000027469 break;
27470
27471 case 0x05:
27472 case 0x06:
27473 case 0x07:
27474 /* VPHSUBW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 05 /r */
27475 /* VPHSUBD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 06 /r */
27476 /* VPHSUBSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 07 /r */
27477 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27478 delta = dis_PHADD_128( vbi, pfx, delta, True/*isAvx*/, opc );
27479 *uses_vvvv = True;
27480 goto decode_success;
27481 }
sewardjcc3d2192013-03-27 11:37:33 +000027482 /* VPHSUBW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 05 /r */
27483 /* VPHSUBD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 06 /r */
27484 /* VPHSUBSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 07 /r */
27485 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27486 delta = dis_PHADD_256( vbi, pfx, delta, opc );
27487 *uses_vvvv = True;
27488 goto decode_success;
27489 }
sewardj8516a1f2012-06-24 14:26:30 +000027490 break;
27491
sewardj82096922012-06-24 14:57:59 +000027492 case 0x08:
27493 case 0x09:
27494 case 0x0A:
27495 /* VPSIGNB xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 08 /r */
27496 /* VPSIGNW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 09 /r */
27497 /* VPSIGND xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0A /r */
27498 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27499 IRTemp sV = newTemp(Ity_V128);
27500 IRTemp dV = newTemp(Ity_V128);
27501 IRTemp sHi, sLo, dHi, dLo;
27502 sHi = sLo = dHi = dLo = IRTemp_INVALID;
florian5df8ab02012-10-13 19:34:19 +000027503 HChar ch = '?';
sewardj82096922012-06-24 14:57:59 +000027504 Int laneszB = 0;
27505 UChar modrm = getUChar(delta);
27506 UInt rG = gregOfRexRM(pfx,modrm);
27507 UInt rV = getVexNvvvv(pfx);
27508
27509 switch (opc) {
27510 case 0x08: laneszB = 1; ch = 'b'; break;
27511 case 0x09: laneszB = 2; ch = 'w'; break;
27512 case 0x0A: laneszB = 4; ch = 'd'; break;
27513 default: vassert(0);
27514 }
27515
27516 assign( dV, getXMMReg(rV) );
27517
27518 if (epartIsReg(modrm)) {
27519 UInt rE = eregOfRexRM(pfx,modrm);
27520 assign( sV, getXMMReg(rE) );
27521 delta += 1;
27522 DIP("vpsign%c %s,%s,%s\n", ch, nameXMMReg(rE),
27523 nameXMMReg(rV), nameXMMReg(rG));
27524 } else {
27525 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27526 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
27527 delta += alen;
27528 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
27529 nameXMMReg(rV), nameXMMReg(rG));
27530 }
27531
27532 breakupV128to64s( dV, &dHi, &dLo );
27533 breakupV128to64s( sV, &sHi, &sLo );
27534
27535 putYMMRegLoAndZU(
27536 rG,
27537 binop(Iop_64HLtoV128,
27538 dis_PSIGN_helper( mkexpr(sHi), mkexpr(dHi), laneszB ),
27539 dis_PSIGN_helper( mkexpr(sLo), mkexpr(dLo), laneszB )
27540 )
27541 );
27542 *uses_vvvv = True;
27543 goto decode_success;
27544 }
sewardjcc3d2192013-03-27 11:37:33 +000027545 /* VPSIGNB ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 08 /r */
27546 /* VPSIGNW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 09 /r */
27547 /* VPSIGND ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0A /r */
27548 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27549 IRTemp sV = newTemp(Ity_V256);
27550 IRTemp dV = newTemp(Ity_V256);
27551 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
27552 s3 = s2 = s1 = s0 = IRTemp_INVALID;
27553 d3 = d2 = d1 = d0 = IRTemp_INVALID;
27554 UChar ch = '?';
27555 Int laneszB = 0;
27556 UChar modrm = getUChar(delta);
27557 UInt rG = gregOfRexRM(pfx,modrm);
27558 UInt rV = getVexNvvvv(pfx);
27559
27560 switch (opc) {
27561 case 0x08: laneszB = 1; ch = 'b'; break;
27562 case 0x09: laneszB = 2; ch = 'w'; break;
27563 case 0x0A: laneszB = 4; ch = 'd'; break;
27564 default: vassert(0);
27565 }
27566
27567 assign( dV, getYMMReg(rV) );
27568
27569 if (epartIsReg(modrm)) {
27570 UInt rE = eregOfRexRM(pfx,modrm);
27571 assign( sV, getYMMReg(rE) );
27572 delta += 1;
27573 DIP("vpsign%c %s,%s,%s\n", ch, nameYMMReg(rE),
27574 nameYMMReg(rV), nameYMMReg(rG));
27575 } else {
27576 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27577 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
27578 delta += alen;
27579 DIP("vpsign%c %s,%s,%s\n", ch, dis_buf,
27580 nameYMMReg(rV), nameYMMReg(rG));
27581 }
27582
27583 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
27584 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
27585
27586 putYMMReg(
27587 rG,
27588 binop( Iop_V128HLtoV256,
27589 binop(Iop_64HLtoV128,
27590 dis_PSIGN_helper( mkexpr(s3), mkexpr(d3), laneszB ),
27591 dis_PSIGN_helper( mkexpr(s2), mkexpr(d2), laneszB )
27592 ),
27593 binop(Iop_64HLtoV128,
27594 dis_PSIGN_helper( mkexpr(s1), mkexpr(d1), laneszB ),
27595 dis_PSIGN_helper( mkexpr(s0), mkexpr(d0), laneszB )
27596 )
27597 )
27598 );
27599 *uses_vvvv = True;
27600 goto decode_success;
27601 }
sewardj82096922012-06-24 14:57:59 +000027602 break;
27603
27604 case 0x0B:
27605 /* VPMULHRSW xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 0B /r */
27606 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27607 IRTemp sV = newTemp(Ity_V128);
27608 IRTemp dV = newTemp(Ity_V128);
27609 IRTemp sHi, sLo, dHi, dLo;
27610 sHi = sLo = dHi = dLo = IRTemp_INVALID;
27611 UChar modrm = getUChar(delta);
27612 UInt rG = gregOfRexRM(pfx,modrm);
27613 UInt rV = getVexNvvvv(pfx);
27614
27615 assign( dV, getXMMReg(rV) );
27616
27617 if (epartIsReg(modrm)) {
27618 UInt rE = eregOfRexRM(pfx,modrm);
27619 assign( sV, getXMMReg(rE) );
27620 delta += 1;
27621 DIP("vpmulhrsw %s,%s,%s\n", nameXMMReg(rE),
27622 nameXMMReg(rV), nameXMMReg(rG));
27623 } else {
27624 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27625 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
27626 delta += alen;
27627 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
27628 nameXMMReg(rV), nameXMMReg(rG));
27629 }
27630
27631 breakupV128to64s( dV, &dHi, &dLo );
27632 breakupV128to64s( sV, &sHi, &sLo );
27633
27634 putYMMRegLoAndZU(
27635 rG,
27636 binop(Iop_64HLtoV128,
27637 dis_PMULHRSW_helper( mkexpr(sHi), mkexpr(dHi) ),
27638 dis_PMULHRSW_helper( mkexpr(sLo), mkexpr(dLo) )
27639 )
27640 );
27641 *uses_vvvv = True;
27642 goto decode_success;
27643 }
sewardjcc3d2192013-03-27 11:37:33 +000027644 /* VPMULHRSW ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 0B /r */
27645 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27646 IRTemp sV = newTemp(Ity_V256);
27647 IRTemp dV = newTemp(Ity_V256);
27648 IRTemp s3, s2, s1, s0, d3, d2, d1, d0;
27649 s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID;
27650 UChar modrm = getUChar(delta);
27651 UInt rG = gregOfRexRM(pfx,modrm);
27652 UInt rV = getVexNvvvv(pfx);
27653
27654 assign( dV, getYMMReg(rV) );
27655
27656 if (epartIsReg(modrm)) {
27657 UInt rE = eregOfRexRM(pfx,modrm);
27658 assign( sV, getYMMReg(rE) );
27659 delta += 1;
27660 DIP("vpmulhrsw %s,%s,%s\n", nameYMMReg(rE),
27661 nameYMMReg(rV), nameYMMReg(rG));
27662 } else {
27663 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
27664 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
27665 delta += alen;
27666 DIP("vpmulhrsw %s,%s,%s\n", dis_buf,
27667 nameYMMReg(rV), nameYMMReg(rG));
27668 }
27669
27670 breakupV256to64s( dV, &d3, &d2, &d1, &d0 );
27671 breakupV256to64s( sV, &s3, &s2, &s1, &s0 );
27672
27673 putYMMReg(
27674 rG,
27675 binop(Iop_V128HLtoV256,
27676 binop(Iop_64HLtoV128,
27677 dis_PMULHRSW_helper( mkexpr(s3), mkexpr(d3) ),
27678 dis_PMULHRSW_helper( mkexpr(s2), mkexpr(d2) ) ),
27679 binop(Iop_64HLtoV128,
27680 dis_PMULHRSW_helper( mkexpr(s1), mkexpr(d1) ),
27681 dis_PMULHRSW_helper( mkexpr(s0), mkexpr(d0) ) )
27682 )
27683 );
27684 *uses_vvvv = True;
27685 goto decode_success;
27686 }
sewardj82096922012-06-24 14:57:59 +000027687 break;
27688
sewardjd8bca7e2012-06-20 11:46:19 +000027689 case 0x0C:
27690 /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
27691 if (have66noF2noF3(pfx)
27692 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
27693 UChar modrm = getUChar(delta);
27694 UInt rG = gregOfRexRM(pfx, modrm);
27695 UInt rV = getVexNvvvv(pfx);
27696 IRTemp ctrlV = newTemp(Ity_V128);
27697 if (epartIsReg(modrm)) {
27698 UInt rE = eregOfRexRM(pfx, modrm);
27699 delta += 1;
27700 DIP("vpermilps %s,%s,%s\n",
27701 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
27702 assign(ctrlV, getXMMReg(rE));
27703 } else {
sewardj148e5942012-06-21 08:34:19 +000027704 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027705 delta += alen;
27706 DIP("vpermilps %s,%s,%s\n",
27707 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
27708 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
27709 }
27710 IRTemp dataV = newTemp(Ity_V128);
27711 assign(dataV, getXMMReg(rV));
27712 IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
27713 putYMMRegLoAndZU(rG, mkexpr(resV));
27714 *uses_vvvv = True;
27715 goto decode_success;
27716 }
27717 /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
27718 if (have66noF2noF3(pfx)
27719 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27720 UChar modrm = getUChar(delta);
27721 UInt rG = gregOfRexRM(pfx, modrm);
27722 UInt rV = getVexNvvvv(pfx);
27723 IRTemp ctrlV = newTemp(Ity_V256);
27724 if (epartIsReg(modrm)) {
27725 UInt rE = eregOfRexRM(pfx, modrm);
27726 delta += 1;
27727 DIP("vpermilps %s,%s,%s\n",
27728 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
27729 assign(ctrlV, getYMMReg(rE));
27730 } else {
sewardj148e5942012-06-21 08:34:19 +000027731 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027732 delta += alen;
27733 DIP("vpermilps %s,%s,%s\n",
27734 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
27735 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
27736 }
27737 IRTemp dataV = newTemp(Ity_V256);
27738 assign(dataV, getYMMReg(rV));
27739 IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
27740 putYMMReg(rG, mkexpr(resV));
27741 *uses_vvvv = True;
27742 goto decode_success;
27743 }
27744 break;
27745
27746 case 0x0D:
27747 /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
27748 if (have66noF2noF3(pfx)
27749 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
27750 UChar modrm = getUChar(delta);
27751 UInt rG = gregOfRexRM(pfx, modrm);
27752 UInt rV = getVexNvvvv(pfx);
27753 IRTemp ctrlV = newTemp(Ity_V128);
27754 if (epartIsReg(modrm)) {
27755 UInt rE = eregOfRexRM(pfx, modrm);
27756 delta += 1;
27757 DIP("vpermilpd %s,%s,%s\n",
27758 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
27759 assign(ctrlV, getXMMReg(rE));
27760 } else {
sewardj148e5942012-06-21 08:34:19 +000027761 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027762 delta += alen;
27763 DIP("vpermilpd %s,%s,%s\n",
27764 dis_buf, nameXMMReg(rV), nameXMMReg(rG));
27765 assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
27766 }
27767 IRTemp dataV = newTemp(Ity_V128);
27768 assign(dataV, getXMMReg(rV));
27769 IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
27770 putYMMRegLoAndZU(rG, mkexpr(resV));
27771 *uses_vvvv = True;
27772 goto decode_success;
27773 }
27774 /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
27775 if (have66noF2noF3(pfx)
27776 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27777 UChar modrm = getUChar(delta);
27778 UInt rG = gregOfRexRM(pfx, modrm);
27779 UInt rV = getVexNvvvv(pfx);
27780 IRTemp ctrlV = newTemp(Ity_V256);
27781 if (epartIsReg(modrm)) {
27782 UInt rE = eregOfRexRM(pfx, modrm);
27783 delta += 1;
27784 DIP("vpermilpd %s,%s,%s\n",
27785 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
27786 assign(ctrlV, getYMMReg(rE));
27787 } else {
sewardj148e5942012-06-21 08:34:19 +000027788 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
sewardjd8bca7e2012-06-20 11:46:19 +000027789 delta += alen;
27790 DIP("vpermilpd %s,%s,%s\n",
27791 dis_buf, nameYMMReg(rV), nameYMMReg(rG));
27792 assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
27793 }
27794 IRTemp dataV = newTemp(Ity_V256);
27795 assign(dataV, getYMMReg(rV));
27796 IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
27797 putYMMReg(rG, mkexpr(resV));
27798 *uses_vvvv = True;
27799 goto decode_success;
27800 }
27801 break;
27802
sewardjed1884d2012-06-21 08:53:48 +000027803 case 0x0E:
27804 /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
27805 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27806 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
27807 goto decode_success;
27808 }
27809 /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
27810 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27811 delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
27812 goto decode_success;
27813 }
27814 break;
27815
27816 case 0x0F:
27817 /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
27818 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27819 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
27820 goto decode_success;
27821 }
27822 /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
27823 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27824 delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
27825 goto decode_success;
27826 }
27827 break;
27828
sewardjcc3d2192013-03-27 11:37:33 +000027829 case 0x16:
27830 /* VPERMPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 16 /r */
27831 if (have66noF2noF3(pfx)
27832 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
27833 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
27834 uses_vvvv, vbi, pfx, delta, "vpermps", math_VPERMD );
27835 goto decode_success;
27836 }
27837 break;
27838
sewardjed1884d2012-06-21 08:53:48 +000027839 case 0x17:
27840 /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
27841 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27842 delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
27843 goto decode_success;
27844 }
27845 /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
27846 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27847 delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
27848 goto decode_success;
27849 }
27850 break;
27851
sewardj6fcd43e2012-06-14 08:51:35 +000027852 case 0x18:
sewardj151cd3e2012-06-18 13:56:55 +000027853 /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
sewardj6fcd43e2012-06-14 08:51:35 +000027854 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000027855 && 0==getVexL(pfx)/*128*/
sewardj6fcd43e2012-06-14 08:51:35 +000027856 && !epartIsReg(getUChar(delta))) {
27857 UChar modrm = getUChar(delta);
27858 UInt rG = gregOfRexRM(pfx, modrm);
27859 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27860 delta += alen;
27861 DIP("vbroadcastss %s,%s\n", dis_buf, nameXMMReg(rG));
27862 IRTemp t32 = newTemp(Ity_I32);
27863 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
27864 IRTemp t64 = newTemp(Ity_I64);
27865 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27866 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
27867 putYMMRegLoAndZU(rG, res);
27868 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027869 }
sewardj151cd3e2012-06-18 13:56:55 +000027870 /* VBROADCASTSS m32, ymm1 = VEX.256.66.0F38.WIG 18 /r */
27871 if (have66noF2noF3(pfx)
27872 && 1==getVexL(pfx)/*256*/
27873 && !epartIsReg(getUChar(delta))) {
27874 UChar modrm = getUChar(delta);
27875 UInt rG = gregOfRexRM(pfx, modrm);
27876 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27877 delta += alen;
27878 DIP("vbroadcastss %s,%s\n", dis_buf, nameYMMReg(rG));
27879 IRTemp t32 = newTemp(Ity_I32);
27880 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
27881 IRTemp t64 = newTemp(Ity_I64);
27882 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27883 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27884 mkexpr(t64), mkexpr(t64));
27885 putYMMReg(rG, res);
27886 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027887 }
sewardjcc3d2192013-03-27 11:37:33 +000027888 /* VBROADCASTSS xmm2, xmm1 = VEX.128.66.0F38.WIG 18 /r */
27889 if (have66noF2noF3(pfx)
27890 && 0==getVexL(pfx)/*128*/
27891 && epartIsReg(getUChar(delta))) {
27892 UChar modrm = getUChar(delta);
27893 UInt rG = gregOfRexRM(pfx, modrm);
27894 UInt rE = eregOfRexRM(pfx, modrm);
27895 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
27896 IRTemp t32 = newTemp(Ity_I32);
27897 assign(t32, getXMMRegLane32(rE, 0));
27898 IRTemp t64 = newTemp(Ity_I64);
27899 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27900 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
27901 putYMMRegLoAndZU(rG, res);
27902 delta++;
27903 goto decode_success;
27904 }
27905 /* VBROADCASTSS xmm2, ymm1 = VEX.256.66.0F38.WIG 18 /r */
27906 if (have66noF2noF3(pfx)
27907 && 1==getVexL(pfx)/*256*/
27908 && epartIsReg(getUChar(delta))) {
27909 UChar modrm = getUChar(delta);
27910 UInt rG = gregOfRexRM(pfx, modrm);
27911 UInt rE = eregOfRexRM(pfx, modrm);
27912 DIP("vbroadcastss %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
27913 IRTemp t32 = newTemp(Ity_I32);
27914 assign(t32, getXMMRegLane32(rE, 0));
27915 IRTemp t64 = newTemp(Ity_I64);
27916 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
27917 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27918 mkexpr(t64), mkexpr(t64));
27919 putYMMReg(rG, res);
27920 delta++;
27921 goto decode_success;
27922 }
sewardj82096922012-06-24 14:57:59 +000027923 break;
sewardj6fcd43e2012-06-14 08:51:35 +000027924
sewardj4b1cc832012-06-13 11:10:20 +000027925 case 0x19:
sewardj151cd3e2012-06-18 13:56:55 +000027926 /* VBROADCASTSD m64, ymm1 = VEX.256.66.0F38.WIG 19 /r */
sewardj4b1cc832012-06-13 11:10:20 +000027927 if (have66noF2noF3(pfx)
sewardj151cd3e2012-06-18 13:56:55 +000027928 && 1==getVexL(pfx)/*256*/
sewardj4b1cc832012-06-13 11:10:20 +000027929 && !epartIsReg(getUChar(delta))) {
27930 UChar modrm = getUChar(delta);
27931 UInt rG = gregOfRexRM(pfx, modrm);
27932 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27933 delta += alen;
27934 DIP("vbroadcastsd %s,%s\n", dis_buf, nameYMMReg(rG));
27935 IRTemp t64 = newTemp(Ity_I64);
27936 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
27937 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27938 mkexpr(t64), mkexpr(t64));
27939 putYMMReg(rG, res);
27940 goto decode_success;
sewardj82096922012-06-24 14:57:59 +000027941 }
sewardjcc3d2192013-03-27 11:37:33 +000027942 /* VBROADCASTSD xmm2, ymm1 = VEX.256.66.0F38.WIG 19 /r */
27943 if (have66noF2noF3(pfx)
27944 && 1==getVexL(pfx)/*256*/
27945 && epartIsReg(getUChar(delta))) {
27946 UChar modrm = getUChar(delta);
27947 UInt rG = gregOfRexRM(pfx, modrm);
27948 UInt rE = eregOfRexRM(pfx, modrm);
27949 DIP("vbroadcastsd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
27950 IRTemp t64 = newTemp(Ity_I64);
27951 assign(t64, getXMMRegLane64(rE, 0));
27952 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
27953 mkexpr(t64), mkexpr(t64));
27954 putYMMReg(rG, res);
27955 delta++;
27956 goto decode_success;
27957 }
sewardj82096922012-06-24 14:57:59 +000027958 break;
27959
27960 case 0x1A:
27961 /* VBROADCASTF128 m128, ymm1 = VEX.256.66.0F38.WIG 1A /r */
27962 if (have66noF2noF3(pfx)
27963 && 1==getVexL(pfx)/*256*/
27964 && !epartIsReg(getUChar(delta))) {
27965 UChar modrm = getUChar(delta);
27966 UInt rG = gregOfRexRM(pfx, modrm);
27967 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
27968 delta += alen;
27969 DIP("vbroadcastf128 %s,%s\n", dis_buf, nameYMMReg(rG));
27970 IRTemp t128 = newTemp(Ity_V128);
27971 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
27972 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
27973 goto decode_success;
27974 }
27975 break;
sewardj4b1cc832012-06-13 11:10:20 +000027976
sewardj8516a1f2012-06-24 14:26:30 +000027977 case 0x1C:
27978 /* VPABSB xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1C /r */
27979 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27980 delta = dis_AVX128_E_to_G_unary(
27981 uses_vvvv, vbi, pfx, delta,
27982 "vpabsb", math_PABS_XMM_pap1 );
27983 goto decode_success;
27984 }
sewardjcc3d2192013-03-27 11:37:33 +000027985 /* VPABSB ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1C /r */
27986 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
27987 delta = dis_AVX256_E_to_G_unary(
27988 uses_vvvv, vbi, pfx, delta,
27989 "vpabsb", math_PABS_YMM_pap1 );
27990 goto decode_success;
27991 }
sewardj8516a1f2012-06-24 14:26:30 +000027992 break;
27993
27994 case 0x1D:
27995 /* VPABSW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1D /r */
27996 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
27997 delta = dis_AVX128_E_to_G_unary(
27998 uses_vvvv, vbi, pfx, delta,
27999 "vpabsw", math_PABS_XMM_pap2 );
28000 goto decode_success;
28001 }
sewardjcc3d2192013-03-27 11:37:33 +000028002 /* VPABSW ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1D /r */
28003 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28004 delta = dis_AVX256_E_to_G_unary(
28005 uses_vvvv, vbi, pfx, delta,
28006 "vpabsw", math_PABS_YMM_pap2 );
28007 goto decode_success;
28008 }
sewardj8516a1f2012-06-24 14:26:30 +000028009 break;
28010
sewardj97f72452012-05-23 05:56:53 +000028011 case 0x1E:
28012 /* VPABSD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 1E /r */
28013 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28014 delta = dis_AVX128_E_to_G_unary(
28015 uses_vvvv, vbi, pfx, delta,
28016 "vpabsd", math_PABS_XMM_pap4 );
28017 goto decode_success;
28018 }
sewardjcc3d2192013-03-27 11:37:33 +000028019 /* VPABSD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 1E /r */
28020 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28021 delta = dis_AVX256_E_to_G_unary(
28022 uses_vvvv, vbi, pfx, delta,
28023 "vpabsd", math_PABS_YMM_pap4 );
28024 goto decode_success;
28025 }
sewardj97f72452012-05-23 05:56:53 +000028026 break;
28027
sewardj6fcd43e2012-06-14 08:51:35 +000028028 case 0x20:
28029 /* VPMOVSXBW xmm2/m64, xmm1 */
28030 /* VPMOVSXBW = VEX.128.66.0F38.WIG 20 /r */
28031 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28032 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28033 True/*isAvx*/, False/*!xIsZ*/ );
28034 goto decode_success;
28035 }
sewardjcc3d2192013-03-27 11:37:33 +000028036 /* VPMOVSXBW xmm2/m128, ymm1 */
28037 /* VPMOVSXBW = VEX.256.66.0F38.WIG 20 /r */
28038 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28039 delta = dis_PMOVxXBW_256( vbi, pfx, delta, False/*!xIsZ*/ );
28040 goto decode_success;
28041 }
sewardj6fcd43e2012-06-14 08:51:35 +000028042 break;
28043
sewardj4ed05e02012-06-18 15:01:30 +000028044 case 0x21:
28045 /* VPMOVSXBD xmm2/m32, xmm1 */
28046 /* VPMOVSXBD = VEX.128.66.0F38.WIG 21 /r */
28047 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28048 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28049 True/*isAvx*/, False/*!xIsZ*/ );
28050 goto decode_success;
28051 }
sewardjcc3d2192013-03-27 11:37:33 +000028052 /* VPMOVSXBD xmm2/m64, ymm1 */
28053 /* VPMOVSXBD = VEX.256.66.0F38.WIG 21 /r */
28054 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28055 delta = dis_PMOVxXBD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28056 goto decode_success;
28057 }
sewardj4ed05e02012-06-18 15:01:30 +000028058 break;
28059
sewardj8516a1f2012-06-24 14:26:30 +000028060 case 0x22:
28061 /* VPMOVSXBQ xmm2/m16, xmm1 */
28062 /* VPMOVSXBQ = VEX.128.66.0F38.WIG 22 /r */
28063 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28064 delta = dis_PMOVSXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28065 goto decode_success;
28066 }
sewardjcc3d2192013-03-27 11:37:33 +000028067 /* VPMOVSXBQ xmm2/m32, ymm1 */
28068 /* VPMOVSXBQ = VEX.256.66.0F38.WIG 22 /r */
28069 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28070 delta = dis_PMOVSXBQ_256( vbi, pfx, delta );
28071 goto decode_success;
28072 }
sewardj8516a1f2012-06-24 14:26:30 +000028073 break;
28074
sewardj6fcd43e2012-06-14 08:51:35 +000028075 case 0x23:
28076 /* VPMOVSXWD xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 23 /r */
28077 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28078 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28079 True/*isAvx*/, False/*!xIsZ*/ );
28080 goto decode_success;
28081 }
sewardjcc3d2192013-03-27 11:37:33 +000028082 /* VPMOVSXWD xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 23 /r */
28083 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28084 delta = dis_PMOVxXWD_256( vbi, pfx, delta, False/*!xIsZ*/ );
28085 goto decode_success;
28086 }
sewardj6fcd43e2012-06-14 08:51:35 +000028087 break;
28088
sewardj8516a1f2012-06-24 14:26:30 +000028089 case 0x24:
28090 /* VPMOVSXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 24 /r */
28091 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28092 delta = dis_PMOVSXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28093 goto decode_success;
28094 }
sewardjcc3d2192013-03-27 11:37:33 +000028095 /* VPMOVSXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 24 /r */
28096 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28097 delta = dis_PMOVSXWQ_256( vbi, pfx, delta );
28098 goto decode_success;
28099 }
sewardj8516a1f2012-06-24 14:26:30 +000028100 break;
28101
sewardj6fcd43e2012-06-14 08:51:35 +000028102 case 0x25:
28103 /* VPMOVSXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 25 /r */
28104 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28105 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28106 True/*isAvx*/, False/*!xIsZ*/ );
28107 goto decode_success;
28108 }
sewardjcc3d2192013-03-27 11:37:33 +000028109 /* VPMOVSXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 25 /r */
28110 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28111 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, False/*!xIsZ*/ );
28112 goto decode_success;
28113 }
sewardj6fcd43e2012-06-14 08:51:35 +000028114 break;
28115
sewardj89378162012-06-24 12:12:20 +000028116 case 0x28:
28117 /* VPMULDQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 28 /r */
28118 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28119 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_complex(
28120 uses_vvvv, vbi, pfx, delta,
28121 "vpmuldq", math_PMULDQ_128 );
28122 goto decode_success;
28123 }
sewardjcc3d2192013-03-27 11:37:33 +000028124 /* VPMULDQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 28 /r */
28125 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28126 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28127 uses_vvvv, vbi, pfx, delta,
28128 "vpmuldq", math_PMULDQ_256 );
28129 goto decode_success;
28130 }
sewardj89378162012-06-24 12:12:20 +000028131 break;
28132
sewardj56c30312012-06-12 08:45:39 +000028133 case 0x29:
28134 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28135 /* VPCMPEQQ = VEX.NDS.128.66.0F38.WIG 29 /r */
28136 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28137 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28138 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x2 );
28139 goto decode_success;
28140 }
sewardjcc3d2192013-03-27 11:37:33 +000028141 /* VPCMPEQQ r/m, rV, r ::: r = rV `eq-by-64s` r/m */
28142 /* VPCMPEQQ = VEX.NDS.256.66.0F38.WIG 29 /r */
28143 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28144 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28145 uses_vvvv, vbi, pfx, delta, "vpcmpeqq", Iop_CmpEQ64x4 );
28146 goto decode_success;
28147 }
sewardj56c30312012-06-12 08:45:39 +000028148 break;
28149
sewardj8eb7ae82012-06-24 14:00:27 +000028150 case 0x2A:
28151 /* VMOVNTDQA m128, xmm1 = VEX.128.66.0F38.WIG 2A /r */
28152 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28153 && !epartIsReg(getUChar(delta))) {
28154 UChar modrm = getUChar(delta);
28155 UInt rD = gregOfRexRM(pfx, modrm);
28156 IRTemp tD = newTemp(Ity_V128);
28157 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28158 delta += alen;
28159 gen_SEGV_if_not_16_aligned(addr);
28160 assign(tD, loadLE(Ity_V128, mkexpr(addr)));
28161 DIP("vmovntdqa %s,%s\n", dis_buf, nameXMMReg(rD));
28162 putYMMRegLoAndZU(rD, mkexpr(tD));
28163 goto decode_success;
28164 }
sewardjcc3d2192013-03-27 11:37:33 +000028165 /* VMOVNTDQA m256, ymm1 = VEX.256.66.0F38.WIG 2A /r */
28166 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28167 && !epartIsReg(getUChar(delta))) {
28168 UChar modrm = getUChar(delta);
28169 UInt rD = gregOfRexRM(pfx, modrm);
28170 IRTemp tD = newTemp(Ity_V256);
28171 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28172 delta += alen;
28173 gen_SEGV_if_not_32_aligned(addr);
28174 assign(tD, loadLE(Ity_V256, mkexpr(addr)));
28175 DIP("vmovntdqa %s,%s\n", dis_buf, nameYMMReg(rD));
28176 putYMMReg(rD, mkexpr(tD));
28177 goto decode_success;
28178 }
sewardj8eb7ae82012-06-24 14:00:27 +000028179 break;
28180
sewardj8516a1f2012-06-24 14:26:30 +000028181 case 0x2B:
28182 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28183 /* VPACKUSDW = VEX.NDS.128.66.0F38.WIG 2B /r */
28184 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28185 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG(
28186 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28187 Iop_QNarrowBin32Sto16Ux8, NULL,
28188 False/*!invertLeftArg*/, True/*swapArgs*/ );
28189 goto decode_success;
28190 }
sewardjcc3d2192013-03-27 11:37:33 +000028191 /* VPACKUSDW r/m, rV, r ::: r = QNarrowBin32Sto16Ux8(rV, r/m) */
28192 /* VPACKUSDW = VEX.NDS.256.66.0F38.WIG 2B /r */
28193 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28194 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28195 uses_vvvv, vbi, pfx, delta, "vpackusdw",
28196 math_VPACKUSDW_YMM );
28197 goto decode_success;
28198 }
28199 break;
28200
28201 case 0x2C:
28202 /* VMASKMOVPS m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2C /r */
28203 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28204 && !epartIsReg(getUChar(delta))) {
28205 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28206 /*!isYMM*/False, Ity_I32 );
28207 goto decode_success;
28208 }
28209 /* VMASKMOVPS m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2C /r */
28210 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28211 && !epartIsReg(getUChar(delta))) {
28212 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovps",
28213 /*isYMM*/True, Ity_I32 );
28214 goto decode_success;
28215 }
28216 break;
28217
28218 case 0x2D:
28219 /* VMASKMOVPD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.WIG 2D /r */
28220 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28221 && !epartIsReg(getUChar(delta))) {
28222 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28223 /*!isYMM*/False, Ity_I64 );
28224 goto decode_success;
28225 }
28226 /* VMASKMOVPD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.WIG 2D /r */
28227 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28228 && !epartIsReg(getUChar(delta))) {
28229 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vmaskmovpd",
28230 /*isYMM*/True, Ity_I64 );
28231 goto decode_success;
28232 }
sewardj8516a1f2012-06-24 14:26:30 +000028233 break;
28234
sewardjc4530ae2012-05-21 10:18:49 +000028235 case 0x30:
28236 /* VPMOVZXBW xmm2/m64, xmm1 */
28237 /* VPMOVZXBW = VEX.128.66.0F38.WIG 30 /r */
28238 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000028239 delta = dis_PMOVxXBW_128( vbi, pfx, delta,
28240 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000028241 goto decode_success;
28242 }
sewardjcc3d2192013-03-27 11:37:33 +000028243 /* VPMOVZXBW xmm2/m128, ymm1 */
28244 /* VPMOVZXBW = VEX.256.66.0F38.WIG 30 /r */
28245 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28246 delta = dis_PMOVxXBW_256( vbi, pfx, delta, True/*xIsZ*/ );
28247 goto decode_success;
28248 }
sewardjc4530ae2012-05-21 10:18:49 +000028249 break;
28250
sewardj4ed05e02012-06-18 15:01:30 +000028251 case 0x31:
28252 /* VPMOVZXBD xmm2/m32, xmm1 */
28253 /* VPMOVZXBD = VEX.128.66.0F38.WIG 31 /r */
28254 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28255 delta = dis_PMOVxXBD_128( vbi, pfx, delta,
28256 True/*isAvx*/, True/*xIsZ*/ );
28257 goto decode_success;
28258 }
sewardjcc3d2192013-03-27 11:37:33 +000028259 /* VPMOVZXBD xmm2/m64, ymm1 */
28260 /* VPMOVZXBD = VEX.256.66.0F38.WIG 31 /r */
28261 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28262 delta = dis_PMOVxXBD_256( vbi, pfx, delta, True/*xIsZ*/ );
28263 goto decode_success;
28264 }
sewardj4ed05e02012-06-18 15:01:30 +000028265 break;
28266
sewardj8516a1f2012-06-24 14:26:30 +000028267 case 0x32:
28268 /* VPMOVZXBQ xmm2/m16, xmm1 */
28269 /* VPMOVZXBQ = VEX.128.66.0F38.WIG 32 /r */
28270 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28271 delta = dis_PMOVZXBQ_128( vbi, pfx, delta, True/*isAvx*/ );
28272 goto decode_success;
28273 }
sewardjcc3d2192013-03-27 11:37:33 +000028274 /* VPMOVZXBQ xmm2/m32, ymm1 */
28275 /* VPMOVZXBQ = VEX.256.66.0F38.WIG 32 /r */
28276 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28277 delta = dis_PMOVZXBQ_256( vbi, pfx, delta );
28278 goto decode_success;
28279 }
sewardj8516a1f2012-06-24 14:26:30 +000028280 break;
28281
sewardjc4530ae2012-05-21 10:18:49 +000028282 case 0x33:
28283 /* VPMOVZXWD xmm2/m64, xmm1 */
28284 /* VPMOVZXWD = VEX.128.66.0F38.WIG 33 /r */
28285 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardj6fcd43e2012-06-14 08:51:35 +000028286 delta = dis_PMOVxXWD_128( vbi, pfx, delta,
28287 True/*isAvx*/, True/*xIsZ*/ );
sewardjc4530ae2012-05-21 10:18:49 +000028288 goto decode_success;
28289 }
sewardjcc3d2192013-03-27 11:37:33 +000028290 /* VPMOVZXWD xmm2/m128, ymm1 */
28291 /* VPMOVZXWD = VEX.256.66.0F38.WIG 33 /r */
28292 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28293 delta = dis_PMOVxXWD_256( vbi, pfx, delta, True/*xIsZ*/ );
28294 goto decode_success;
28295 }
sewardjc4530ae2012-05-21 10:18:49 +000028296 break;
28297
sewardj8516a1f2012-06-24 14:26:30 +000028298 case 0x34:
28299 /* VPMOVZXWQ xmm2/m32, xmm1 = VEX.128.66.0F38.WIG 34 /r */
28300 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28301 delta = dis_PMOVZXWQ_128( vbi, pfx, delta, True/*isAvx*/ );
28302 goto decode_success;
28303 }
sewardjcc3d2192013-03-27 11:37:33 +000028304 /* VPMOVZXWQ xmm2/m64, ymm1 = VEX.256.66.0F38.WIG 34 /r */
28305 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28306 delta = dis_PMOVZXWQ_256( vbi, pfx, delta );
28307 goto decode_success;
28308 }
sewardj8516a1f2012-06-24 14:26:30 +000028309 break;
28310
28311 case 0x35:
28312 /* VPMOVZXDQ xmm2/m64, xmm1 = VEX.128.66.0F38.WIG 35 /r */
28313 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28314 delta = dis_PMOVxXDQ_128( vbi, pfx, delta,
28315 True/*isAvx*/, True/*xIsZ*/ );
28316 goto decode_success;
28317 }
sewardjcc3d2192013-03-27 11:37:33 +000028318 /* VPMOVZXDQ xmm2/m128, ymm1 = VEX.256.66.0F38.WIG 35 /r */
28319 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28320 delta = dis_PMOVxXDQ_256( vbi, pfx, delta, True/*xIsZ*/ );
28321 goto decode_success;
28322 }
28323 break;
28324
28325 case 0x36:
28326 /* VPERMD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 36 /r */
28327 if (have66noF2noF3(pfx)
28328 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
28329 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_complex(
28330 uses_vvvv, vbi, pfx, delta, "vpermd", math_VPERMD );
28331 goto decode_success;
28332 }
sewardj8516a1f2012-06-24 14:26:30 +000028333 break;
28334
sewardj56c30312012-06-12 08:45:39 +000028335 case 0x37:
28336 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
28337 /* VPCMPGTQ = VEX.NDS.128.66.0F38.WIG 37 /r */
28338 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28339 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28340 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx2 );
28341 goto decode_success;
28342 }
sewardjcc3d2192013-03-27 11:37:33 +000028343 /* VPCMPGTQ r/m, rV, r ::: r = rV `>s-by-64s` r/m */
28344 /* VPCMPGTQ = VEX.NDS.256.66.0F38.WIG 37 /r */
28345 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28346 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28347 uses_vvvv, vbi, pfx, delta, "vpcmpgtq", Iop_CmpGT64Sx4 );
28348 goto decode_success;
28349 }
sewardj56c30312012-06-12 08:45:39 +000028350 break;
28351
sewardje8a7eb72012-06-12 14:59:17 +000028352 case 0x38:
28353 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
28354 /* VPMINSB = VEX.NDS.128.66.0F38.WIG 38 /r */
28355 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28356 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28357 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx16 );
28358 goto decode_success;
28359 }
sewardjcc3d2192013-03-27 11:37:33 +000028360 /* VPMINSB r/m, rV, r ::: r = min-signed-8s(rV, r/m) */
28361 /* VPMINSB = VEX.NDS.256.66.0F38.WIG 38 /r */
28362 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28363 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28364 uses_vvvv, vbi, pfx, delta, "vpminsb", Iop_Min8Sx32 );
28365 goto decode_success;
28366 }
sewardje8a7eb72012-06-12 14:59:17 +000028367 break;
28368
sewardjc4530ae2012-05-21 10:18:49 +000028369 case 0x39:
sewardj251b59e2012-05-25 13:51:07 +000028370 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000028371 /* VPMINSD = VEX.NDS.128.66.0F38.WIG 39 /r */
28372 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28373 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28374 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx4 );
28375 goto decode_success;
28376 }
sewardjcc3d2192013-03-27 11:37:33 +000028377 /* VPMINSD r/m, rV, r ::: r = min-signed-32s(rV, r/m) */
28378 /* VPMINSD = VEX.NDS.256.66.0F38.WIG 39 /r */
28379 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28380 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28381 uses_vvvv, vbi, pfx, delta, "vpminsd", Iop_Min32Sx8 );
28382 goto decode_success;
28383 }
sewardjc4530ae2012-05-21 10:18:49 +000028384 break;
28385
sewardje8a7eb72012-06-12 14:59:17 +000028386 case 0x3A:
28387 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
28388 /* VPMINUW = VEX.NDS.128.66.0F38.WIG 3A /r */
28389 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28390 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28391 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux8 );
28392 goto decode_success;
28393 }
sewardjcc3d2192013-03-27 11:37:33 +000028394 /* VPMINUW r/m, rV, r ::: r = min-unsigned-16s(rV, r/m) */
28395 /* VPMINUW = VEX.NDS.256.66.0F38.WIG 3A /r */
28396 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28397 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28398 uses_vvvv, vbi, pfx, delta, "vpminuw", Iop_Min16Ux16 );
28399 goto decode_success;
28400 }
sewardje8a7eb72012-06-12 14:59:17 +000028401 break;
28402
28403 case 0x3B:
28404 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
28405 /* VPMINUD = VEX.NDS.128.66.0F38.WIG 3B /r */
28406 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28407 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28408 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux4 );
28409 goto decode_success;
28410 }
sewardjcc3d2192013-03-27 11:37:33 +000028411 /* VPMINUD r/m, rV, r ::: r = min-unsigned-32s(rV, r/m) */
28412 /* VPMINUD = VEX.NDS.256.66.0F38.WIG 3B /r */
28413 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28414 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28415 uses_vvvv, vbi, pfx, delta, "vpminud", Iop_Min32Ux8 );
28416 goto decode_success;
28417 }
sewardje8a7eb72012-06-12 14:59:17 +000028418 break;
28419
28420 case 0x3C:
28421 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
28422 /* VPMAXSB = VEX.NDS.128.66.0F38.WIG 3C /r */
28423 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28424 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28425 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx16 );
28426 goto decode_success;
28427 }
sewardjcc3d2192013-03-27 11:37:33 +000028428 /* VPMAXSB r/m, rV, r ::: r = max-signed-8s(rV, r/m) */
28429 /* VPMAXSB = VEX.NDS.256.66.0F38.WIG 3C /r */
28430 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28431 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28432 uses_vvvv, vbi, pfx, delta, "vpmaxsb", Iop_Max8Sx32 );
28433 goto decode_success;
28434 }
sewardje8a7eb72012-06-12 14:59:17 +000028435 break;
28436
sewardjc4530ae2012-05-21 10:18:49 +000028437 case 0x3D:
sewardj251b59e2012-05-25 13:51:07 +000028438 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000028439 /* VPMAXSD = VEX.NDS.128.66.0F38.WIG 3D /r */
28440 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28441 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28442 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx4 );
28443 goto decode_success;
28444 }
sewardjcc3d2192013-03-27 11:37:33 +000028445 /* VPMAXSD r/m, rV, r ::: r = max-signed-32s(rV, r/m) */
28446 /* VPMAXSD = VEX.NDS.256.66.0F38.WIG 3D /r */
28447 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28448 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28449 uses_vvvv, vbi, pfx, delta, "vpmaxsd", Iop_Max32Sx8 );
28450 goto decode_success;
28451 }
sewardjc4530ae2012-05-21 10:18:49 +000028452 break;
28453
sewardje8a7eb72012-06-12 14:59:17 +000028454 case 0x3E:
28455 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
28456 /* VPMAXUW = VEX.NDS.128.66.0F38.WIG 3E /r */
28457 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28458 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28459 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux8 );
28460 goto decode_success;
28461 }
sewardjcc3d2192013-03-27 11:37:33 +000028462 /* VPMAXUW r/m, rV, r ::: r = max-unsigned-16s(rV, r/m) */
28463 /* VPMAXUW = VEX.NDS.256.66.0F38.WIG 3E /r */
28464 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28465 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28466 uses_vvvv, vbi, pfx, delta, "vpmaxuw", Iop_Max16Ux16 );
28467 goto decode_success;
28468 }
sewardje8a7eb72012-06-12 14:59:17 +000028469 break;
28470
28471 case 0x3F:
28472 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
28473 /* VPMAXUD = VEX.NDS.128.66.0F38.WIG 3F /r */
28474 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28475 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28476 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux4 );
28477 goto decode_success;
28478 }
sewardjcc3d2192013-03-27 11:37:33 +000028479 /* VPMAXUD r/m, rV, r ::: r = max-unsigned-32s(rV, r/m) */
28480 /* VPMAXUD = VEX.NDS.256.66.0F38.WIG 3F /r */
28481 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28482 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28483 uses_vvvv, vbi, pfx, delta, "vpmaxud", Iop_Max32Ux8 );
28484 goto decode_success;
28485 }
sewardje8a7eb72012-06-12 14:59:17 +000028486 break;
28487
28488 case 0x40:
28489 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
28490 /* VPMULLD = VEX.NDS.128.66.0F38.WIG 40 /r */
28491 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28492 delta = dis_VEX_NDS_128_AnySimdPfx_0F_WIG_simple(
28493 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x4 );
28494 goto decode_success;
28495 }
sewardjcc3d2192013-03-27 11:37:33 +000028496 /* VPMULLD r/m, rV, r ::: r = mul-32s(rV, r/m) */
28497 /* VPMULLD = VEX.NDS.256.66.0F38.WIG 40 /r */
28498 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
28499 delta = dis_VEX_NDS_256_AnySimdPfx_0F_WIG_simple(
28500 uses_vvvv, vbi, pfx, delta, "vpmulld", Iop_Mul32x8 );
28501 goto decode_success;
28502 }
sewardje8a7eb72012-06-12 14:59:17 +000028503 break;
28504
28505 case 0x41:
28506 /* VPHMINPOSUW xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 41 /r */
28507 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
28508 delta = dis_PHMINPOSUW_128( vbi, pfx, delta, True/*isAvx*/ );
28509 goto decode_success;
28510 }
28511 break;
28512
sewardjcc3d2192013-03-27 11:37:33 +000028513 case 0x45:
28514 /* VPSRLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 45 /r */
28515 /* VPSRLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 45 /r */
28516 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28517 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvd",
28518 Iop_Shr32, 1==getVexL(pfx) );
28519 *uses_vvvv = True;
28520 goto decode_success;
28521 }
28522 /* VPSRLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 45 /r */
28523 /* VPSRLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 45 /r */
28524 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
28525 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsrlvq",
28526 Iop_Shr64, 1==getVexL(pfx) );
28527 *uses_vvvv = True;
28528 goto decode_success;
28529 }
28530 break;
28531
28532 case 0x46:
28533 /* VPSRAVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 46 /r */
28534 /* VPSRAVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 46 /r */
28535 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28536 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsravd",
28537 Iop_Sar32, 1==getVexL(pfx) );
28538 *uses_vvvv = True;
28539 goto decode_success;
28540 }
28541 break;
28542
28543 case 0x47:
28544 /* VPSLLVD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 47 /r */
28545 /* VPSLLVD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 47 /r */
28546 if (have66noF2noF3(pfx) && 0==getRexW(pfx)/*W0*/) {
28547 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvd",
28548 Iop_Shl32, 1==getVexL(pfx) );
28549 *uses_vvvv = True;
28550 goto decode_success;
28551 }
28552 /* VPSLLVQ xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 47 /r */
28553 /* VPSLLVQ ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 47 /r */
28554 if (have66noF2noF3(pfx) && 1==getRexW(pfx)/*W1*/) {
28555 delta = dis_AVX_var_shiftV_byE( vbi, pfx, delta, "vpsllvq",
28556 Iop_Shl64, 1==getVexL(pfx) );
28557 *uses_vvvv = True;
28558 goto decode_success;
28559 }
28560 break;
28561
28562 case 0x58:
28563 /* VPBROADCASTD xmm2/m32, xmm1 = VEX.128.66.0F38.W0 58 /r */
28564 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28565 && 0==getRexW(pfx)/*W0*/) {
28566 UChar modrm = getUChar(delta);
28567 UInt rG = gregOfRexRM(pfx, modrm);
28568 IRTemp t32 = newTemp(Ity_I32);
28569 if (epartIsReg(modrm)) {
28570 UInt rE = eregOfRexRM(pfx, modrm);
28571 delta++;
28572 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28573 assign(t32, getXMMRegLane32(rE, 0));
28574 } else {
28575 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28576 delta += alen;
28577 DIP("vpbroadcastd %s,%s\n", dis_buf, nameXMMReg(rG));
28578 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28579 }
28580 IRTemp t64 = newTemp(Ity_I64);
28581 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28582 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28583 putYMMRegLoAndZU(rG, res);
28584 goto decode_success;
28585 }
28586 /* VPBROADCASTD xmm2/m32, ymm1 = VEX.256.66.0F38.W0 58 /r */
28587 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28588 && 0==getRexW(pfx)/*W0*/) {
28589 UChar modrm = getUChar(delta);
28590 UInt rG = gregOfRexRM(pfx, modrm);
28591 IRTemp t32 = newTemp(Ity_I32);
28592 if (epartIsReg(modrm)) {
28593 UInt rE = eregOfRexRM(pfx, modrm);
28594 delta++;
28595 DIP("vpbroadcastd %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28596 assign(t32, getXMMRegLane32(rE, 0));
28597 } else {
28598 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28599 delta += alen;
28600 DIP("vpbroadcastd %s,%s\n", dis_buf, nameYMMReg(rG));
28601 assign(t32, loadLE(Ity_I32, mkexpr(addr)));
28602 }
28603 IRTemp t64 = newTemp(Ity_I64);
28604 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28605 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28606 mkexpr(t64), mkexpr(t64));
28607 putYMMReg(rG, res);
28608 goto decode_success;
28609 }
28610 break;
28611
28612 case 0x59:
28613 /* VPBROADCASTQ xmm2/m64, xmm1 = VEX.128.66.0F38.W0 59 /r */
28614 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28615 && 0==getRexW(pfx)/*W0*/) {
28616 UChar modrm = getUChar(delta);
28617 UInt rG = gregOfRexRM(pfx, modrm);
28618 IRTemp t64 = newTemp(Ity_I64);
28619 if (epartIsReg(modrm)) {
28620 UInt rE = eregOfRexRM(pfx, modrm);
28621 delta++;
28622 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28623 assign(t64, getXMMRegLane64(rE, 0));
28624 } else {
28625 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28626 delta += alen;
28627 DIP("vpbroadcastq %s,%s\n", dis_buf, nameXMMReg(rG));
28628 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28629 }
28630 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28631 putYMMRegLoAndZU(rG, res);
28632 goto decode_success;
28633 }
28634 /* VPBROADCASTQ xmm2/m64, ymm1 = VEX.256.66.0F38.W0 59 /r */
28635 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28636 && 0==getRexW(pfx)/*W0*/) {
28637 UChar modrm = getUChar(delta);
28638 UInt rG = gregOfRexRM(pfx, modrm);
28639 IRTemp t64 = newTemp(Ity_I64);
28640 if (epartIsReg(modrm)) {
28641 UInt rE = eregOfRexRM(pfx, modrm);
28642 delta++;
28643 DIP("vpbroadcastq %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28644 assign(t64, getXMMRegLane64(rE, 0));
28645 } else {
28646 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28647 delta += alen;
28648 DIP("vpbroadcastq %s,%s\n", dis_buf, nameYMMReg(rG));
28649 assign(t64, loadLE(Ity_I64, mkexpr(addr)));
28650 }
28651 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28652 mkexpr(t64), mkexpr(t64));
28653 putYMMReg(rG, res);
28654 goto decode_success;
28655 }
28656 break;
28657
28658 case 0x5A:
28659 /* VBROADCASTI128 m128, ymm1 = VEX.256.66.0F38.WIG 5A /r */
28660 if (have66noF2noF3(pfx)
28661 && 1==getVexL(pfx)/*256*/
28662 && !epartIsReg(getUChar(delta))) {
28663 UChar modrm = getUChar(delta);
28664 UInt rG = gregOfRexRM(pfx, modrm);
28665 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28666 delta += alen;
28667 DIP("vbroadcasti128 %s,%s\n", dis_buf, nameYMMReg(rG));
28668 IRTemp t128 = newTemp(Ity_V128);
28669 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
28670 putYMMReg( rG, binop(Iop_V128HLtoV256, mkexpr(t128), mkexpr(t128)) );
28671 goto decode_success;
28672 }
28673 break;
28674
28675 case 0x78:
28676 /* VPBROADCASTB xmm2/m8, xmm1 = VEX.128.66.0F38.W0 78 /r */
28677 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28678 && 0==getRexW(pfx)/*W0*/) {
28679 UChar modrm = getUChar(delta);
28680 UInt rG = gregOfRexRM(pfx, modrm);
28681 IRTemp t8 = newTemp(Ity_I8);
28682 if (epartIsReg(modrm)) {
28683 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028684 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028685 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28686 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
28687 } else {
28688 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28689 delta += alen;
28690 DIP("vpbroadcastb %s,%s\n", dis_buf, nameXMMReg(rG));
28691 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
28692 }
28693 IRTemp t16 = newTemp(Ity_I16);
28694 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
28695 IRTemp t32 = newTemp(Ity_I32);
28696 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28697 IRTemp t64 = newTemp(Ity_I64);
28698 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28699 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28700 putYMMRegLoAndZU(rG, res);
28701 goto decode_success;
28702 }
28703 /* VPBROADCASTB xmm2/m8, ymm1 = VEX.256.66.0F38.W0 78 /r */
28704 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28705 && 0==getRexW(pfx)/*W0*/) {
28706 UChar modrm = getUChar(delta);
28707 UInt rG = gregOfRexRM(pfx, modrm);
28708 IRTemp t8 = newTemp(Ity_I8);
28709 if (epartIsReg(modrm)) {
28710 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028711 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028712 DIP("vpbroadcastb %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28713 assign(t8, unop(Iop_32to8, getXMMRegLane32(rE, 0)));
28714 } else {
28715 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28716 delta += alen;
28717 DIP("vpbroadcastb %s,%s\n", dis_buf, nameYMMReg(rG));
28718 assign(t8, loadLE(Ity_I8, mkexpr(addr)));
28719 }
28720 IRTemp t16 = newTemp(Ity_I16);
28721 assign(t16, binop(Iop_8HLto16, mkexpr(t8), mkexpr(t8)));
28722 IRTemp t32 = newTemp(Ity_I32);
28723 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28724 IRTemp t64 = newTemp(Ity_I64);
28725 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28726 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28727 mkexpr(t64), mkexpr(t64));
28728 putYMMReg(rG, res);
28729 goto decode_success;
28730 }
28731 break;
28732
28733 case 0x79:
28734 /* VPBROADCASTW xmm2/m16, xmm1 = VEX.128.66.0F38.W0 79 /r */
28735 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28736 && 0==getRexW(pfx)/*W0*/) {
28737 UChar modrm = getUChar(delta);
28738 UInt rG = gregOfRexRM(pfx, modrm);
28739 IRTemp t16 = newTemp(Ity_I16);
28740 if (epartIsReg(modrm)) {
28741 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028742 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028743 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameXMMReg(rG));
28744 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
28745 } else {
28746 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28747 delta += alen;
28748 DIP("vpbroadcastw %s,%s\n", dis_buf, nameXMMReg(rG));
28749 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
28750 }
28751 IRTemp t32 = newTemp(Ity_I32);
28752 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28753 IRTemp t64 = newTemp(Ity_I64);
28754 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28755 IRExpr* res = binop(Iop_64HLtoV128, mkexpr(t64), mkexpr(t64));
28756 putYMMRegLoAndZU(rG, res);
28757 goto decode_success;
28758 }
28759 /* VPBROADCASTW xmm2/m16, ymm1 = VEX.256.66.0F38.W0 79 /r */
28760 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28761 && 0==getRexW(pfx)/*W0*/) {
28762 UChar modrm = getUChar(delta);
28763 UInt rG = gregOfRexRM(pfx, modrm);
28764 IRTemp t16 = newTemp(Ity_I16);
28765 if (epartIsReg(modrm)) {
28766 UInt rE = eregOfRexRM(pfx, modrm);
sewardj07ab40d2014-11-07 10:08:21 +000028767 delta++;
sewardjcc3d2192013-03-27 11:37:33 +000028768 DIP("vpbroadcastw %s,%s\n", nameXMMReg(rE), nameYMMReg(rG));
28769 assign(t16, unop(Iop_32to16, getXMMRegLane32(rE, 0)));
28770 } else {
28771 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
28772 delta += alen;
28773 DIP("vpbroadcastw %s,%s\n", dis_buf, nameYMMReg(rG));
28774 assign(t16, loadLE(Ity_I16, mkexpr(addr)));
28775 }
28776 IRTemp t32 = newTemp(Ity_I32);
28777 assign(t32, binop(Iop_16HLto32, mkexpr(t16), mkexpr(t16)));
28778 IRTemp t64 = newTemp(Ity_I64);
28779 assign(t64, binop(Iop_32HLto64, mkexpr(t32), mkexpr(t32)));
28780 IRExpr* res = IRExpr_Qop(Iop_64x4toV256, mkexpr(t64), mkexpr(t64),
28781 mkexpr(t64), mkexpr(t64));
28782 putYMMReg(rG, res);
28783 goto decode_success;
28784 }
28785 break;
28786
28787 case 0x8C:
28788 /* VPMASKMOVD m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 8C /r */
28789 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28790 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28791 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
28792 /*!isYMM*/False, Ity_I32 );
28793 goto decode_success;
28794 }
28795 /* VPMASKMOVD m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 8C /r */
28796 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28797 && 0==getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28798 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovd",
28799 /*isYMM*/True, Ity_I32 );
28800 goto decode_success;
28801 }
28802 /* VPMASKMOVQ m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W1 8C /r */
28803 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28804 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28805 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
28806 /*!isYMM*/False, Ity_I64 );
28807 goto decode_success;
28808 }
28809 /* VPMASKMOVQ m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W1 8C /r */
28810 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28811 && 1==getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28812 delta = dis_VMASKMOV_load( uses_vvvv, vbi, pfx, delta, "vpmaskmovq",
28813 /*isYMM*/True, Ity_I64 );
28814 goto decode_success;
28815 }
28816 break;
28817
28818 case 0x90:
28819 /* VPGATHERDD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 90 /r */
28820 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28821 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28822 Long delta0 = delta;
28823 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
28824 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
28825 if (delta != delta0)
28826 goto decode_success;
28827 }
28828 /* VPGATHERDD ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 90 /r */
28829 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28830 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28831 Long delta0 = delta;
28832 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdd",
28833 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
28834 if (delta != delta0)
28835 goto decode_success;
28836 }
28837 /* VPGATHERDQ xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 90 /r */
28838 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28839 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28840 Long delta0 = delta;
28841 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
28842 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
28843 if (delta != delta0)
28844 goto decode_success;
28845 }
28846 /* VPGATHERDQ ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 90 /r */
28847 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28848 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28849 Long delta0 = delta;
28850 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherdq",
28851 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
28852 if (delta != delta0)
28853 goto decode_success;
28854 }
28855 break;
28856
28857 case 0x91:
28858 /* VPGATHERQD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 91 /r */
28859 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28860 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28861 Long delta0 = delta;
28862 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
28863 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
28864 if (delta != delta0)
28865 goto decode_success;
28866 }
28867 /* VPGATHERQD xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 91 /r */
28868 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28869 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28870 Long delta0 = delta;
28871 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqd",
28872 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
28873 if (delta != delta0)
28874 goto decode_success;
28875 }
28876 /* VPGATHERQQ xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 91 /r */
28877 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28878 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28879 Long delta0 = delta;
28880 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
28881 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
28882 if (delta != delta0)
28883 goto decode_success;
28884 }
28885 /* VPGATHERQQ ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 91 /r */
28886 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28887 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28888 Long delta0 = delta;
28889 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vpgatherqq",
28890 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
28891 if (delta != delta0)
28892 goto decode_success;
28893 }
28894 break;
28895
28896 case 0x92:
28897 /* VGATHERDPS xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W0 92 /r */
28898 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28899 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28900 Long delta0 = delta;
28901 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
28902 /*!isYMM*/False, /*!isVM64x*/False, Ity_I32 );
28903 if (delta != delta0)
28904 goto decode_success;
28905 }
28906 /* VGATHERDPS ymm2, vm32y, ymm1 = VEX.DDS.256.66.0F38.W0 92 /r */
28907 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28908 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28909 Long delta0 = delta;
28910 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdps",
28911 /*isYMM*/True, /*!isVM64x*/False, Ity_I32 );
28912 if (delta != delta0)
28913 goto decode_success;
28914 }
28915 /* VGATHERDPD xmm2, vm32x, xmm1 = VEX.DDS.128.66.0F38.W1 92 /r */
28916 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28917 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28918 Long delta0 = delta;
28919 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
28920 /*!isYMM*/False, /*!isVM64x*/False, Ity_I64 );
28921 if (delta != delta0)
28922 goto decode_success;
28923 }
28924 /* VGATHERDPD ymm2, vm32x, ymm1 = VEX.DDS.256.66.0F38.W1 92 /r */
28925 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28926 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28927 Long delta0 = delta;
28928 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherdpd",
28929 /*isYMM*/True, /*!isVM64x*/False, Ity_I64 );
28930 if (delta != delta0)
28931 goto decode_success;
28932 }
28933 break;
28934
28935 case 0x93:
28936 /* VGATHERQPS xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W0 93 /r */
28937 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28938 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28939 Long delta0 = delta;
28940 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
28941 /*!isYMM*/False, /*isVM64x*/True, Ity_I32 );
28942 if (delta != delta0)
28943 goto decode_success;
28944 }
28945 /* VGATHERQPS xmm2, vm64y, xmm1 = VEX.DDS.256.66.0F38.W0 93 /r */
28946 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28947 && 0 == getRexW(pfx)/*W0*/ && !epartIsReg(getUChar(delta))) {
28948 Long delta0 = delta;
28949 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqps",
28950 /*isYMM*/True, /*isVM64x*/True, Ity_I32 );
28951 if (delta != delta0)
28952 goto decode_success;
28953 }
28954 /* VGATHERQPD xmm2, vm64x, xmm1 = VEX.DDS.128.66.0F38.W1 93 /r */
28955 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/
28956 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28957 Long delta0 = delta;
28958 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
28959 /*!isYMM*/False, /*isVM64x*/True, Ity_I64 );
28960 if (delta != delta0)
28961 goto decode_success;
28962 }
28963 /* VGATHERQPD ymm2, vm64y, ymm1 = VEX.DDS.256.66.0F38.W1 93 /r */
28964 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
28965 && 1 == getRexW(pfx)/*W1*/ && !epartIsReg(getUChar(delta))) {
28966 Long delta0 = delta;
28967 delta = dis_VGATHER( uses_vvvv, vbi, pfx, delta, "vgatherqpd",
28968 /*isYMM*/True, /*isVM64x*/True, Ity_I64 );
28969 if (delta != delta0)
28970 goto decode_success;
28971 }
28972 break;
28973
28974 case 0x96 ... 0x9F:
28975 case 0xA6 ... 0xAF:
28976 case 0xB6 ... 0xBF:
28977 /* VFMADDSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 96 /r */
28978 /* VFMADDSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 96 /r */
28979 /* VFMADDSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 96 /r */
28980 /* VFMADDSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 96 /r */
28981 /* VFMSUBADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 97 /r */
28982 /* VFMSUBADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 97 /r */
28983 /* VFMSUBADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 97 /r */
28984 /* VFMSUBADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 97 /r */
28985 /* VFMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 98 /r */
28986 /* VFMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 98 /r */
28987 /* VFMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 98 /r */
28988 /* VFMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 98 /r */
28989 /* VFMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 99 /r */
28990 /* VFMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 99 /r */
28991 /* VFMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9A /r */
28992 /* VFMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9A /r */
28993 /* VFMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9A /r */
28994 /* VFMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9A /r */
28995 /* VFMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9B /r */
28996 /* VFMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9B /r */
28997 /* VFNMADD132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9C /r */
28998 /* VFNMADD132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9C /r */
28999 /* VFNMADD132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9C /r */
29000 /* VFNMADD132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9C /r */
29001 /* VFNMADD132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9D /r */
29002 /* VFNMADD132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9D /r */
29003 /* VFNMSUB132PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 9E /r */
29004 /* VFNMSUB132PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 9E /r */
29005 /* VFNMSUB132PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 9E /r */
29006 /* VFNMSUB132PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 9E /r */
29007 /* VFNMSUB132SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 9F /r */
29008 /* VFNMSUB132SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 9F /r */
29009 /* VFMADDSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A6 /r */
29010 /* VFMADDSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A6 /r */
29011 /* VFMADDSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A6 /r */
29012 /* VFMADDSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A6 /r */
29013 /* VFMSUBADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A7 /r */
29014 /* VFMSUBADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A7 /r */
29015 /* VFMSUBADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A7 /r */
29016 /* VFMSUBADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A7 /r */
29017 /* VFMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 A8 /r */
29018 /* VFMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 A8 /r */
29019 /* VFMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 A8 /r */
29020 /* VFMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 A8 /r */
29021 /* VFMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 A9 /r */
29022 /* VFMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 A9 /r */
29023 /* VFMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AA /r */
29024 /* VFMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AA /r */
29025 /* VFMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AA /r */
29026 /* VFMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AA /r */
29027 /* VFMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AB /r */
29028 /* VFMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AB /r */
29029 /* VFNMADD213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AC /r */
29030 /* VFNMADD213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AC /r */
29031 /* VFNMADD213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AC /r */
29032 /* VFNMADD213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AC /r */
29033 /* VFNMADD213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AD /r */
29034 /* VFNMADD213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AD /r */
29035 /* VFNMSUB213PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 AE /r */
29036 /* VFNMSUB213PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 AE /r */
29037 /* VFNMSUB213PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 AE /r */
29038 /* VFNMSUB213PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 AE /r */
29039 /* VFNMSUB213SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 AF /r */
29040 /* VFNMSUB213SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 AF /r */
29041 /* VFMADDSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B6 /r */
29042 /* VFMADDSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B6 /r */
29043 /* VFMADDSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B6 /r */
29044 /* VFMADDSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B6 /r */
29045 /* VFMSUBADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B7 /r */
29046 /* VFMSUBADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B7 /r */
29047 /* VFMSUBADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B7 /r */
29048 /* VFMSUBADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B7 /r */
29049 /* VFMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 B8 /r */
29050 /* VFMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 B8 /r */
29051 /* VFMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 B8 /r */
29052 /* VFMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 B8 /r */
29053 /* VFMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 B9 /r */
29054 /* VFMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 B9 /r */
29055 /* VFMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BA /r */
29056 /* VFMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BA /r */
29057 /* VFMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BA /r */
29058 /* VFMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BA /r */
29059 /* VFMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BB /r */
29060 /* VFMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BB /r */
29061 /* VFNMADD231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BC /r */
29062 /* VFNMADD231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BC /r */
29063 /* VFNMADD231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BC /r */
29064 /* VFNMADD231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BC /r */
29065 /* VFNMADD231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BD /r */
29066 /* VFNMADD231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BD /r */
29067 /* VFNMSUB231PS xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W0 BE /r */
29068 /* VFNMSUB231PS ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W0 BE /r */
29069 /* VFNMSUB231PD xmm3/m128, xmm2, xmm1 = VEX.DDS.128.66.0F38.W1 BE /r */
29070 /* VFNMSUB231PD ymm3/m256, ymm2, ymm1 = VEX.DDS.256.66.0F38.W1 BE /r */
29071 /* VFNMSUB231SS xmm3/m32, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W0 BF /r */
29072 /* VFNMSUB231SD xmm3/m64, xmm2, xmm1 = VEX.DDS.LIG.66.0F38.W1 BF /r */
29073 if (have66noF2noF3(pfx)) {
29074 delta = dis_FMA( vbi, pfx, delta, opc );
29075 *uses_vvvv = True;
29076 goto decode_success;
29077 }
29078 break;
29079
sewardj1407a362012-06-24 15:11:38 +000029080 case 0xDB:
29081 case 0xDC:
29082 case 0xDD:
29083 case 0xDE:
29084 case 0xDF:
29085 /* VAESIMC xmm2/m128, xmm1 = VEX.128.66.0F38.WIG DB /r */
29086 /* VAESENC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DC /r */
29087 /* VAESENCLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DD /r */
29088 /* VAESDEC xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DE /r */
29089 /* VAESDECLAST xmm3/m128, xmm2, xmm1 = VEX.128.66.0F38.WIG DF /r */
29090 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29091 delta = dis_AESx( vbi, pfx, delta, True/*!isAvx*/, opc );
29092 if (opc != 0xDB) *uses_vvvv = True;
29093 goto decode_success;
29094 }
29095 break;
29096
sewardjcc3d2192013-03-27 11:37:33 +000029097 case 0xF2:
29098 /* ANDN r/m32, r32b, r32a = VEX.NDS.LZ.0F38.W0 F2 /r */
29099 /* ANDN r/m64, r64b, r64a = VEX.NDS.LZ.0F38.W1 F2 /r */
29100 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29101 Int size = getRexW(pfx) ? 8 : 4;
29102 IRType ty = szToITy(size);
29103 IRTemp dst = newTemp(ty);
29104 IRTemp src1 = newTemp(ty);
29105 IRTemp src2 = newTemp(ty);
29106 UChar rm = getUChar(delta);
29107
29108 assign( src1, getIRegV(size,pfx) );
29109 if (epartIsReg(rm)) {
29110 assign( src2, getIRegE(size,pfx,rm) );
29111 DIP("andn %s,%s,%s\n", nameIRegE(size,pfx,rm),
29112 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29113 delta++;
29114 } else {
29115 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29116 assign( src2, loadLE(ty, mkexpr(addr)) );
29117 DIP("andn %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29118 nameIRegG(size,pfx,rm));
29119 delta += alen;
29120 }
29121
29122 assign( dst, binop( mkSizedOp(ty,Iop_And8),
29123 unop( mkSizedOp(ty,Iop_Not8), mkexpr(src1) ),
29124 mkexpr(src2) ) );
29125 putIRegG( size, pfx, rm, mkexpr(dst) );
29126 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29127 ? AMD64G_CC_OP_ANDN64
29128 : AMD64G_CC_OP_ANDN32)) );
29129 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29130 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29131 *uses_vvvv = True;
29132 goto decode_success;
29133 }
29134 break;
29135
29136 case 0xF3:
29137 /* BLSI r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /3 */
29138 /* BLSI r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /3 */
29139 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29140 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 3) {
29141 Int size = getRexW(pfx) ? 8 : 4;
29142 IRType ty = szToITy(size);
29143 IRTemp src = newTemp(ty);
29144 IRTemp dst = newTemp(ty);
29145 UChar rm = getUChar(delta);
29146
29147 if (epartIsReg(rm)) {
29148 assign( src, getIRegE(size,pfx,rm) );
29149 DIP("blsi %s,%s\n", nameIRegE(size,pfx,rm),
29150 nameIRegV(size,pfx));
29151 delta++;
29152 } else {
29153 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29154 assign( src, loadLE(ty, mkexpr(addr)) );
29155 DIP("blsi %s,%s\n", dis_buf, nameIRegV(size,pfx));
29156 delta += alen;
29157 }
29158
29159 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29160 binop(mkSizedOp(ty,Iop_Sub8), mkU(ty, 0),
29161 mkexpr(src)), mkexpr(src)) );
29162 putIRegV( size, pfx, mkexpr(dst) );
29163 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29164 ? AMD64G_CC_OP_BLSI64
29165 : AMD64G_CC_OP_BLSI32)) );
29166 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29167 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29168 *uses_vvvv = True;
29169 goto decode_success;
29170 }
29171 /* BLSMSK r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /2 */
29172 /* BLSMSK r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /2 */
29173 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29174 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 2) {
29175 Int size = getRexW(pfx) ? 8 : 4;
29176 IRType ty = szToITy(size);
29177 IRTemp src = newTemp(ty);
29178 IRTemp dst = newTemp(ty);
29179 UChar rm = getUChar(delta);
29180
29181 if (epartIsReg(rm)) {
29182 assign( src, getIRegE(size,pfx,rm) );
29183 DIP("blsmsk %s,%s\n", nameIRegE(size,pfx,rm),
29184 nameIRegV(size,pfx));
29185 delta++;
29186 } else {
29187 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29188 assign( src, loadLE(ty, mkexpr(addr)) );
29189 DIP("blsmsk %s,%s\n", dis_buf, nameIRegV(size,pfx));
29190 delta += alen;
29191 }
29192
29193 assign( dst, binop(mkSizedOp(ty,Iop_Xor8),
29194 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29195 mkU(ty, 1)), mkexpr(src)) );
29196 putIRegV( size, pfx, mkexpr(dst) );
29197 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29198 ? AMD64G_CC_OP_BLSMSK64
29199 : AMD64G_CC_OP_BLSMSK32)) );
29200 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29201 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29202 *uses_vvvv = True;
29203 goto decode_success;
29204 }
29205 /* BLSR r/m32, r32 = VEX.NDD.LZ.0F38.W0 F3 /1 */
29206 /* BLSR r/m64, r64 = VEX.NDD.LZ.0F38.W1 F3 /1 */
29207 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/
29208 && !haveREX(pfx) && gregLO3ofRM(getUChar(delta)) == 1) {
29209 Int size = getRexW(pfx) ? 8 : 4;
29210 IRType ty = szToITy(size);
29211 IRTemp src = newTemp(ty);
29212 IRTemp dst = newTemp(ty);
29213 UChar rm = getUChar(delta);
29214
29215 if (epartIsReg(rm)) {
29216 assign( src, getIRegE(size,pfx,rm) );
29217 DIP("blsr %s,%s\n", nameIRegE(size,pfx,rm),
29218 nameIRegV(size,pfx));
29219 delta++;
29220 } else {
29221 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29222 assign( src, loadLE(ty, mkexpr(addr)) );
29223 DIP("blsr %s,%s\n", dis_buf, nameIRegV(size,pfx));
29224 delta += alen;
29225 }
29226
29227 assign( dst, binop(mkSizedOp(ty,Iop_And8),
29228 binop(mkSizedOp(ty,Iop_Sub8), mkexpr(src),
29229 mkU(ty, 1)), mkexpr(src)) );
29230 putIRegV( size, pfx, mkexpr(dst) );
29231 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29232 ? AMD64G_CC_OP_BLSR64
29233 : AMD64G_CC_OP_BLSR32)) );
29234 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29235 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(src))) );
29236 *uses_vvvv = True;
29237 goto decode_success;
29238 }
29239 break;
29240
29241 case 0xF5:
29242 /* BZHI r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F5 /r */
29243 /* BZHI r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F5 /r */
29244 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29245 Int size = getRexW(pfx) ? 8 : 4;
29246 IRType ty = szToITy(size);
29247 IRTemp dst = newTemp(ty);
29248 IRTemp src1 = newTemp(ty);
29249 IRTemp src2 = newTemp(ty);
29250 IRTemp start = newTemp(Ity_I8);
sewardj656b8f42013-03-27 22:15:36 +000029251 IRTemp cond = newTemp(Ity_I1);
sewardjcc3d2192013-03-27 11:37:33 +000029252 UChar rm = getUChar(delta);
29253
29254 assign( src2, getIRegV(size,pfx) );
29255 if (epartIsReg(rm)) {
29256 assign( src1, getIRegE(size,pfx,rm) );
29257 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx),
29258 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29259 delta++;
29260 } else {
29261 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29262 assign( src1, loadLE(ty, mkexpr(addr)) );
29263 DIP("bzhi %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29264 nameIRegG(size,pfx,rm));
29265 delta += alen;
29266 }
29267
29268 assign( start, narrowTo( Ity_I8, mkexpr(src2) ) );
29269 assign( cond, binop(Iop_CmpLT32U,
29270 unop(Iop_8Uto32, mkexpr(start)),
29271 mkU32(8*size)) );
29272 /* if (start < opsize) {
29273 if (start == 0)
29274 dst = 0;
29275 else
29276 dst = (src1 << (opsize-start)) u>> (opsize-start);
29277 } else {
29278 dst = src1;
29279 } */
29280 assign( dst,
29281 IRExpr_ITE(
29282 mkexpr(cond),
29283 IRExpr_ITE(
29284 binop(Iop_CmpEQ8, mkexpr(start), mkU8(0)),
29285 mkU(ty, 0),
29286 binop(
29287 mkSizedOp(ty,Iop_Shr8),
29288 binop(
29289 mkSizedOp(ty,Iop_Shl8),
29290 mkexpr(src1),
29291 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29292 ),
29293 binop(Iop_Sub8, mkU8(8*size), mkexpr(start))
29294 )
29295 ),
29296 mkexpr(src1)
29297 )
29298 );
29299 putIRegG( size, pfx, rm, mkexpr(dst) );
29300 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29301 ? AMD64G_CC_OP_BLSR64
29302 : AMD64G_CC_OP_BLSR32)) );
29303 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29304 stmt( IRStmt_Put( OFFB_CC_DEP2, widenUto64(mkexpr(cond))) );
29305 *uses_vvvv = True;
29306 goto decode_success;
29307 }
29308 /* PDEP r/m32, r32b, r32a = VEX.NDS.LZ.F2.0F38.W0 F5 /r */
29309 /* PDEP r/m64, r64b, r64a = VEX.NDS.LZ.F2.0F38.W1 F5 /r */
29310 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29311 Int size = getRexW(pfx) ? 8 : 4;
29312 IRType ty = szToITy(size);
29313 IRTemp src = newTemp(ty);
29314 IRTemp mask = newTemp(ty);
29315 UChar rm = getUChar(delta);
29316
29317 assign( src, getIRegV(size,pfx) );
29318 if (epartIsReg(rm)) {
29319 assign( mask, getIRegE(size,pfx,rm) );
29320 DIP("pdep %s,%s,%s\n", nameIRegE(size,pfx,rm),
29321 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29322 delta++;
29323 } else {
29324 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29325 assign( mask, loadLE(ty, mkexpr(addr)) );
29326 DIP("pdep %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29327 nameIRegG(size,pfx,rm));
29328 delta += alen;
29329 }
29330
29331 IRExpr** args = mkIRExprVec_2( widenUto64(mkexpr(src)),
29332 widenUto64(mkexpr(mask)) );
29333 putIRegG( size, pfx, rm,
29334 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
29335 "amd64g_calculate_pdep",
29336 &amd64g_calculate_pdep, args)) );
29337 *uses_vvvv = True;
29338 /* Flags aren't modified. */
29339 goto decode_success;
29340 }
29341 /* PEXT r/m32, r32b, r32a = VEX.NDS.LZ.F3.0F38.W0 F5 /r */
29342 /* PEXT r/m64, r64b, r64a = VEX.NDS.LZ.F3.0F38.W1 F5 /r */
29343 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29344 Int size = getRexW(pfx) ? 8 : 4;
29345 IRType ty = szToITy(size);
29346 IRTemp src = newTemp(ty);
29347 IRTemp mask = newTemp(ty);
29348 UChar rm = getUChar(delta);
29349
29350 assign( src, getIRegV(size,pfx) );
29351 if (epartIsReg(rm)) {
29352 assign( mask, getIRegE(size,pfx,rm) );
29353 DIP("pext %s,%s,%s\n", nameIRegE(size,pfx,rm),
29354 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29355 delta++;
29356 } else {
29357 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29358 assign( mask, loadLE(ty, mkexpr(addr)) );
29359 DIP("pext %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29360 nameIRegG(size,pfx,rm));
29361 delta += alen;
29362 }
29363
29364 /* First mask off bits not set in mask, they are ignored
29365 and it should be fine if they contain undefined values. */
29366 IRExpr* masked = binop(mkSizedOp(ty,Iop_And8),
29367 mkexpr(src), mkexpr(mask));
29368 IRExpr** args = mkIRExprVec_2( widenUto64(masked),
29369 widenUto64(mkexpr(mask)) );
29370 putIRegG( size, pfx, rm,
29371 narrowTo(ty, mkIRExprCCall(Ity_I64, 0/*regparms*/,
29372 "amd64g_calculate_pext",
29373 &amd64g_calculate_pext, args)) );
29374 *uses_vvvv = True;
29375 /* Flags aren't modified. */
29376 goto decode_success;
29377 }
29378 break;
29379
29380 case 0xF6:
29381 /* MULX r/m32, r32b, r32a = VEX.NDD.LZ.F2.0F38.W0 F6 /r */
29382 /* MULX r/m64, r64b, r64a = VEX.NDD.LZ.F2.0F38.W1 F6 /r */
29383 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29384 Int size = getRexW(pfx) ? 8 : 4;
29385 IRType ty = szToITy(size);
29386 IRTemp src1 = newTemp(ty);
29387 IRTemp src2 = newTemp(ty);
29388 IRTemp res = newTemp(size == 8 ? Ity_I128 : Ity_I64);
29389 UChar rm = getUChar(delta);
29390
29391 assign( src1, getIRegRDX(size) );
29392 if (epartIsReg(rm)) {
29393 assign( src2, getIRegE(size,pfx,rm) );
29394 DIP("mulx %s,%s,%s\n", nameIRegE(size,pfx,rm),
29395 nameIRegV(size,pfx), nameIRegG(size,pfx,rm));
29396 delta++;
29397 } else {
29398 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29399 assign( src2, loadLE(ty, mkexpr(addr)) );
29400 DIP("mulx %s,%s,%s\n", dis_buf, nameIRegV(size,pfx),
29401 nameIRegG(size,pfx,rm));
29402 delta += alen;
29403 }
29404
29405 assign( res, binop(size == 8 ? Iop_MullU64 : Iop_MullU32,
29406 mkexpr(src1), mkexpr(src2)) );
29407 putIRegV( size, pfx,
29408 unop(size == 8 ? Iop_128to64 : Iop_64to32, mkexpr(res)) );
29409 putIRegG( size, pfx, rm,
29410 unop(size == 8 ? Iop_128HIto64 : Iop_64HIto32,
29411 mkexpr(res)) );
29412 *uses_vvvv = True;
29413 /* Flags aren't modified. */
29414 goto decode_success;
29415 }
29416 break;
29417
29418 case 0xF7:
29419 /* SARX r32b, r/m32, r32a = VEX.NDS.LZ.F3.0F38.W0 F7 /r */
29420 /* SARX r64b, r/m64, r64a = VEX.NDS.LZ.F3.0F38.W1 F7 /r */
29421 if (haveF3no66noF2(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29422 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "sarx", Iop_Sar8 );
29423 goto decode_success;
29424 }
29425 /* SHLX r32b, r/m32, r32a = VEX.NDS.LZ.66.0F38.W0 F7 /r */
29426 /* SHLX r64b, r/m64, r64a = VEX.NDS.LZ.66.0F38.W1 F7 /r */
29427 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29428 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shlx", Iop_Shl8 );
29429 goto decode_success;
29430 }
29431 /* SHRX r32b, r/m32, r32a = VEX.NDS.LZ.F2.0F38.W0 F7 /r */
29432 /* SHRX r64b, r/m64, r64a = VEX.NDS.LZ.F2.0F38.W1 F7 /r */
29433 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29434 delta = dis_SHIFTX( uses_vvvv, vbi, pfx, delta, "shrx", Iop_Shr8 );
29435 goto decode_success;
29436 }
29437 /* BEXTR r32b, r/m32, r32a = VEX.NDS.LZ.0F38.W0 F7 /r */
29438 /* BEXTR r64b, r/m64, r64a = VEX.NDS.LZ.0F38.W1 F7 /r */
29439 if (haveNo66noF2noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
29440 Int size = getRexW(pfx) ? 8 : 4;
29441 IRType ty = szToITy(size);
29442 IRTemp dst = newTemp(ty);
29443 IRTemp src1 = newTemp(ty);
29444 IRTemp src2 = newTemp(ty);
29445 IRTemp stle = newTemp(Ity_I16);
29446 IRTemp start = newTemp(Ity_I8);
29447 IRTemp len = newTemp(Ity_I8);
29448 UChar rm = getUChar(delta);
29449
29450 assign( src2, getIRegV(size,pfx) );
29451 if (epartIsReg(rm)) {
29452 assign( src1, getIRegE(size,pfx,rm) );
29453 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx),
29454 nameIRegE(size,pfx,rm), nameIRegG(size,pfx,rm));
29455 delta++;
29456 } else {
29457 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
29458 assign( src1, loadLE(ty, mkexpr(addr)) );
29459 DIP("bextr %s,%s,%s\n", nameIRegV(size,pfx), dis_buf,
29460 nameIRegG(size,pfx,rm));
29461 delta += alen;
29462 }
29463
29464 assign( stle, narrowTo( Ity_I16, mkexpr(src2) ) );
29465 assign( start, unop( Iop_16to8, mkexpr(stle) ) );
29466 assign( len, unop( Iop_16HIto8, mkexpr(stle) ) );
29467 /* if (start+len < opsize) {
29468 if (len != 0)
29469 dst = (src1 << (opsize-start-len)) u>> (opsize-len);
29470 else
29471 dst = 0;
29472 } else {
29473 if (start < opsize)
29474 dst = src1 u>> start;
29475 else
29476 dst = 0;
29477 } */
29478 assign( dst,
29479 IRExpr_ITE(
29480 binop(Iop_CmpLT32U,
29481 binop(Iop_Add32,
29482 unop(Iop_8Uto32, mkexpr(start)),
29483 unop(Iop_8Uto32, mkexpr(len))),
29484 mkU32(8*size)),
29485 IRExpr_ITE(
29486 binop(Iop_CmpEQ8, mkexpr(len), mkU8(0)),
29487 mkU(ty, 0),
29488 binop(mkSizedOp(ty,Iop_Shr8),
29489 binop(mkSizedOp(ty,Iop_Shl8), mkexpr(src1),
29490 binop(Iop_Sub8,
29491 binop(Iop_Sub8, mkU8(8*size),
29492 mkexpr(start)),
29493 mkexpr(len))),
29494 binop(Iop_Sub8, mkU8(8*size),
29495 mkexpr(len)))
29496 ),
29497 IRExpr_ITE(
29498 binop(Iop_CmpLT32U,
29499 unop(Iop_8Uto32, mkexpr(start)),
29500 mkU32(8*size)),
29501 binop(mkSizedOp(ty,Iop_Shr8), mkexpr(src1),
29502 mkexpr(start)),
29503 mkU(ty, 0)
29504 )
29505 )
29506 );
29507 putIRegG( size, pfx, rm, mkexpr(dst) );
29508 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(size == 8
29509 ? AMD64G_CC_OP_ANDN64
29510 : AMD64G_CC_OP_ANDN32)) );
29511 stmt( IRStmt_Put( OFFB_CC_DEP1, widenUto64(mkexpr(dst))) );
29512 stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0)) );
29513 *uses_vvvv = True;
29514 goto decode_success;
29515 }
29516 break;
29517
sewardjc4530ae2012-05-21 10:18:49 +000029518 default:
29519 break;
29520
29521 }
29522
29523 //decode_failure:
29524 return deltaIN;
29525
29526 decode_success:
29527 return delta;
29528}
29529
29530
29531/*------------------------------------------------------------*/
29532/*--- ---*/
29533/*--- Top-level post-escape decoders: dis_ESC_0F3A__VEX ---*/
29534/*--- ---*/
29535/*------------------------------------------------------------*/
29536
sewardjfe0c5e72012-06-15 15:48:07 +000029537static IRTemp math_VPERMILPS_128 ( IRTemp sV, UInt imm8 )
29538{
29539 vassert(imm8 < 256);
29540 IRTemp s3, s2, s1, s0;
29541 s3 = s2 = s1 = s0 = IRTemp_INVALID;
29542 breakupV128to32s( sV, &s3, &s2, &s1, &s0 );
29543# define SEL(_nn) (((_nn)==0) ? s0 : ((_nn)==1) ? s1 \
29544 : ((_nn)==2) ? s2 : s3)
29545 IRTemp res = newTemp(Ity_V128);
29546 assign(res, mkV128from32s( SEL((imm8 >> 6) & 3),
29547 SEL((imm8 >> 4) & 3),
29548 SEL((imm8 >> 2) & 3),
29549 SEL((imm8 >> 0) & 3) ));
29550# undef SEL
29551 return res;
29552}
29553
sewardjc4530ae2012-05-21 10:18:49 +000029554__attribute__((noinline))
29555static
29556Long dis_ESC_0F3A__VEX (
29557 /*MB_OUT*/DisResult* dres,
29558 /*OUT*/ Bool* uses_vvvv,
29559 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
29560 Bool resteerCisOk,
29561 void* callback_opaque,
29562 VexArchInfo* archinfo,
29563 VexAbiInfo* vbi,
29564 Prefix pfx, Int sz, Long deltaIN
29565 )
29566{
29567 IRTemp addr = IRTemp_INVALID;
29568 Int alen = 0;
29569 HChar dis_buf[50];
29570 Long delta = deltaIN;
29571 UChar opc = getUChar(delta);
29572 delta++;
29573 *uses_vvvv = False;
29574
29575 switch (opc) {
29576
sewardjcc3d2192013-03-27 11:37:33 +000029577 case 0x00:
29578 case 0x01:
29579 /* VPERMQ imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 00 /r ib */
29580 /* VPERMPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.W1 01 /r ib */
29581 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/
29582 && 1==getRexW(pfx)/*W1*/) {
29583 UChar modrm = getUChar(delta);
29584 UInt imm8 = 0;
29585 UInt rG = gregOfRexRM(pfx, modrm);
29586 IRTemp sV = newTemp(Ity_V256);
29587 const HChar *name = opc == 0 ? "vpermq" : "vpermpd";
29588 if (epartIsReg(modrm)) {
29589 UInt rE = eregOfRexRM(pfx, modrm);
29590 delta += 1;
29591 imm8 = getUChar(delta);
29592 DIP("%s $%u,%s,%s\n",
29593 name, imm8, nameYMMReg(rE), nameYMMReg(rG));
29594 assign(sV, getYMMReg(rE));
29595 } else {
29596 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29597 delta += alen;
29598 imm8 = getUChar(delta);
29599 DIP("%s $%u,%s,%s\n",
29600 name, imm8, dis_buf, nameYMMReg(rG));
29601 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29602 }
29603 delta++;
29604 IRTemp s[4];
29605 s[3] = s[2] = s[1] = s[0] = IRTemp_INVALID;
29606 breakupV256to64s(sV, &s[3], &s[2], &s[1], &s[0]);
29607 IRTemp dV = newTemp(Ity_V256);
29608 assign(dV, IRExpr_Qop(Iop_64x4toV256,
29609 mkexpr(s[(imm8 >> 6) & 3]),
29610 mkexpr(s[(imm8 >> 4) & 3]),
29611 mkexpr(s[(imm8 >> 2) & 3]),
29612 mkexpr(s[(imm8 >> 0) & 3])));
29613 putYMMReg(rG, mkexpr(dV));
29614 goto decode_success;
29615 }
29616 break;
29617
29618 case 0x02:
29619 /* VPBLENDD imm8, xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 02 /r ib */
29620 if (have66noF2noF3(pfx)
29621 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
29622 UChar modrm = getUChar(delta);
29623 UInt imm8 = 0;
29624 UInt rG = gregOfRexRM(pfx, modrm);
29625 UInt rV = getVexNvvvv(pfx);
29626 IRTemp sV = newTemp(Ity_V128);
29627 IRTemp dV = newTemp(Ity_V128);
29628 UInt i;
29629 IRTemp s[4], d[4];
29630 assign(sV, getXMMReg(rV));
29631 if (epartIsReg(modrm)) {
29632 UInt rE = eregOfRexRM(pfx, modrm);
29633 delta += 1;
29634 imm8 = getUChar(delta);
29635 DIP("vpblendd $%u,%s,%s,%s\n",
29636 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
29637 assign(dV, getXMMReg(rE));
29638 } else {
29639 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29640 delta += alen;
29641 imm8 = getUChar(delta);
29642 DIP("vpblendd $%u,%s,%s,%s\n",
29643 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
29644 assign(dV, loadLE(Ity_V128, mkexpr(addr)));
29645 }
29646 delta++;
29647 for (i = 0; i < 4; i++) {
29648 s[i] = IRTemp_INVALID;
29649 d[i] = IRTemp_INVALID;
29650 }
29651 breakupV128to32s( sV, &s[3], &s[2], &s[1], &s[0] );
29652 breakupV128to32s( dV, &d[3], &d[2], &d[1], &d[0] );
29653 for (i = 0; i < 4; i++)
29654 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
29655 putYMMRegLane128(rG, 1, mkV128(0));
29656 *uses_vvvv = True;
29657 goto decode_success;
29658 }
29659 /* VPBLENDD imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 02 /r ib */
29660 if (have66noF2noF3(pfx)
29661 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29662 UChar modrm = getUChar(delta);
29663 UInt imm8 = 0;
29664 UInt rG = gregOfRexRM(pfx, modrm);
29665 UInt rV = getVexNvvvv(pfx);
29666 IRTemp sV = newTemp(Ity_V256);
29667 IRTemp dV = newTemp(Ity_V256);
29668 UInt i;
29669 IRTemp s[8], d[8];
29670 assign(sV, getYMMReg(rV));
29671 if (epartIsReg(modrm)) {
29672 UInt rE = eregOfRexRM(pfx, modrm);
29673 delta += 1;
29674 imm8 = getUChar(delta);
29675 DIP("vpblendd $%u,%s,%s,%s\n",
29676 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
29677 assign(dV, getYMMReg(rE));
29678 } else {
29679 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29680 delta += alen;
29681 imm8 = getUChar(delta);
29682 DIP("vpblendd $%u,%s,%s,%s\n",
29683 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
29684 assign(dV, loadLE(Ity_V256, mkexpr(addr)));
29685 }
29686 delta++;
29687 for (i = 0; i < 8; i++) {
29688 s[i] = IRTemp_INVALID;
29689 d[i] = IRTemp_INVALID;
29690 }
29691 breakupV256to32s( sV, &s[7], &s[6], &s[5], &s[4],
29692 &s[3], &s[2], &s[1], &s[0] );
29693 breakupV256to32s( dV, &d[7], &d[6], &d[5], &d[4],
29694 &d[3], &d[2], &d[1], &d[0] );
29695 for (i = 0; i < 8; i++)
29696 putYMMRegLane32(rG, i, mkexpr((imm8 & (1<<i)) ? d[i] : s[i]));
29697 *uses_vvvv = True;
29698 goto decode_success;
29699 }
29700 break;
29701
sewardjfe0c5e72012-06-15 15:48:07 +000029702 case 0x04:
sewardj21459cb2012-06-18 14:05:52 +000029703 /* VPERMILPS imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 04 /r ib */
29704 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000029705 UChar modrm = getUChar(delta);
29706 UInt imm8 = 0;
29707 UInt rG = gregOfRexRM(pfx, modrm);
29708 IRTemp sV = newTemp(Ity_V256);
29709 if (epartIsReg(modrm)) {
29710 UInt rE = eregOfRexRM(pfx, modrm);
29711 delta += 1;
29712 imm8 = getUChar(delta);
29713 DIP("vpermilps $%u,%s,%s\n",
29714 imm8, nameYMMReg(rE), nameYMMReg(rG));
29715 assign(sV, getYMMReg(rE));
29716 } else {
29717 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29718 delta += alen;
29719 imm8 = getUChar(delta);
29720 DIP("vpermilps $%u,%s,%s\n",
29721 imm8, dis_buf, nameYMMReg(rG));
29722 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29723 }
29724 delta++;
29725 IRTemp sVhi = IRTemp_INVALID, sVlo = IRTemp_INVALID;
29726 breakupV256toV128s( sV, &sVhi, &sVlo );
29727 IRTemp dVhi = math_VPERMILPS_128( sVhi, imm8 );
29728 IRTemp dVlo = math_VPERMILPS_128( sVlo, imm8 );
29729 IRExpr* res = binop(Iop_V128HLtoV256, mkexpr(dVhi), mkexpr(dVlo));
29730 putYMMReg(rG, res);
29731 goto decode_success;
29732 }
sewardj21459cb2012-06-18 14:05:52 +000029733 /* VPERMILPS imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 04 /r ib */
29734 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29735 UChar modrm = getUChar(delta);
29736 UInt imm8 = 0;
29737 UInt rG = gregOfRexRM(pfx, modrm);
29738 IRTemp sV = newTemp(Ity_V128);
29739 if (epartIsReg(modrm)) {
29740 UInt rE = eregOfRexRM(pfx, modrm);
29741 delta += 1;
29742 imm8 = getUChar(delta);
29743 DIP("vpermilps $%u,%s,%s\n",
29744 imm8, nameXMMReg(rE), nameXMMReg(rG));
29745 assign(sV, getXMMReg(rE));
29746 } else {
29747 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29748 delta += alen;
29749 imm8 = getUChar(delta);
29750 DIP("vpermilps $%u,%s,%s\n",
29751 imm8, dis_buf, nameXMMReg(rG));
29752 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
29753 }
29754 delta++;
29755 putYMMRegLoAndZU(rG, mkexpr ( math_VPERMILPS_128 ( sV, imm8 ) ) );
29756 goto decode_success;
29757 }
sewardjfe0c5e72012-06-15 15:48:07 +000029758 break;
29759
sewardje8a7eb72012-06-12 14:59:17 +000029760 case 0x05:
sewardj21459cb2012-06-18 14:05:52 +000029761 /* VPERMILPD imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG 05 /r ib */
29762 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
sewardjfe0c5e72012-06-15 15:48:07 +000029763 UChar modrm = getUChar(delta);
29764 UInt imm8 = 0;
29765 UInt rG = gregOfRexRM(pfx, modrm);
29766 IRTemp sV = newTemp(Ity_V128);
29767 if (epartIsReg(modrm)) {
29768 UInt rE = eregOfRexRM(pfx, modrm);
29769 delta += 1;
29770 imm8 = getUChar(delta);
29771 DIP("vpermilpd $%u,%s,%s\n",
29772 imm8, nameXMMReg(rE), nameXMMReg(rG));
29773 assign(sV, getXMMReg(rE));
29774 } else {
29775 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29776 delta += alen;
29777 imm8 = getUChar(delta);
29778 DIP("vpermilpd $%u,%s,%s\n",
29779 imm8, dis_buf, nameXMMReg(rG));
29780 assign(sV, loadLE(Ity_V128, mkexpr(addr)));
29781 }
29782 delta++;
29783 IRTemp s1 = newTemp(Ity_I64);
29784 IRTemp s0 = newTemp(Ity_I64);
29785 assign(s1, unop(Iop_V128HIto64, mkexpr(sV)));
29786 assign(s0, unop(Iop_V128to64, mkexpr(sV)));
29787 IRTemp dV = newTemp(Ity_V128);
29788 assign(dV, binop(Iop_64HLtoV128,
29789 mkexpr((imm8 & (1<<1)) ? s1 : s0),
29790 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
29791 putYMMRegLoAndZU(rG, mkexpr(dV));
29792 goto decode_success;
29793 }
sewardj21459cb2012-06-18 14:05:52 +000029794 /* VPERMILPD imm8, ymm2/m256, ymm1 = VEX.256.66.0F3A.WIG 05 /r ib */
29795 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
sewardje8a7eb72012-06-12 14:59:17 +000029796 UChar modrm = getUChar(delta);
29797 UInt imm8 = 0;
29798 UInt rG = gregOfRexRM(pfx, modrm);
29799 IRTemp sV = newTemp(Ity_V256);
29800 if (epartIsReg(modrm)) {
29801 UInt rE = eregOfRexRM(pfx, modrm);
29802 delta += 1;
29803 imm8 = getUChar(delta);
29804 DIP("vpermilpd $%u,%s,%s\n",
29805 imm8, nameYMMReg(rE), nameYMMReg(rG));
29806 assign(sV, getYMMReg(rE));
29807 } else {
29808 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29809 delta += alen;
29810 imm8 = getUChar(delta);
29811 DIP("vpermilpd $%u,%s,%s\n",
29812 imm8, dis_buf, nameYMMReg(rG));
29813 assign(sV, loadLE(Ity_V256, mkexpr(addr)));
29814 }
29815 delta++;
sewardj4b1cc832012-06-13 11:10:20 +000029816 IRTemp s3, s2, s1, s0;
29817 s3 = s2 = s1 = s0 = IRTemp_INVALID;
29818 breakupV256to64s(sV, &s3, &s2, &s1, &s0);
sewardje8a7eb72012-06-12 14:59:17 +000029819 IRTemp dV = newTemp(Ity_V256);
29820 assign(dV, IRExpr_Qop(Iop_64x4toV256,
29821 mkexpr((imm8 & (1<<3)) ? s3 : s2),
29822 mkexpr((imm8 & (1<<2)) ? s3 : s2),
29823 mkexpr((imm8 & (1<<1)) ? s1 : s0),
29824 mkexpr((imm8 & (1<<0)) ? s1 : s0)));
29825 putYMMReg(rG, mkexpr(dV));
29826 goto decode_success;
29827 }
sewardje8a7eb72012-06-12 14:59:17 +000029828 break;
29829
29830 case 0x06:
29831 /* VPERM2F128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.66.0F3A.W0 06 /r ib */
29832 if (have66noF2noF3(pfx)
29833 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
29834 UChar modrm = getUChar(delta);
29835 UInt imm8 = 0;
29836 UInt rG = gregOfRexRM(pfx, modrm);
29837 UInt rV = getVexNvvvv(pfx);
29838 IRTemp s00 = newTemp(Ity_V128);
29839 IRTemp s01 = newTemp(Ity_V128);
29840 IRTemp s10 = newTemp(Ity_V128);
29841 IRTemp s11 = newTemp(Ity_V128);
29842 assign(s00, getYMMRegLane128(rV, 0));
29843 assign(s01, getYMMRegLane128(rV, 1));
29844 if (epartIsReg(modrm)) {
29845 UInt rE = eregOfRexRM(pfx, modrm);
29846 delta += 1;
29847 imm8 = getUChar(delta);
29848 DIP("vperm2f128 $%u,%s,%s,%s\n",
29849 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
29850 assign(s10, getYMMRegLane128(rE, 0));
29851 assign(s11, getYMMRegLane128(rE, 1));
29852 } else {
29853 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29854 delta += alen;
29855 imm8 = getUChar(delta);
29856 DIP("vperm2f128 $%u,%s,%s,%s\n",
29857 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
29858 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
29859 mkexpr(addr), mkU64(0))));
29860 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
29861 mkexpr(addr), mkU64(16))));
29862 }
29863 delta++;
29864# define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
29865 : ((_nn)==2) ? s10 : s11)
29866 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
29867 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
29868# undef SEL
29869 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
29870 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
29871 *uses_vvvv = True;
29872 goto decode_success;
29873 }
29874 break;
29875
sewardj4f228902012-06-21 09:17:58 +000029876 case 0x08:
sewardja9651762012-06-24 11:09:37 +000029877 /* VROUNDPS imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000029878 /* VROUNDPS = VEX.NDS.128.66.0F3A.WIG 08 ib */
29879 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29880 UChar modrm = getUChar(delta);
29881 UInt rG = gregOfRexRM(pfx, modrm);
29882 IRTemp src = newTemp(Ity_V128);
29883 IRTemp s0 = IRTemp_INVALID;
29884 IRTemp s1 = IRTemp_INVALID;
29885 IRTemp s2 = IRTemp_INVALID;
29886 IRTemp s3 = IRTemp_INVALID;
29887 IRTemp rm = newTemp(Ity_I32);
29888 Int imm = 0;
29889
29890 modrm = getUChar(delta);
29891
29892 if (epartIsReg(modrm)) {
29893 UInt rE = eregOfRexRM(pfx, modrm);
29894 assign( src, getXMMReg( rE ) );
29895 imm = getUChar(delta+1);
29896 if (imm & ~15) break;
29897 delta += 1+1;
29898 DIP( "vroundps $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
29899 } else {
29900 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29901 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
29902 imm = getUChar(delta+alen);
29903 if (imm & ~15) break;
29904 delta += alen+1;
29905 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
29906 }
29907
29908 /* (imm & 3) contains an Intel-encoded rounding mode. Because
29909 that encoding is the same as the encoding for IRRoundingMode,
29910 we can use that value directly in the IR as a rounding
29911 mode. */
29912 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
29913
29914 breakupV128to32s( src, &s3, &s2, &s1, &s0 );
29915 putYMMRegLane128( rG, 1, mkV128(0) );
29916# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
29917 unop(Iop_ReinterpI32asF32, mkexpr(s)))
29918 putYMMRegLane32F( rG, 3, CVT(s3) );
29919 putYMMRegLane32F( rG, 2, CVT(s2) );
29920 putYMMRegLane32F( rG, 1, CVT(s1) );
29921 putYMMRegLane32F( rG, 0, CVT(s0) );
29922# undef CVT
29923 goto decode_success;
29924 }
sewardja9651762012-06-24 11:09:37 +000029925 /* VROUNDPS imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000029926 /* VROUNDPS = VEX.NDS.256.66.0F3A.WIG 08 ib */
29927 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
29928 UChar modrm = getUChar(delta);
29929 UInt rG = gregOfRexRM(pfx, modrm);
29930 IRTemp src = newTemp(Ity_V256);
29931 IRTemp s0 = IRTemp_INVALID;
29932 IRTemp s1 = IRTemp_INVALID;
29933 IRTemp s2 = IRTemp_INVALID;
29934 IRTemp s3 = IRTemp_INVALID;
29935 IRTemp s4 = IRTemp_INVALID;
29936 IRTemp s5 = IRTemp_INVALID;
29937 IRTemp s6 = IRTemp_INVALID;
29938 IRTemp s7 = IRTemp_INVALID;
29939 IRTemp rm = newTemp(Ity_I32);
29940 Int imm = 0;
29941
29942 modrm = getUChar(delta);
29943
29944 if (epartIsReg(modrm)) {
29945 UInt rE = eregOfRexRM(pfx, modrm);
29946 assign( src, getYMMReg( rE ) );
29947 imm = getUChar(delta+1);
29948 if (imm & ~15) break;
29949 delta += 1+1;
29950 DIP( "vroundps $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
29951 } else {
29952 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
29953 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
29954 imm = getUChar(delta+alen);
29955 if (imm & ~15) break;
29956 delta += alen+1;
29957 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
29958 }
29959
29960 /* (imm & 3) contains an Intel-encoded rounding mode. Because
29961 that encoding is the same as the encoding for IRRoundingMode,
29962 we can use that value directly in the IR as a rounding
29963 mode. */
29964 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
29965
29966 breakupV256to32s( src, &s7, &s6, &s5, &s4, &s3, &s2, &s1, &s0 );
29967# define CVT(s) binop(Iop_RoundF32toInt, mkexpr(rm), \
29968 unop(Iop_ReinterpI32asF32, mkexpr(s)))
29969 putYMMRegLane32F( rG, 7, CVT(s7) );
29970 putYMMRegLane32F( rG, 6, CVT(s6) );
29971 putYMMRegLane32F( rG, 5, CVT(s5) );
29972 putYMMRegLane32F( rG, 4, CVT(s4) );
29973 putYMMRegLane32F( rG, 3, CVT(s3) );
29974 putYMMRegLane32F( rG, 2, CVT(s2) );
29975 putYMMRegLane32F( rG, 1, CVT(s1) );
29976 putYMMRegLane32F( rG, 0, CVT(s0) );
29977# undef CVT
29978 goto decode_success;
29979 }
29980
29981 case 0x09:
sewardja9651762012-06-24 11:09:37 +000029982 /* VROUNDPD imm8, xmm2/m128, xmm1 */
sewardj4f228902012-06-21 09:17:58 +000029983 /* VROUNDPD = VEX.NDS.128.66.0F3A.WIG 09 ib */
29984 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
29985 UChar modrm = getUChar(delta);
29986 UInt rG = gregOfRexRM(pfx, modrm);
29987 IRTemp src = newTemp(Ity_V128);
29988 IRTemp s0 = IRTemp_INVALID;
29989 IRTemp s1 = IRTemp_INVALID;
29990 IRTemp rm = newTemp(Ity_I32);
29991 Int imm = 0;
29992
29993 modrm = getUChar(delta);
29994
29995 if (epartIsReg(modrm)) {
29996 UInt rE = eregOfRexRM(pfx, modrm);
29997 assign( src, getXMMReg( rE ) );
29998 imm = getUChar(delta+1);
29999 if (imm & ~15) break;
30000 delta += 1+1;
30001 DIP( "vroundpd $%d,%s,%s\n", imm, nameXMMReg(rE), nameXMMReg(rG) );
30002 } else {
30003 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30004 assign( src, loadLE(Ity_V128, mkexpr(addr) ) );
30005 imm = getUChar(delta+alen);
30006 if (imm & ~15) break;
30007 delta += alen+1;
30008 DIP( "vroundpd $%d,%s,%s\n", imm, dis_buf, nameXMMReg(rG) );
30009 }
30010
30011 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30012 that encoding is the same as the encoding for IRRoundingMode,
30013 we can use that value directly in the IR as a rounding
30014 mode. */
30015 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30016
30017 breakupV128to64s( src, &s1, &s0 );
30018 putYMMRegLane128( rG, 1, mkV128(0) );
30019# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30020 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30021 putYMMRegLane64F( rG, 1, CVT(s1) );
30022 putYMMRegLane64F( rG, 0, CVT(s0) );
30023# undef CVT
30024 goto decode_success;
30025 }
sewardja9651762012-06-24 11:09:37 +000030026 /* VROUNDPD imm8, ymm2/m256, ymm1 */
sewardj4f228902012-06-21 09:17:58 +000030027 /* VROUNDPD = VEX.NDS.256.66.0F3A.WIG 09 ib */
30028 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30029 UChar modrm = getUChar(delta);
30030 UInt rG = gregOfRexRM(pfx, modrm);
30031 IRTemp src = newTemp(Ity_V256);
30032 IRTemp s0 = IRTemp_INVALID;
30033 IRTemp s1 = IRTemp_INVALID;
30034 IRTemp s2 = IRTemp_INVALID;
30035 IRTemp s3 = IRTemp_INVALID;
30036 IRTemp rm = newTemp(Ity_I32);
30037 Int imm = 0;
30038
30039 modrm = getUChar(delta);
30040
30041 if (epartIsReg(modrm)) {
30042 UInt rE = eregOfRexRM(pfx, modrm);
30043 assign( src, getYMMReg( rE ) );
30044 imm = getUChar(delta+1);
30045 if (imm & ~15) break;
30046 delta += 1+1;
30047 DIP( "vroundpd $%d,%s,%s\n", imm, nameYMMReg(rE), nameYMMReg(rG) );
30048 } else {
30049 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30050 assign( src, loadLE(Ity_V256, mkexpr(addr) ) );
30051 imm = getUChar(delta+alen);
30052 if (imm & ~15) break;
30053 delta += alen+1;
30054 DIP( "vroundps $%d,%s,%s\n", imm, dis_buf, nameYMMReg(rG) );
30055 }
30056
30057 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30058 that encoding is the same as the encoding for IRRoundingMode,
30059 we can use that value directly in the IR as a rounding
30060 mode. */
30061 assign(rm, (imm & 4) ? get_sse_roundingmode() : mkU32(imm & 3));
30062
30063 breakupV256to64s( src, &s3, &s2, &s1, &s0 );
30064# define CVT(s) binop(Iop_RoundF64toInt, mkexpr(rm), \
30065 unop(Iop_ReinterpI64asF64, mkexpr(s)))
30066 putYMMRegLane64F( rG, 3, CVT(s3) );
30067 putYMMRegLane64F( rG, 2, CVT(s2) );
30068 putYMMRegLane64F( rG, 1, CVT(s1) );
30069 putYMMRegLane64F( rG, 0, CVT(s0) );
30070# undef CVT
30071 goto decode_success;
30072 }
30073
30074 case 0x0A:
30075 case 0x0B:
30076 /* VROUNDSS imm8, xmm3/m32, xmm2, xmm1 */
30077 /* VROUNDSS = VEX.NDS.128.66.0F3A.WIG 0A ib */
30078 /* VROUNDSD imm8, xmm3/m64, xmm2, xmm1 */
30079 /* VROUNDSD = VEX.NDS.128.66.0F3A.WIG 0B ib */
30080 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30081 UChar modrm = getUChar(delta);
30082 UInt rG = gregOfRexRM(pfx, modrm);
30083 UInt rV = getVexNvvvv(pfx);
30084 Bool isD = opc == 0x0B;
30085 IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
30086 IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
30087 Int imm = 0;
30088
30089 if (epartIsReg(modrm)) {
30090 UInt rE = eregOfRexRM(pfx, modrm);
30091 assign( src,
30092 isD ? getXMMRegLane64F(rE, 0) : getXMMRegLane32F(rE, 0) );
30093 imm = getUChar(delta+1);
30094 if (imm & ~15) break;
30095 delta += 1+1;
30096 DIP( "vrounds%c $%d,%s,%s,%s\n",
30097 isD ? 'd' : 's',
30098 imm, nameXMMReg( rE ), nameXMMReg( rV ), nameXMMReg( rG ) );
30099 } else {
30100 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30101 assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
30102 imm = getUChar(delta+alen);
30103 if (imm & ~15) break;
30104 delta += alen+1;
30105 DIP( "vrounds%c $%d,%s,%s,%s\n",
30106 isD ? 'd' : 's',
30107 imm, dis_buf, nameXMMReg( rV ), nameXMMReg( rG ) );
30108 }
30109
30110 /* (imm & 3) contains an Intel-encoded rounding mode. Because
30111 that encoding is the same as the encoding for IRRoundingMode,
30112 we can use that value directly in the IR as a rounding
30113 mode. */
30114 assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
30115 (imm & 4) ? get_sse_roundingmode()
30116 : mkU32(imm & 3),
30117 mkexpr(src)) );
30118
30119 if (isD)
30120 putXMMRegLane64F( rG, 0, mkexpr(res) );
30121 else {
30122 putXMMRegLane32F( rG, 0, mkexpr(res) );
30123 putXMMRegLane32F( rG, 1, getXMMRegLane32F( rV, 1 ) );
30124 }
30125 putXMMRegLane64F( rG, 1, getXMMRegLane64F( rV, 1 ) );
30126 putYMMRegLane128( rG, 1, mkV128(0) );
30127 *uses_vvvv = True;
30128 goto decode_success;
30129 }
30130 break;
30131
sewardj21459cb2012-06-18 14:05:52 +000030132 case 0x0C:
30133 /* VBLENDPS imm8, ymm3/m256, ymm2, ymm1 */
30134 /* VBLENDPS = VEX.NDS.256.66.0F3A.WIG 0C /r ib */
30135 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30136 UChar modrm = getUChar(delta);
30137 UInt imm8;
30138 UInt rG = gregOfRexRM(pfx, modrm);
30139 UInt rV = getVexNvvvv(pfx);
30140 IRTemp sV = newTemp(Ity_V256);
30141 IRTemp sE = newTemp(Ity_V256);
30142 assign ( sV, getYMMReg(rV) );
30143 if (epartIsReg(modrm)) {
30144 UInt rE = eregOfRexRM(pfx, modrm);
30145 delta += 1;
30146 imm8 = getUChar(delta);
30147 DIP("vblendps $%u,%s,%s,%s\n",
30148 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30149 assign(sE, getYMMReg(rE));
30150 } else {
30151 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30152 delta += alen;
30153 imm8 = getUChar(delta);
30154 DIP("vblendps $%u,%s,%s,%s\n",
30155 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30156 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30157 }
30158 delta++;
30159 putYMMReg( rG,
30160 mkexpr( math_BLENDPS_256( sE, sV, imm8) ) );
30161 *uses_vvvv = True;
30162 goto decode_success;
30163 }
30164 /* VBLENDPS imm8, xmm3/m128, xmm2, xmm1 */
30165 /* VBLENDPS = VEX.NDS.128.66.0F3A.WIG 0C /r ib */
30166 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30167 UChar modrm = getUChar(delta);
30168 UInt imm8;
30169 UInt rG = gregOfRexRM(pfx, modrm);
30170 UInt rV = getVexNvvvv(pfx);
30171 IRTemp sV = newTemp(Ity_V128);
30172 IRTemp sE = newTemp(Ity_V128);
30173 assign ( sV, getXMMReg(rV) );
30174 if (epartIsReg(modrm)) {
30175 UInt rE = eregOfRexRM(pfx, modrm);
30176 delta += 1;
30177 imm8 = getUChar(delta);
30178 DIP("vblendps $%u,%s,%s,%s\n",
30179 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30180 assign(sE, getXMMReg(rE));
30181 } else {
30182 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30183 delta += alen;
30184 imm8 = getUChar(delta);
30185 DIP("vblendps $%u,%s,%s,%s\n",
30186 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30187 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30188 }
30189 delta++;
30190 putYMMRegLoAndZU( rG,
30191 mkexpr( math_BLENDPS_128( sE, sV, imm8) ) );
30192 *uses_vvvv = True;
30193 goto decode_success;
30194 }
30195 break;
30196
30197 case 0x0D:
30198 /* VBLENDPD imm8, ymm3/m256, ymm2, ymm1 */
30199 /* VBLENDPD = VEX.NDS.256.66.0F3A.WIG 0D /r ib */
30200 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30201 UChar modrm = getUChar(delta);
30202 UInt imm8;
30203 UInt rG = gregOfRexRM(pfx, modrm);
30204 UInt rV = getVexNvvvv(pfx);
30205 IRTemp sV = newTemp(Ity_V256);
30206 IRTemp sE = newTemp(Ity_V256);
30207 assign ( sV, getYMMReg(rV) );
30208 if (epartIsReg(modrm)) {
30209 UInt rE = eregOfRexRM(pfx, modrm);
30210 delta += 1;
30211 imm8 = getUChar(delta);
30212 DIP("vblendpd $%u,%s,%s,%s\n",
30213 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30214 assign(sE, getYMMReg(rE));
30215 } else {
30216 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30217 delta += alen;
30218 imm8 = getUChar(delta);
30219 DIP("vblendpd $%u,%s,%s,%s\n",
30220 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30221 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30222 }
30223 delta++;
30224 putYMMReg( rG,
30225 mkexpr( math_BLENDPD_256( sE, sV, imm8) ) );
30226 *uses_vvvv = True;
30227 goto decode_success;
30228 }
30229 /* VBLENDPD imm8, xmm3/m128, xmm2, xmm1 */
30230 /* VBLENDPD = VEX.NDS.128.66.0F3A.WIG 0D /r ib */
30231 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30232 UChar modrm = getUChar(delta);
30233 UInt imm8;
30234 UInt rG = gregOfRexRM(pfx, modrm);
30235 UInt rV = getVexNvvvv(pfx);
30236 IRTemp sV = newTemp(Ity_V128);
30237 IRTemp sE = newTemp(Ity_V128);
30238 assign ( sV, getXMMReg(rV) );
30239 if (epartIsReg(modrm)) {
30240 UInt rE = eregOfRexRM(pfx, modrm);
30241 delta += 1;
30242 imm8 = getUChar(delta);
30243 DIP("vblendpd $%u,%s,%s,%s\n",
30244 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30245 assign(sE, getXMMReg(rE));
30246 } else {
30247 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30248 delta += alen;
30249 imm8 = getUChar(delta);
30250 DIP("vblendpd $%u,%s,%s,%s\n",
30251 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
30252 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30253 }
30254 delta++;
30255 putYMMRegLoAndZU( rG,
30256 mkexpr( math_BLENDPD_128( sE, sV, imm8) ) );
30257 *uses_vvvv = True;
30258 goto decode_success;
30259 }
30260 break;
30261
30262 case 0x0E:
30263 /* VPBLENDW imm8, xmm3/m128, xmm2, xmm1 */
30264 /* VPBLENDW = VEX.NDS.128.66.0F3A.WIG 0E /r ib */
30265 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30266 UChar modrm = getUChar(delta);
30267 UInt imm8;
30268 UInt rG = gregOfRexRM(pfx, modrm);
30269 UInt rV = getVexNvvvv(pfx);
30270 IRTemp sV = newTemp(Ity_V128);
30271 IRTemp sE = newTemp(Ity_V128);
30272 assign ( sV, getXMMReg(rV) );
30273 if (epartIsReg(modrm)) {
30274 UInt rE = eregOfRexRM(pfx, modrm);
30275 delta += 1;
30276 imm8 = getUChar(delta);
30277 DIP("vpblendw $%u,%s,%s,%s\n",
30278 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
30279 assign(sE, getXMMReg(rE));
30280 } else {
30281 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30282 delta += alen;
30283 imm8 = getUChar(delta);
30284 DIP("vpblendw $%u,%s,%s,%s\n",
sewardjcc3d2192013-03-27 11:37:33 +000030285 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG));
sewardj21459cb2012-06-18 14:05:52 +000030286 assign(sE, loadLE(Ity_V128, mkexpr(addr)));
30287 }
30288 delta++;
30289 putYMMRegLoAndZU( rG,
30290 mkexpr( math_PBLENDW_128( sE, sV, imm8) ) );
30291 *uses_vvvv = True;
30292 goto decode_success;
30293 }
sewardjcc3d2192013-03-27 11:37:33 +000030294 /* VPBLENDW imm8, ymm3/m256, ymm2, ymm1 */
30295 /* VPBLENDW = VEX.NDS.256.66.0F3A.WIG 0E /r ib */
30296 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30297 UChar modrm = getUChar(delta);
30298 UInt imm8;
30299 UInt rG = gregOfRexRM(pfx, modrm);
30300 UInt rV = getVexNvvvv(pfx);
30301 IRTemp sV = newTemp(Ity_V256);
30302 IRTemp sE = newTemp(Ity_V256);
30303 IRTemp sVhi, sVlo, sEhi, sElo;
30304 sVhi = sVlo = sEhi = sElo = IRTemp_INVALID;
30305 assign ( sV, getYMMReg(rV) );
30306 if (epartIsReg(modrm)) {
30307 UInt rE = eregOfRexRM(pfx, modrm);
30308 delta += 1;
30309 imm8 = getUChar(delta);
30310 DIP("vpblendw $%u,%s,%s,%s\n",
30311 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30312 assign(sE, getYMMReg(rE));
30313 } else {
30314 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30315 delta += alen;
30316 imm8 = getUChar(delta);
30317 DIP("vpblendw $%u,%s,%s,%s\n",
30318 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30319 assign(sE, loadLE(Ity_V256, mkexpr(addr)));
30320 }
30321 delta++;
30322 breakupV256toV128s( sV, &sVhi, &sVlo );
30323 breakupV256toV128s( sE, &sEhi, &sElo );
30324 putYMMReg( rG, binop( Iop_V128HLtoV256,
30325 mkexpr( math_PBLENDW_128( sEhi, sVhi, imm8) ),
30326 mkexpr( math_PBLENDW_128( sElo, sVlo, imm8) ) ) );
30327 *uses_vvvv = True;
30328 goto decode_success;
30329 }
sewardj21459cb2012-06-18 14:05:52 +000030330 break;
30331
sewardj151cd3e2012-06-18 13:56:55 +000030332 case 0x0F:
30333 /* VPALIGNR imm8, xmm3/m128, xmm2, xmm1 */
30334 /* VPALIGNR = VEX.NDS.128.66.0F3A.WIG 0F /r ib */
30335 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30336 UChar modrm = getUChar(delta);
30337 UInt rG = gregOfRexRM(pfx, modrm);
30338 UInt rV = getVexNvvvv(pfx);
30339 IRTemp sV = newTemp(Ity_V128);
30340 IRTemp dV = newTemp(Ity_V128);
30341 UInt imm8;
30342
30343 assign( dV, getXMMReg(rV) );
30344
30345 if ( epartIsReg( modrm ) ) {
30346 UInt rE = eregOfRexRM(pfx, modrm);
30347 assign( sV, getXMMReg(rE) );
30348 imm8 = getUChar(delta+1);
30349 delta += 1+1;
30350 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameXMMReg(rE),
30351 nameXMMReg(rV), nameXMMReg(rG));
30352 } else {
30353 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj151cd3e2012-06-18 13:56:55 +000030354 assign( sV, loadLE(Ity_V128, mkexpr(addr)) );
30355 imm8 = getUChar(delta+alen);
30356 delta += alen+1;
30357 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf,
30358 nameXMMReg(rV), nameXMMReg(rG));
30359 }
30360
30361 IRTemp res = math_PALIGNR_XMM( sV, dV, imm8 );
30362 putYMMRegLoAndZU( rG, mkexpr(res) );
30363 *uses_vvvv = True;
30364 goto decode_success;
30365 }
sewardjcc3d2192013-03-27 11:37:33 +000030366 /* VPALIGNR imm8, ymm3/m256, ymm2, ymm1 */
30367 /* VPALIGNR = VEX.NDS.256.66.0F3A.WIG 0F /r ib */
30368 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30369 UChar modrm = getUChar(delta);
30370 UInt rG = gregOfRexRM(pfx, modrm);
30371 UInt rV = getVexNvvvv(pfx);
30372 IRTemp sV = newTemp(Ity_V256);
30373 IRTemp dV = newTemp(Ity_V256);
30374 IRTemp sHi, sLo, dHi, dLo;
30375 sHi = sLo = dHi = dLo = IRTemp_INVALID;
30376 UInt imm8;
30377
30378 assign( dV, getYMMReg(rV) );
30379
30380 if ( epartIsReg( modrm ) ) {
30381 UInt rE = eregOfRexRM(pfx, modrm);
30382 assign( sV, getYMMReg(rE) );
30383 imm8 = getUChar(delta+1);
30384 delta += 1+1;
30385 DIP("vpalignr $%d,%s,%s,%s\n", imm8, nameYMMReg(rE),
30386 nameYMMReg(rV), nameYMMReg(rG));
30387 } else {
30388 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30389 assign( sV, loadLE(Ity_V256, mkexpr(addr)) );
30390 imm8 = getUChar(delta+alen);
30391 delta += alen+1;
30392 DIP("vpalignr $%d,%s,%s,%s\n", imm8, dis_buf,
30393 nameYMMReg(rV), nameYMMReg(rG));
30394 }
30395
30396 breakupV256toV128s( dV, &dHi, &dLo );
30397 breakupV256toV128s( sV, &sHi, &sLo );
30398 putYMMReg( rG, binop( Iop_V128HLtoV256,
30399 mkexpr( math_PALIGNR_XMM( sHi, dHi, imm8 ) ),
30400 mkexpr( math_PALIGNR_XMM( sLo, dLo, imm8 ) ) )
30401 );
30402 *uses_vvvv = True;
30403 goto decode_success;
30404 }
sewardj151cd3e2012-06-18 13:56:55 +000030405 break;
30406
sewardje8a7eb72012-06-12 14:59:17 +000030407 case 0x14:
30408 /* VPEXTRB imm8, xmm2, reg/m8 = VEX.128.66.0F3A.W0 14 /r ib */
30409 if (have66noF2noF3(pfx)
30410 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30411 delta = dis_PEXTRB_128_GtoE( vbi, pfx, delta, False/*!isAvx*/ );
30412 goto decode_success;
30413 }
30414 break;
30415
sewardj82096922012-06-24 14:57:59 +000030416 case 0x15:
30417 /* VPEXTRW imm8, reg/m16, xmm2 */
30418 /* VPEXTRW = VEX.128.66.0F3A.W0 15 /r ib */
30419 if (have66noF2noF3(pfx)
30420 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30421 delta = dis_PEXTRW( vbi, pfx, delta, True/*isAvx*/ );
30422 goto decode_success;
30423 }
30424 break;
30425
sewardjc4530ae2012-05-21 10:18:49 +000030426 case 0x16:
30427 /* VPEXTRD imm8, r32/m32, xmm2 */
30428 /* VPEXTRD = VEX.128.66.0F3A.W0 16 /r ib */
30429 if (have66noF2noF3(pfx)
30430 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30431 delta = dis_PEXTRD( vbi, pfx, delta, True/*isAvx*/ );
30432 goto decode_success;
30433 }
sewardj56c30312012-06-12 08:45:39 +000030434 /* VPEXTRQ = VEX.128.66.0F3A.W1 16 /r ib */
30435 if (have66noF2noF3(pfx)
30436 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
30437 delta = dis_PEXTRQ( vbi, pfx, delta, True/*isAvx*/ );
30438 goto decode_success;
30439 }
sewardjc4530ae2012-05-21 10:18:49 +000030440 break;
30441
sewardjadf357c2012-06-24 13:44:17 +000030442 case 0x17:
30443 /* VEXTRACTPS imm8, xmm1, r32/m32 = VEX.128.66.0F3A.WIG 17 /r ib */
30444 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30445 delta = dis_EXTRACTPS( vbi, pfx, delta, True/*isAvx*/ );
30446 goto decode_success;
30447 }
30448 break;
30449
sewardjc4530ae2012-05-21 10:18:49 +000030450 case 0x18:
30451 /* VINSERTF128 r/m, rV, rD
sewardj251b59e2012-05-25 13:51:07 +000030452 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
sewardjc4530ae2012-05-21 10:18:49 +000030453 /* VINSERTF128 = VEX.NDS.256.66.0F3A.W0 18 /r ib */
30454 if (have66noF2noF3(pfx)
30455 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30456 UChar modrm = getUChar(delta);
30457 UInt ib = 0;
sewardje8a7eb72012-06-12 14:59:17 +000030458 UInt rG = gregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000030459 UInt rV = getVexNvvvv(pfx);
30460 IRTemp t128 = newTemp(Ity_V128);
30461 if (epartIsReg(modrm)) {
sewardje8a7eb72012-06-12 14:59:17 +000030462 UInt rE = eregOfRexRM(pfx, modrm);
sewardjc4530ae2012-05-21 10:18:49 +000030463 delta += 1;
sewardje8a7eb72012-06-12 14:59:17 +000030464 assign(t128, getXMMReg(rE));
sewardjc4530ae2012-05-21 10:18:49 +000030465 ib = getUChar(delta);
30466 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000030467 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000030468 } else {
30469 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30470 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
30471 delta += alen;
30472 ib = getUChar(delta);
30473 DIP("vinsertf128 $%u,%s,%s,%s\n",
sewardje8a7eb72012-06-12 14:59:17 +000030474 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
sewardjc4530ae2012-05-21 10:18:49 +000030475 }
30476 delta++;
sewardje8a7eb72012-06-12 14:59:17 +000030477 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
30478 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
30479 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000030480 *uses_vvvv = True;
30481 goto decode_success;
30482 }
30483 break;
30484
30485 case 0x19:
sewardjcfca8cd2012-05-27 08:25:42 +000030486 /* VEXTRACTF128 $lane_no, rS, r/m
sewardjc4530ae2012-05-21 10:18:49 +000030487 ::: r/m:V128 = a lane of rS:V256 (RM format) */
30488 /* VEXTRACTF128 = VEX.256.66.0F3A.W0 19 /r ib */
30489 if (have66noF2noF3(pfx)
30490 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30491 UChar modrm = getUChar(delta);
30492 UInt ib = 0;
30493 UInt rS = gregOfRexRM(pfx, modrm);
30494 IRTemp t128 = newTemp(Ity_V128);
30495 if (epartIsReg(modrm)) {
30496 UInt rD = eregOfRexRM(pfx, modrm);
30497 delta += 1;
30498 ib = getUChar(delta);
30499 assign(t128, getYMMRegLane128(rS, ib & 1));
sewardjc93904b2012-05-27 13:50:42 +000030500 putYMMRegLoAndZU(rD, mkexpr(t128));
sewardjc4530ae2012-05-21 10:18:49 +000030501 DIP("vextractf128 $%u,%s,%s\n",
30502 ib, nameXMMReg(rS), nameYMMReg(rD));
30503 } else {
30504 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30505 delta += alen;
30506 ib = getUChar(delta);
30507 assign(t128, getYMMRegLane128(rS, ib & 1));
30508 storeLE(mkexpr(addr), mkexpr(t128));
30509 DIP("vextractf128 $%u,%s,%s\n",
30510 ib, nameYMMReg(rS), dis_buf);
30511 }
30512 delta++;
30513 /* doesn't use vvvv */
30514 goto decode_success;
30515 }
30516 break;
30517
sewardj21459cb2012-06-18 14:05:52 +000030518 case 0x20:
sewardj4ed05e02012-06-18 15:01:30 +000030519 /* VPINSRB r32/m8, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 20 /r ib */
30520 if (have66noF2noF3(pfx)
30521 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30522 UChar modrm = getUChar(delta);
30523 UInt rG = gregOfRexRM(pfx, modrm);
30524 UInt rV = getVexNvvvv(pfx);
sewardj21459cb2012-06-18 14:05:52 +000030525 Int imm8;
sewardj4ed05e02012-06-18 15:01:30 +000030526 IRTemp src_u8 = newTemp(Ity_I8);
sewardj21459cb2012-06-18 14:05:52 +000030527
30528 if ( epartIsReg( modrm ) ) {
sewardj4ed05e02012-06-18 15:01:30 +000030529 UInt rE = eregOfRexRM(pfx,modrm);
30530 imm8 = (Int)(getUChar(delta+1) & 15);
30531 assign( src_u8, unop(Iop_32to8, getIReg32( rE )) );
sewardj21459cb2012-06-18 14:05:52 +000030532 delta += 1+1;
sewardj4ed05e02012-06-18 15:01:30 +000030533 DIP( "vpinsrb $%d,%s,%s,%s\n",
30534 imm8, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000030535 } else {
30536 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
sewardj4ed05e02012-06-18 15:01:30 +000030537 imm8 = (Int)(getUChar(delta+alen) & 15);
30538 assign( src_u8, loadLE( Ity_I8, mkexpr(addr) ) );
sewardj21459cb2012-06-18 14:05:52 +000030539 delta += alen+1;
sewardj4ed05e02012-06-18 15:01:30 +000030540 DIP( "vpinsrb $%d,%s,%s,%s\n",
30541 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
sewardj21459cb2012-06-18 14:05:52 +000030542 }
30543
30544 IRTemp src_vec = newTemp(Ity_V128);
30545 assign(src_vec, getXMMReg( rV ));
sewardj4ed05e02012-06-18 15:01:30 +000030546 IRTemp res_vec = math_PINSRB_128( src_vec, src_u8, imm8 );
30547 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
sewardj21459cb2012-06-18 14:05:52 +000030548 *uses_vvvv = True;
30549 goto decode_success;
30550 }
sewardj4ed05e02012-06-18 15:01:30 +000030551 break;
sewardj21459cb2012-06-18 14:05:52 +000030552
sewardjcfca8cd2012-05-27 08:25:42 +000030553 case 0x21:
30554 /* VINSERTPS imm8, xmm3/m32, xmm2, xmm1
30555 = VEX.NDS.128.66.0F3A.WIG 21 /r ib */
30556 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30557 UChar modrm = getUChar(delta);
30558 UInt rG = gregOfRexRM(pfx, modrm);
30559 UInt rV = getVexNvvvv(pfx);
30560 UInt imm8;
30561 IRTemp d2ins = newTemp(Ity_I32); /* comes from the E part */
30562 const IRTemp inval = IRTemp_INVALID;
30563
30564 if ( epartIsReg( modrm ) ) {
30565 UInt rE = eregOfRexRM(pfx, modrm);
30566 IRTemp vE = newTemp(Ity_V128);
30567 assign( vE, getXMMReg(rE) );
30568 IRTemp dsE[4] = { inval, inval, inval, inval };
sewardj4b1cc832012-06-13 11:10:20 +000030569 breakupV128to32s( vE, &dsE[3], &dsE[2], &dsE[1], &dsE[0] );
sewardjcfca8cd2012-05-27 08:25:42 +000030570 imm8 = getUChar(delta+1);
30571 d2ins = dsE[(imm8 >> 6) & 3]; /* "imm8_count_s" */
30572 delta += 1+1;
30573 DIP( "insertps $%u, %s,%s\n",
30574 imm8, nameXMMReg(rE), nameXMMReg(rG) );
30575 } else {
30576 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30577 assign( d2ins, loadLE( Ity_I32, mkexpr(addr) ) );
30578 imm8 = getUChar(delta+alen);
30579 delta += alen+1;
30580 DIP( "insertps $%u, %s,%s\n",
30581 imm8, dis_buf, nameXMMReg(rG) );
30582 }
30583
30584 IRTemp vV = newTemp(Ity_V128);
30585 assign( vV, getXMMReg(rV) );
30586
30587 putYMMRegLoAndZU( rG, mkexpr(math_INSERTPS( vV, d2ins, imm8 )) );
30588 *uses_vvvv = True;
30589 goto decode_success;
30590 }
30591 break;
30592
sewardj6faf7cc2012-05-25 15:53:01 +000030593 case 0x22:
30594 /* VPINSRD r32/m32, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W0 22 /r ib */
30595 if (have66noF2noF3(pfx)
30596 && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
30597 UChar modrm = getUChar(delta);
30598 UInt rG = gregOfRexRM(pfx, modrm);
30599 UInt rV = getVexNvvvv(pfx);
30600 Int imm8_10;
30601 IRTemp src_u32 = newTemp(Ity_I32);
30602
30603 if ( epartIsReg( modrm ) ) {
30604 UInt rE = eregOfRexRM(pfx,modrm);
30605 imm8_10 = (Int)(getUChar(delta+1) & 3);
30606 assign( src_u32, getIReg32( rE ) );
30607 delta += 1+1;
30608 DIP( "vpinsrd $%d,%s,%s,%s\n",
30609 imm8_10, nameIReg32(rE), nameXMMReg(rV), nameXMMReg(rG) );
30610 } else {
30611 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30612 imm8_10 = (Int)(getUChar(delta+alen) & 3);
30613 assign( src_u32, loadLE( Ity_I32, mkexpr(addr) ) );
30614 delta += alen+1;
sewardj98d02cc2012-06-02 11:55:25 +000030615 DIP( "vpinsrd $%d,%s,%s,%s\n",
sewardj6faf7cc2012-05-25 15:53:01 +000030616 imm8_10, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30617 }
30618
30619 IRTemp src_vec = newTemp(Ity_V128);
30620 assign(src_vec, getXMMReg( rV ));
30621 IRTemp res_vec = math_PINSRD_128( src_vec, src_u32, imm8_10 );
30622 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30623 *uses_vvvv = True;
30624 goto decode_success;
30625 }
sewardj98d02cc2012-06-02 11:55:25 +000030626 /* VPINSRQ r64/m64, xmm2, xmm1 = VEX.NDS.128.66.0F3A.W1 22 /r ib */
30627 if (have66noF2noF3(pfx)
30628 && 0==getVexL(pfx)/*128*/ && 1==getRexW(pfx)/*W1*/) {
30629 UChar modrm = getUChar(delta);
30630 UInt rG = gregOfRexRM(pfx, modrm);
30631 UInt rV = getVexNvvvv(pfx);
30632 Int imm8_0;
30633 IRTemp src_u64 = newTemp(Ity_I64);
30634
30635 if ( epartIsReg( modrm ) ) {
30636 UInt rE = eregOfRexRM(pfx,modrm);
30637 imm8_0 = (Int)(getUChar(delta+1) & 1);
30638 assign( src_u64, getIReg64( rE ) );
30639 delta += 1+1;
30640 DIP( "vpinsrq $%d,%s,%s,%s\n",
30641 imm8_0, nameIReg64(rE), nameXMMReg(rV), nameXMMReg(rG) );
30642 } else {
30643 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30644 imm8_0 = (Int)(getUChar(delta+alen) & 1);
30645 assign( src_u64, loadLE( Ity_I64, mkexpr(addr) ) );
30646 delta += alen+1;
30647 DIP( "vpinsrd $%d,%s,%s,%s\n",
30648 imm8_0, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30649 }
30650
30651 IRTemp src_vec = newTemp(Ity_V128);
30652 assign(src_vec, getXMMReg( rV ));
30653 IRTemp res_vec = math_PINSRQ_128( src_vec, src_u64, imm8_0 );
30654 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30655 *uses_vvvv = True;
30656 goto decode_success;
30657 }
sewardj6faf7cc2012-05-25 15:53:01 +000030658 break;
30659
sewardjcc3d2192013-03-27 11:37:33 +000030660 case 0x38:
30661 /* VINSERTI128 r/m, rV, rD
30662 ::: rD = insertinto(a lane in rV, 128 bits from r/m) */
30663 /* VINSERTI128 = VEX.NDS.256.66.0F3A.W0 38 /r ib */
30664 if (have66noF2noF3(pfx)
30665 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30666 UChar modrm = getUChar(delta);
30667 UInt ib = 0;
30668 UInt rG = gregOfRexRM(pfx, modrm);
30669 UInt rV = getVexNvvvv(pfx);
30670 IRTemp t128 = newTemp(Ity_V128);
30671 if (epartIsReg(modrm)) {
30672 UInt rE = eregOfRexRM(pfx, modrm);
30673 delta += 1;
30674 assign(t128, getXMMReg(rE));
30675 ib = getUChar(delta);
30676 DIP("vinserti128 $%u,%s,%s,%s\n",
30677 ib, nameXMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30678 } else {
30679 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30680 assign(t128, loadLE(Ity_V128, mkexpr(addr)));
30681 delta += alen;
30682 ib = getUChar(delta);
30683 DIP("vinserti128 $%u,%s,%s,%s\n",
30684 ib, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30685 }
30686 delta++;
30687 putYMMRegLane128(rG, 0, getYMMRegLane128(rV, 0));
30688 putYMMRegLane128(rG, 1, getYMMRegLane128(rV, 1));
30689 putYMMRegLane128(rG, ib & 1, mkexpr(t128));
30690 *uses_vvvv = True;
30691 goto decode_success;
30692 }
30693 break;
30694
30695 case 0x39:
30696 /* VEXTRACTI128 $lane_no, rS, r/m
30697 ::: r/m:V128 = a lane of rS:V256 (RM format) */
30698 /* VEXTRACTI128 = VEX.256.66.0F3A.W0 39 /r ib */
30699 if (have66noF2noF3(pfx)
30700 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30701 UChar modrm = getUChar(delta);
30702 UInt ib = 0;
30703 UInt rS = gregOfRexRM(pfx, modrm);
30704 IRTemp t128 = newTemp(Ity_V128);
30705 if (epartIsReg(modrm)) {
30706 UInt rD = eregOfRexRM(pfx, modrm);
30707 delta += 1;
30708 ib = getUChar(delta);
30709 assign(t128, getYMMRegLane128(rS, ib & 1));
30710 putYMMRegLoAndZU(rD, mkexpr(t128));
30711 DIP("vextracti128 $%u,%s,%s\n",
30712 ib, nameXMMReg(rS), nameYMMReg(rD));
30713 } else {
30714 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30715 delta += alen;
30716 ib = getUChar(delta);
30717 assign(t128, getYMMRegLane128(rS, ib & 1));
30718 storeLE(mkexpr(addr), mkexpr(t128));
30719 DIP("vextracti128 $%u,%s,%s\n",
30720 ib, nameYMMReg(rS), dis_buf);
30721 }
30722 delta++;
30723 /* doesn't use vvvv */
30724 goto decode_success;
30725 }
30726 break;
30727
sewardjadf357c2012-06-24 13:44:17 +000030728 case 0x40:
30729 /* VDPPS imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 40 /r ib */
30730 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30731 UChar modrm = getUChar(delta);
30732 UInt rG = gregOfRexRM(pfx, modrm);
30733 UInt rV = getVexNvvvv(pfx);
30734 IRTemp dst_vec = newTemp(Ity_V128);
30735 Int imm8;
30736 if (epartIsReg( modrm )) {
30737 UInt rE = eregOfRexRM(pfx,modrm);
30738 imm8 = (Int)getUChar(delta+1);
30739 assign( dst_vec, getXMMReg( rE ) );
30740 delta += 1+1;
30741 DIP( "vdpps $%d,%s,%s,%s\n",
30742 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30743 } else {
30744 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30745 imm8 = (Int)getUChar(delta+alen);
30746 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30747 delta += alen+1;
30748 DIP( "vdpps $%d,%s,%s,%s\n",
30749 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30750 }
30751
30752 IRTemp src_vec = newTemp(Ity_V128);
30753 assign(src_vec, getXMMReg( rV ));
30754 IRTemp res_vec = math_DPPS_128( src_vec, dst_vec, imm8 );
30755 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30756 *uses_vvvv = True;
30757 goto decode_success;
30758 }
30759 /* VDPPS imm8, ymm3/m256,ymm2,ymm1 = VEX.NDS.256.66.0F3A.WIG 40 /r ib */
30760 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30761 UChar modrm = getUChar(delta);
30762 UInt rG = gregOfRexRM(pfx, modrm);
30763 UInt rV = getVexNvvvv(pfx);
30764 IRTemp dst_vec = newTemp(Ity_V256);
30765 Int imm8;
30766 if (epartIsReg( modrm )) {
30767 UInt rE = eregOfRexRM(pfx,modrm);
30768 imm8 = (Int)getUChar(delta+1);
30769 assign( dst_vec, getYMMReg( rE ) );
30770 delta += 1+1;
30771 DIP( "vdpps $%d,%s,%s,%s\n",
30772 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
30773 } else {
30774 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30775 imm8 = (Int)getUChar(delta+alen);
30776 assign( dst_vec, loadLE( Ity_V256, mkexpr(addr) ) );
30777 delta += alen+1;
30778 DIP( "vdpps $%d,%s,%s,%s\n",
30779 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
30780 }
30781
30782 IRTemp src_vec = newTemp(Ity_V256);
30783 assign(src_vec, getYMMReg( rV ));
30784 IRTemp s0, s1, d0, d1;
30785 s0 = s1 = d0 = d1 = IRTemp_INVALID;
30786 breakupV256toV128s( dst_vec, &d1, &d0 );
30787 breakupV256toV128s( src_vec, &s1, &s0 );
30788 putYMMReg( rG, binop( Iop_V128HLtoV256,
30789 mkexpr( math_DPPS_128(s1, d1, imm8) ),
30790 mkexpr( math_DPPS_128(s0, d0, imm8) ) ) );
30791 *uses_vvvv = True;
30792 goto decode_success;
30793 }
30794 break;
30795
sewardj4ed05e02012-06-18 15:01:30 +000030796 case 0x41:
sewardjadf357c2012-06-24 13:44:17 +000030797 /* VDPPD imm8, xmm3/m128,xmm2,xmm1 = VEX.NDS.128.66.0F3A.WIG 41 /r ib */
sewardj4ed05e02012-06-18 15:01:30 +000030798 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30799 UChar modrm = getUChar(delta);
30800 UInt rG = gregOfRexRM(pfx, modrm);
30801 UInt rV = getVexNvvvv(pfx);
30802 IRTemp dst_vec = newTemp(Ity_V128);
30803 Int imm8;
30804 if (epartIsReg( modrm )) {
30805 UInt rE = eregOfRexRM(pfx,modrm);
30806 imm8 = (Int)getUChar(delta+1);
30807 assign( dst_vec, getXMMReg( rE ) );
30808 delta += 1+1;
30809 DIP( "vdppd $%d,%s,%s,%s\n",
30810 imm8, nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30811 } else {
30812 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30813 imm8 = (Int)getUChar(delta+alen);
30814 assign( dst_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30815 delta += alen+1;
30816 DIP( "vdppd $%d,%s,%s,%s\n",
30817 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30818 }
30819
30820 IRTemp src_vec = newTemp(Ity_V128);
30821 assign(src_vec, getXMMReg( rV ));
30822 IRTemp res_vec = math_DPPD_128( src_vec, dst_vec, imm8 );
30823 putYMMRegLoAndZU( rG, mkexpr(res_vec) );
30824 *uses_vvvv = True;
30825 goto decode_success;
30826 }
30827 break;
30828
sewardj8516a1f2012-06-24 14:26:30 +000030829 case 0x42:
30830 /* VMPSADBW imm8, xmm3/m128,xmm2,xmm1 */
30831 /* VMPSADBW = VEX.NDS.128.66.0F3A.WIG 42 /r ib */
30832 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30833 UChar modrm = getUChar(delta);
30834 Int imm8;
30835 IRTemp src_vec = newTemp(Ity_V128);
30836 IRTemp dst_vec = newTemp(Ity_V128);
30837 UInt rG = gregOfRexRM(pfx, modrm);
30838 UInt rV = getVexNvvvv(pfx);
30839
30840 assign( dst_vec, getXMMReg(rV) );
30841
30842 if ( epartIsReg( modrm ) ) {
30843 UInt rE = eregOfRexRM(pfx, modrm);
30844
30845 imm8 = (Int)getUChar(delta+1);
30846 assign( src_vec, getXMMReg(rE) );
30847 delta += 1+1;
30848 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30849 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30850 } else {
30851 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30852 1/* imm8 is 1 byte after the amode */ );
sewardj8516a1f2012-06-24 14:26:30 +000030853 assign( src_vec, loadLE( Ity_V128, mkexpr(addr) ) );
30854 imm8 = (Int)getUChar(delta+alen);
30855 delta += alen+1;
30856 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30857 dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30858 }
30859
30860 putYMMRegLoAndZU( rG, mkexpr( math_MPSADBW_128(dst_vec,
30861 src_vec, imm8) ) );
30862 *uses_vvvv = True;
30863 goto decode_success;
30864 }
sewardjcc3d2192013-03-27 11:37:33 +000030865 /* VMPSADBW imm8, ymm3/m256,ymm2,ymm1 */
30866 /* VMPSADBW = VEX.NDS.256.66.0F3A.WIG 42 /r ib */
30867 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
30868 UChar modrm = getUChar(delta);
30869 Int imm8;
30870 IRTemp src_vec = newTemp(Ity_V256);
30871 IRTemp dst_vec = newTemp(Ity_V256);
30872 UInt rG = gregOfRexRM(pfx, modrm);
30873 UInt rV = getVexNvvvv(pfx);
30874 IRTemp sHi, sLo, dHi, dLo;
30875 sHi = sLo = dHi = dLo = IRTemp_INVALID;
30876
30877 assign( dst_vec, getYMMReg(rV) );
30878
30879 if ( epartIsReg( modrm ) ) {
30880 UInt rE = eregOfRexRM(pfx, modrm);
30881
30882 imm8 = (Int)getUChar(delta+1);
30883 assign( src_vec, getYMMReg(rE) );
30884 delta += 1+1;
30885 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30886 nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG) );
30887 } else {
30888 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30889 1/* imm8 is 1 byte after the amode */ );
30890 assign( src_vec, loadLE( Ity_V256, mkexpr(addr) ) );
30891 imm8 = (Int)getUChar(delta+alen);
30892 delta += alen+1;
30893 DIP( "vmpsadbw $%d, %s,%s,%s\n", imm8,
30894 dis_buf, nameYMMReg(rV), nameYMMReg(rG) );
30895 }
30896
30897 breakupV256toV128s( dst_vec, &dHi, &dLo );
30898 breakupV256toV128s( src_vec, &sHi, &sLo );
30899 putYMMReg( rG, binop( Iop_V128HLtoV256,
30900 mkexpr( math_MPSADBW_128(dHi, sHi, imm8 >> 3) ),
30901 mkexpr( math_MPSADBW_128(dLo, sLo, imm8) ) ) );
30902 *uses_vvvv = True;
30903 goto decode_success;
30904 }
sewardj8516a1f2012-06-24 14:26:30 +000030905 break;
30906
sewardj1407a362012-06-24 15:11:38 +000030907 case 0x44:
30908 /* VPCLMULQDQ imm8, xmm3/m128,xmm2,xmm1 */
30909 /* VPCLMULQDQ = VEX.NDS.128.66.0F3A.WIG 44 /r ib */
30910 /* 66 0F 3A 44 /r ib = PCLMULQDQ xmm1, xmm2/m128, imm8
30911 * Carry-less multiplication of selected XMM quadwords into XMM
30912 * registers (a.k.a multiplication of polynomials over GF(2))
30913 */
30914 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30915 UChar modrm = getUChar(delta);
30916 Int imm8;
30917 IRTemp sV = newTemp(Ity_V128);
30918 IRTemp dV = newTemp(Ity_V128);
30919 UInt rG = gregOfRexRM(pfx, modrm);
30920 UInt rV = getVexNvvvv(pfx);
30921
30922 assign( dV, getXMMReg(rV) );
30923
30924 if ( epartIsReg( modrm ) ) {
30925 UInt rE = eregOfRexRM(pfx, modrm);
30926 imm8 = (Int)getUChar(delta+1);
30927 assign( sV, getXMMReg(rE) );
30928 delta += 1+1;
30929 DIP( "vpclmulqdq $%d, %s,%s,%s\n", imm8,
30930 nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG) );
30931 } else {
30932 addr = disAMode( &alen, vbi, pfx, delta, dis_buf,
30933 1/* imm8 is 1 byte after the amode */ );
30934 assign( sV, loadLE( Ity_V128, mkexpr(addr) ) );
30935 imm8 = (Int)getUChar(delta+alen);
30936 delta += alen+1;
30937 DIP( "vpclmulqdq $%d, %s,%s,%s\n",
30938 imm8, dis_buf, nameXMMReg(rV), nameXMMReg(rG) );
30939 }
30940
30941 putYMMRegLoAndZU( rG, mkexpr( math_PCLMULQDQ(dV, sV, imm8) ) );
30942 *uses_vvvv = True;
30943 goto decode_success;
30944 }
30945 break;
30946
sewardjcc3d2192013-03-27 11:37:33 +000030947 case 0x46:
30948 /* VPERM2I128 imm8, ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F3A.W0 46 /r ib */
30949 if (have66noF2noF3(pfx)
30950 && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
30951 UChar modrm = getUChar(delta);
30952 UInt imm8 = 0;
30953 UInt rG = gregOfRexRM(pfx, modrm);
30954 UInt rV = getVexNvvvv(pfx);
30955 IRTemp s00 = newTemp(Ity_V128);
30956 IRTemp s01 = newTemp(Ity_V128);
30957 IRTemp s10 = newTemp(Ity_V128);
30958 IRTemp s11 = newTemp(Ity_V128);
30959 assign(s00, getYMMRegLane128(rV, 0));
30960 assign(s01, getYMMRegLane128(rV, 1));
30961 if (epartIsReg(modrm)) {
30962 UInt rE = eregOfRexRM(pfx, modrm);
30963 delta += 1;
30964 imm8 = getUChar(delta);
30965 DIP("vperm2i128 $%u,%s,%s,%s\n",
30966 imm8, nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
30967 assign(s10, getYMMRegLane128(rE, 0));
30968 assign(s11, getYMMRegLane128(rE, 1));
30969 } else {
30970 addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 1 );
30971 delta += alen;
30972 imm8 = getUChar(delta);
30973 DIP("vperm2i128 $%u,%s,%s,%s\n",
30974 imm8, dis_buf, nameYMMReg(rV), nameYMMReg(rG));
30975 assign(s10, loadLE(Ity_V128, binop(Iop_Add64,
30976 mkexpr(addr), mkU64(0))));
30977 assign(s11, loadLE(Ity_V128, binop(Iop_Add64,
30978 mkexpr(addr), mkU64(16))));
30979 }
30980 delta++;
30981# define SEL(_nn) (((_nn)==0) ? s00 : ((_nn)==1) ? s01 \
30982 : ((_nn)==2) ? s10 : s11)
30983 putYMMRegLane128(rG, 0, mkexpr(SEL((imm8 >> 0) & 3)));
30984 putYMMRegLane128(rG, 1, mkexpr(SEL((imm8 >> 4) & 3)));
30985# undef SEL
30986 if (imm8 & (1<<3)) putYMMRegLane128(rG, 0, mkV128(0));
30987 if (imm8 & (1<<7)) putYMMRegLane128(rG, 1, mkV128(0));
30988 *uses_vvvv = True;
30989 goto decode_success;
30990 }
30991 break;
30992
sewardj4c0a7ac2012-06-21 09:08:19 +000030993 case 0x4A:
30994 /* VBLENDVPS xmmG, xmmE/memE, xmmV, xmmIS4
30995 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
30996 /* VBLENDVPS = VEX.NDS.128.66.0F3A.WIG 4A /r /is4 */
30997 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
30998 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
30999 "vblendvps", 4, Iop_SarN32x4 );
31000 *uses_vvvv = True;
31001 goto decode_success;
31002 }
31003 /* VBLENDVPS ymmG, ymmE/memE, ymmV, ymmIS4
31004 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31005 /* VBLENDVPS = VEX.NDS.256.66.0F3A.WIG 4A /r /is4 */
31006 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31007 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31008 "vblendvps", 4, Iop_SarN32x4 );
31009 *uses_vvvv = True;
31010 goto decode_success;
31011 }
31012 break;
31013
31014 case 0x4B:
31015 /* VBLENDVPD xmmG, xmmE/memE, xmmV, xmmIS4
31016 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
31017 /* VBLENDVPD = VEX.NDS.128.66.0F3A.WIG 4B /r /is4 */
31018 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31019 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31020 "vblendvpd", 8, Iop_SarN64x2 );
31021 *uses_vvvv = True;
31022 goto decode_success;
31023 }
31024 /* VBLENDVPD ymmG, ymmE/memE, ymmV, ymmIS4
31025 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31026 /* VBLENDVPD = VEX.NDS.256.66.0F3A.WIG 4B /r /is4 */
31027 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31028 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31029 "vblendvpd", 8, Iop_SarN64x2 );
31030 *uses_vvvv = True;
31031 goto decode_success;
31032 }
31033 break;
31034
sewardjc4530ae2012-05-21 10:18:49 +000031035 case 0x4C:
31036 /* VPBLENDVB xmmG, xmmE/memE, xmmV, xmmIS4
31037 ::: xmmG:V128 = PBLEND(xmmE, xmmV, xmmIS4) (RMVR) */
sewardj4c0a7ac2012-06-21 09:08:19 +000031038 /* VPBLENDVB = VEX.NDS.128.66.0F3A.WIG 4C /r /is4 */
31039 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31040 delta = dis_VBLENDV_128 ( vbi, pfx, delta,
31041 "vpblendvb", 1, Iop_SarN8x16 );
sewardjc4530ae2012-05-21 10:18:49 +000031042 *uses_vvvv = True;
31043 goto decode_success;
sewardj4c0a7ac2012-06-21 09:08:19 +000031044 }
sewardjcc3d2192013-03-27 11:37:33 +000031045 /* VPBLENDVB ymmG, ymmE/memE, ymmV, ymmIS4
31046 ::: ymmG:V256 = PBLEND(ymmE, ymmV, ymmIS4) (RMVR) */
31047 /* VPBLENDVB = VEX.NDS.256.66.0F3A.WIG 4C /r /is4 */
31048 if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
31049 delta = dis_VBLENDV_256 ( vbi, pfx, delta,
31050 "vpblendvb", 1, Iop_SarN8x16 );
31051 *uses_vvvv = True;
31052 goto decode_success;
31053 }
sewardj4c0a7ac2012-06-21 09:08:19 +000031054 break;
sewardjc4530ae2012-05-21 10:18:49 +000031055
sewardjac75d7b2012-05-23 12:42:39 +000031056 case 0x60:
31057 case 0x61:
31058 case 0x62:
31059 case 0x63:
31060 /* VEX.128.66.0F3A.WIG 63 /r ib = VPCMPISTRI imm8, xmm2/m128, xmm1
31061 VEX.128.66.0F3A.WIG 62 /r ib = VPCMPISTRM imm8, xmm2/m128, xmm1
31062 VEX.128.66.0F3A.WIG 61 /r ib = VPCMPESTRI imm8, xmm2/m128, xmm1
31063 VEX.128.66.0F3A.WIG 60 /r ib = VPCMPESTRM imm8, xmm2/m128, xmm1
31064 (selected special cases that actually occur in glibc,
31065 not by any means a complete implementation.)
31066 */
31067 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31068 Long delta0 = delta;
31069 delta = dis_PCMPxSTRx( vbi, pfx, delta, True/*isAvx*/, opc );
31070 if (delta > delta0) goto decode_success;
31071 /* else fall through; dis_PCMPxSTRx failed to decode it */
31072 }
31073 break;
31074
sewardj1407a362012-06-24 15:11:38 +000031075 case 0xDF:
31076 /* VAESKEYGENASSIST imm8, xmm2/m128, xmm1 = VEX.128.66.0F3A.WIG DF /r ib */
31077 if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
31078 delta = dis_AESKEYGENASSIST( vbi, pfx, delta, True/*!isAvx*/ );
31079 goto decode_success;
31080 }
31081 break;
31082
sewardjcc3d2192013-03-27 11:37:33 +000031083 case 0xF0:
31084 /* RORX imm8, r/m32, r32a = VEX.LZ.F2.0F3A.W0 F0 /r /i */
31085 /* RORX imm8, r/m64, r64a = VEX.LZ.F2.0F3A.W1 F0 /r /i */
31086 if (haveF2no66noF3(pfx) && 0==getVexL(pfx)/*LZ*/ && !haveREX(pfx)) {
31087 Int size = getRexW(pfx) ? 8 : 4;
31088 IRType ty = szToITy(size);
31089 IRTemp src = newTemp(ty);
31090 UChar rm = getUChar(delta);
31091 UChar imm8;
31092
31093 if (epartIsReg(rm)) {
31094 imm8 = getUChar(delta+1);
31095 assign( src, getIRegE(size,pfx,rm) );
31096 DIP("rorx %d,%s,%s\n", imm8, nameIRegE(size,pfx,rm),
31097 nameIRegG(size,pfx,rm));
31098 delta += 2;
31099 } else {
31100 addr = disAMode ( &alen, vbi, pfx, delta, dis_buf, 0 );
31101 imm8 = getUChar(delta+alen);
31102 assign( src, loadLE(ty, mkexpr(addr)) );
31103 DIP("rorx %d,%s,%s\n", imm8, dis_buf, nameIRegG(size,pfx,rm));
31104 delta += alen + 1;
31105 }
31106 imm8 &= 8*size-1;
31107
31108 /* dst = (src >>u imm8) | (src << (8*size-imm8)) */
31109 putIRegG( size, pfx, rm,
31110 imm8 == 0 ? mkexpr(src)
31111 : binop( mkSizedOp(ty,Iop_Or8),
31112 binop( mkSizedOp(ty,Iop_Shr8), mkexpr(src),
31113 mkU8(imm8) ),
31114 binop( mkSizedOp(ty,Iop_Shl8), mkexpr(src),
31115 mkU8(8*size-imm8) ) ) );
31116 /* Flags aren't modified. */
31117 goto decode_success;
31118 }
31119 break;
31120
sewardjc4530ae2012-05-21 10:18:49 +000031121 default:
31122 break;
31123
31124 }
31125
31126 //decode_failure:
31127 return deltaIN;
31128
31129 decode_success:
31130 return delta;
31131}
31132
31133
31134/*------------------------------------------------------------*/
31135/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000031136/*--- Disassemble a single instruction ---*/
sewardj80611e32012-01-20 13:07:24 +000031137/*--- ---*/
sewardjdf0e0022005-01-25 15:48:43 +000031138/*------------------------------------------------------------*/
31139
sewardj9e6491a2005-07-02 19:24:10 +000031140/* Disassemble a single instruction into IR. The instruction is
31141 located in host memory at &guest_code[delta]. */
sewardjdf0e0022005-01-25 15:48:43 +000031142
sewardj9e6491a2005-07-02 19:24:10 +000031143static
31144DisResult disInstr_AMD64_WRK (
sewardje9d8a262009-07-01 08:06:34 +000031145 /*OUT*/Bool* expect_CAS,
sewardjc716aea2006-01-17 01:48:46 +000031146 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
sewardj984d9b12010-01-15 10:53:21 +000031147 Bool resteerCisOk,
sewardjc716aea2006-01-17 01:48:46 +000031148 void* callback_opaque,
sewardj9e6491a2005-07-02 19:24:10 +000031149 Long delta64,
sewardjaca070a2006-10-17 00:28:22 +000031150 VexArchInfo* archinfo,
sewardj442e51a2012-12-06 18:08:04 +000031151 VexAbiInfo* vbi,
31152 Bool sigill_diag
sewardj9e6491a2005-07-02 19:24:10 +000031153 )
sewardjdf0e0022005-01-25 15:48:43 +000031154{
sewardj80611e32012-01-20 13:07:24 +000031155 IRTemp t1, t2, t3, t4, t5, t6;
31156 UChar pre;
31157 Int n, n_prefixes;
sewardj9e6491a2005-07-02 19:24:10 +000031158 DisResult dres;
sewardjdf0e0022005-01-25 15:48:43 +000031159
sewardj9e6491a2005-07-02 19:24:10 +000031160 /* The running delta */
31161 Long delta = delta64;
31162
sewardjdf0e0022005-01-25 15:48:43 +000031163 /* Holds eip at the start of the insn, so that we can print
31164 consistent error messages for unimplemented insns. */
sewardj270def42005-07-03 01:03:01 +000031165 Long delta_start = delta;
sewardjdf0e0022005-01-25 15:48:43 +000031166
31167 /* sz denotes the nominal data-op size of the insn; we change it to
31168 2 if an 0x66 prefix is seen and 8 if REX.W is 1. In case of
31169 conflict REX.W takes precedence. */
31170 Int sz = 4;
31171
sewardj3ca55a12005-01-27 16:06:23 +000031172 /* pfx holds the summary of prefixes. */
31173 Prefix pfx = PFX_EMPTY;
sewardjdf0e0022005-01-25 15:48:43 +000031174
sewardjc4530ae2012-05-21 10:18:49 +000031175 /* Holds the computed opcode-escape indication. */
31176 Escape esc = ESC_NONE;
31177
sewardj9e6491a2005-07-02 19:24:10 +000031178 /* Set result defaults. */
sewardjc6f970f2012-04-02 21:54:49 +000031179 dres.whatNext = Dis_Continue;
31180 dres.len = 0;
31181 dres.continueAt = 0;
31182 dres.jk_StopHere = Ijk_INVALID;
sewardje9d8a262009-07-01 08:06:34 +000031183 *expect_CAS = False;
31184
sewardj9e6491a2005-07-02 19:24:10 +000031185 vassert(guest_RIP_next_assumed == 0);
31186 vassert(guest_RIP_next_mustcheck == False);
sewardj4b744762005-02-07 15:02:25 +000031187
sewardj80611e32012-01-20 13:07:24 +000031188 t1 = t2 = t3 = t4 = t5 = t6 = IRTemp_INVALID;
sewardjdf0e0022005-01-25 15:48:43 +000031189
sewardj9e6491a2005-07-02 19:24:10 +000031190 DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
31191
sewardjce02aa72006-01-12 12:27:58 +000031192 /* Spot "Special" instructions (see comment at top of file). */
sewardjdf0e0022005-01-25 15:48:43 +000031193 {
florian8462d112014-09-24 15:18:09 +000031194 const UChar* code = guest_code + delta;
sewardjce02aa72006-01-12 12:27:58 +000031195 /* Spot the 16-byte preamble:
31196 48C1C703 rolq $3, %rdi
31197 48C1C70D rolq $13, %rdi
31198 48C1C73D rolq $61, %rdi
31199 48C1C733 rolq $51, %rdi
sewardjdf0e0022005-01-25 15:48:43 +000031200 */
sewardjce02aa72006-01-12 12:27:58 +000031201 if (code[ 0] == 0x48 && code[ 1] == 0xC1 && code[ 2] == 0xC7
31202 && code[ 3] == 0x03 &&
31203 code[ 4] == 0x48 && code[ 5] == 0xC1 && code[ 6] == 0xC7
31204 && code[ 7] == 0x0D &&
31205 code[ 8] == 0x48 && code[ 9] == 0xC1 && code[10] == 0xC7
31206 && code[11] == 0x3D &&
31207 code[12] == 0x48 && code[13] == 0xC1 && code[14] == 0xC7
31208 && code[15] == 0x33) {
31209 /* Got a "Special" instruction preamble. Which one is it? */
31210 if (code[16] == 0x48 && code[17] == 0x87
31211 && code[18] == 0xDB /* xchgq %rbx,%rbx */) {
31212 /* %RDX = client_request ( %RAX ) */
31213 DIP("%%rdx = client_request ( %%rax )\n");
31214 delta += 19;
sewardjc6f970f2012-04-02 21:54:49 +000031215 jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
31216 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000031217 goto decode_success;
31218 }
31219 else
31220 if (code[16] == 0x48 && code[17] == 0x87
31221 && code[18] == 0xC9 /* xchgq %rcx,%rcx */) {
31222 /* %RAX = guest_NRADDR */
31223 DIP("%%rax = guest_NRADDR\n");
31224 delta += 19;
31225 putIRegRAX(8, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
31226 goto decode_success;
31227 }
31228 else
31229 if (code[16] == 0x48 && code[17] == 0x87
31230 && code[18] == 0xD2 /* xchgq %rdx,%rdx */) {
31231 /* call-noredir *%RAX */
31232 DIP("call-noredir *%%rax\n");
31233 delta += 19;
31234 t1 = newTemp(Ity_I64);
31235 assign(t1, getIRegRAX(8));
31236 t2 = newTemp(Ity_I64);
31237 assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
31238 putIReg64(R_RSP, mkexpr(t2));
31239 storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
sewardjc6f970f2012-04-02 21:54:49 +000031240 jmp_treg(&dres, Ijk_NoRedir, t1);
31241 vassert(dres.whatNext == Dis_StopHere);
sewardjce02aa72006-01-12 12:27:58 +000031242 goto decode_success;
31243 }
florian2245ce92012-08-28 16:49:30 +000031244 else
31245 if (code[16] == 0x48 && code[17] == 0x87
31246 && code[18] == 0xff /* xchgq %rdi,%rdi */) {
31247 /* IR injection */
31248 DIP("IR injection\n");
31249 vex_inject_ir(irsb, Iend_LE);
31250
31251 // Invalidate the current insn. The reason is that the IRop we're
31252 // injecting here can change. In which case the translation has to
31253 // be redone. For ease of handling, we simply invalidate all the
31254 // time.
sewardj05f5e012014-05-04 10:52:11 +000031255 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_RIP_curr_instr)));
31256 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(19)));
florian2245ce92012-08-28 16:49:30 +000031257
31258 delta += 19;
31259
31260 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
31261 dres.whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +000031262 dres.jk_StopHere = Ijk_InvalICache;
florian2245ce92012-08-28 16:49:30 +000031263 goto decode_success;
31264 }
sewardjce02aa72006-01-12 12:27:58 +000031265 /* We don't know what it is. */
31266 goto decode_failure;
31267 /*NOTREACHED*/
sewardjdf0e0022005-01-25 15:48:43 +000031268 }
31269 }
31270
31271 /* Eat prefixes, summarising the result in pfx and sz, and rejecting
31272 as many invalid combinations as possible. */
31273 n_prefixes = 0;
31274 while (True) {
sewardj54477e32007-08-23 18:53:59 +000031275 if (n_prefixes > 7) goto decode_failure;
sewardj8c332e22005-01-28 01:36:56 +000031276 pre = getUChar(delta);
sewardjdf0e0022005-01-25 15:48:43 +000031277 switch (pre) {
31278 case 0x66: pfx |= PFX_66; break;
31279 case 0x67: pfx |= PFX_ASO; break;
31280 case 0xF2: pfx |= PFX_F2; break;
31281 case 0xF3: pfx |= PFX_F3; break;
sewardje9d8a262009-07-01 08:06:34 +000031282 case 0xF0: pfx |= PFX_LOCK; *expect_CAS = True; break;
sewardjdf0e0022005-01-25 15:48:43 +000031283 case 0x2E: pfx |= PFX_CS; break;
31284 case 0x3E: pfx |= PFX_DS; break;
31285 case 0x26: pfx |= PFX_ES; break;
31286 case 0x64: pfx |= PFX_FS; break;
31287 case 0x65: pfx |= PFX_GS; break;
31288 case 0x36: pfx |= PFX_SS; break;
31289 case 0x40 ... 0x4F:
31290 pfx |= PFX_REX;
31291 if (pre & (1<<3)) pfx |= PFX_REXW;
31292 if (pre & (1<<2)) pfx |= PFX_REXR;
31293 if (pre & (1<<1)) pfx |= PFX_REXX;
31294 if (pre & (1<<0)) pfx |= PFX_REXB;
31295 break;
31296 default:
sewardjc4530ae2012-05-21 10:18:49 +000031297 goto not_a_legacy_prefix;
sewardjdf0e0022005-01-25 15:48:43 +000031298 }
31299 n_prefixes++;
31300 delta++;
31301 }
31302
sewardjc4530ae2012-05-21 10:18:49 +000031303 not_a_legacy_prefix:
31304 /* We've used up all the non-VEX prefixes. Parse and validate a
31305 VEX prefix if that's appropriate. */
31306 if (archinfo->hwcaps & VEX_HWCAPS_AMD64_AVX) {
31307 /* Used temporarily for holding VEX prefixes. */
31308 UChar vex0 = getUChar(delta);
31309 if (vex0 == 0xC4) {
31310 /* 3-byte VEX */
31311 UChar vex1 = getUChar(delta+1);
31312 UChar vex2 = getUChar(delta+2);
31313 delta += 3;
31314 pfx |= PFX_VEX;
31315 /* Snarf contents of byte 1 */
31316 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
31317 /* X */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_REXX;
31318 /* B */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_REXB;
31319 /* m-mmmm */
31320 switch (vex1 & 0x1F) {
31321 case 1: esc = ESC_0F; break;
31322 case 2: esc = ESC_0F38; break;
31323 case 3: esc = ESC_0F3A; break;
31324 /* Any other m-mmmm field will #UD */
31325 default: goto decode_failure;
31326 }
31327 /* Snarf contents of byte 2 */
31328 /* W */ pfx |= (vex2 & (1<<7)) ? PFX_REXW : 0;
31329 /* ~v3 */ pfx |= (vex2 & (1<<6)) ? 0 : PFX_VEXnV3;
31330 /* ~v2 */ pfx |= (vex2 & (1<<5)) ? 0 : PFX_VEXnV2;
31331 /* ~v1 */ pfx |= (vex2 & (1<<4)) ? 0 : PFX_VEXnV1;
31332 /* ~v0 */ pfx |= (vex2 & (1<<3)) ? 0 : PFX_VEXnV0;
31333 /* L */ pfx |= (vex2 & (1<<2)) ? PFX_VEXL : 0;
31334 /* pp */
31335 switch (vex2 & 3) {
31336 case 0: break;
31337 case 1: pfx |= PFX_66; break;
31338 case 2: pfx |= PFX_F3; break;
31339 case 3: pfx |= PFX_F2; break;
31340 default: vassert(0);
31341 }
31342 }
31343 else if (vex0 == 0xC5) {
31344 /* 2-byte VEX */
31345 UChar vex1 = getUChar(delta+1);
31346 delta += 2;
31347 pfx |= PFX_VEX;
31348 /* Snarf contents of byte 1 */
31349 /* R */ pfx |= (vex1 & (1<<7)) ? 0 : PFX_REXR;
31350 /* ~v3 */ pfx |= (vex1 & (1<<6)) ? 0 : PFX_VEXnV3;
31351 /* ~v2 */ pfx |= (vex1 & (1<<5)) ? 0 : PFX_VEXnV2;
31352 /* ~v1 */ pfx |= (vex1 & (1<<4)) ? 0 : PFX_VEXnV1;
31353 /* ~v0 */ pfx |= (vex1 & (1<<3)) ? 0 : PFX_VEXnV0;
31354 /* L */ pfx |= (vex1 & (1<<2)) ? PFX_VEXL : 0;
31355 /* pp */
31356 switch (vex1 & 3) {
31357 case 0: break;
31358 case 1: pfx |= PFX_66; break;
31359 case 2: pfx |= PFX_F3; break;
31360 case 3: pfx |= PFX_F2; break;
31361 default: vassert(0);
31362 }
31363 /* implied: */
31364 esc = ESC_0F;
31365 }
31366 /* Can't have both VEX and REX */
31367 if ((pfx & PFX_VEX) && (pfx & PFX_REX))
31368 goto decode_failure; /* can't have both */
31369 }
sewardjdf0e0022005-01-25 15:48:43 +000031370
sewardj42561ef2005-11-04 14:18:31 +000031371 /* Dump invalid combinations */
sewardjdf0e0022005-01-25 15:48:43 +000031372 n = 0;
31373 if (pfx & PFX_F2) n++;
31374 if (pfx & PFX_F3) n++;
sewardj3ca55a12005-01-27 16:06:23 +000031375 if (n > 1)
31376 goto decode_failure; /* can't have both */
sewardjdf0e0022005-01-25 15:48:43 +000031377
31378 n = 0;
31379 if (pfx & PFX_CS) n++;
31380 if (pfx & PFX_DS) n++;
31381 if (pfx & PFX_ES) n++;
31382 if (pfx & PFX_FS) n++;
31383 if (pfx & PFX_GS) n++;
31384 if (pfx & PFX_SS) n++;
sewardj3ca55a12005-01-27 16:06:23 +000031385 if (n > 1)
31386 goto decode_failure; /* multiple seg overrides == illegal */
sewardjdf0e0022005-01-25 15:48:43 +000031387
sewardjceccb292009-01-22 20:40:22 +000031388 /* We have a %fs prefix. Reject it if there's no evidence in 'vbi'
31389 that we should accept it. */
31390 if ((pfx & PFX_FS) && !vbi->guest_amd64_assume_fs_is_zero)
31391 goto decode_failure;
31392
31393 /* Ditto for %gs prefixes. */
31394 if ((pfx & PFX_GS) && !vbi->guest_amd64_assume_gs_is_0x60)
31395 goto decode_failure;
sewardj42561ef2005-11-04 14:18:31 +000031396
sewardjdf0e0022005-01-25 15:48:43 +000031397 /* Set up sz. */
31398 sz = 4;
31399 if (pfx & PFX_66) sz = 2;
31400 if ((pfx & PFX_REX) && (pfx & PFX_REXW)) sz = 8;
31401
sewardje9d8a262009-07-01 08:06:34 +000031402 /* Now we should be looking at the primary opcode byte or the
sewardj80611e32012-01-20 13:07:24 +000031403 leading escapes. Check that any LOCK prefix is actually
sewardje9d8a262009-07-01 08:06:34 +000031404 allowed. */
sewardj6491f862013-10-15 17:29:19 +000031405 if (haveLOCK(pfx)) {
florian8462d112014-09-24 15:18:09 +000031406 if (can_be_used_with_LOCK_prefix( &guest_code[delta] )) {
sewardjc4356f02007-11-09 21:15:04 +000031407 DIP("lock ");
31408 } else {
sewardje9d8a262009-07-01 08:06:34 +000031409 *expect_CAS = False;
sewardjc4356f02007-11-09 21:15:04 +000031410 goto decode_failure;
31411 }
sewardjdf0e0022005-01-25 15:48:43 +000031412 }
31413
sewardj80611e32012-01-20 13:07:24 +000031414 /* Eat up opcode escape bytes, until we're really looking at the
sewardjc4530ae2012-05-21 10:18:49 +000031415 primary opcode byte. But only if there's no VEX present. */
31416 if (!(pfx & PFX_VEX)) {
31417 vassert(esc == ESC_NONE);
sewardj80611e32012-01-20 13:07:24 +000031418 pre = getUChar(delta);
sewardjc4530ae2012-05-21 10:18:49 +000031419 if (pre == 0x0F) {
31420 delta++;
31421 pre = getUChar(delta);
31422 switch (pre) {
31423 case 0x38: esc = ESC_0F38; delta++; break;
31424 case 0x3A: esc = ESC_0F3A; delta++; break;
31425 default: esc = ESC_0F; break;
31426 }
sewardj80611e32012-01-20 13:07:24 +000031427 }
31428 }
31429
31430 /* So now we're really really looking at the primary opcode
31431 byte. */
31432 Long delta_at_primary_opcode = delta;
sewardjc4530ae2012-05-21 10:18:49 +000031433
31434 if (!(pfx & PFX_VEX)) {
31435 /* Handle non-VEX prefixed instructions. "Legacy" (non-VEX) SSE
31436 instructions preserve the upper 128 bits of YMM registers;
31437 iow we can simply ignore the presence of the upper halves of
31438 these registers. */
31439 switch (esc) {
31440 case ESC_NONE:
31441 delta = dis_ESC_NONE( &dres, expect_CAS,
31442 resteerOkFn, resteerCisOk, callback_opaque,
31443 archinfo, vbi, pfx, sz, delta );
31444 break;
31445 case ESC_0F:
31446 delta = dis_ESC_0F ( &dres, expect_CAS,
31447 resteerOkFn, resteerCisOk, callback_opaque,
31448 archinfo, vbi, pfx, sz, delta );
31449 break;
31450 case ESC_0F38:
31451 delta = dis_ESC_0F38( &dres,
31452 resteerOkFn, resteerCisOk, callback_opaque,
31453 archinfo, vbi, pfx, sz, delta );
31454 break;
31455 case ESC_0F3A:
31456 delta = dis_ESC_0F3A( &dres,
31457 resteerOkFn, resteerCisOk, callback_opaque,
31458 archinfo, vbi, pfx, sz, delta );
31459 break;
31460 default:
31461 vassert(0);
31462 }
31463 } else {
31464 /* VEX prefixed instruction */
31465 /* Sloppy Intel wording: "An instruction encoded with a VEX.128
31466 prefix that loads a YMM register operand ..." zeroes out bits
31467 128 and above of the register. */
31468 Bool uses_vvvv = False;
31469 switch (esc) {
31470 case ESC_0F:
31471 delta = dis_ESC_0F__VEX ( &dres, &uses_vvvv,
31472 resteerOkFn, resteerCisOk,
31473 callback_opaque,
31474 archinfo, vbi, pfx, sz, delta );
31475 break;
31476 case ESC_0F38:
31477 delta = dis_ESC_0F38__VEX ( &dres, &uses_vvvv,
31478 resteerOkFn, resteerCisOk,
31479 callback_opaque,
31480 archinfo, vbi, pfx, sz, delta );
31481 break;
31482 case ESC_0F3A:
31483 delta = dis_ESC_0F3A__VEX ( &dres, &uses_vvvv,
31484 resteerOkFn, resteerCisOk,
31485 callback_opaque,
31486 archinfo, vbi, pfx, sz, delta );
31487 break;
sewardjac75d7b2012-05-23 12:42:39 +000031488 case ESC_NONE:
31489 /* The presence of a VEX prefix, by Intel definition,
31490 always implies at least an 0F escape. */
31491 goto decode_failure;
sewardjc4530ae2012-05-21 10:18:49 +000031492 default:
sewardjac75d7b2012-05-23 12:42:39 +000031493 vassert(0);
sewardjc4530ae2012-05-21 10:18:49 +000031494 }
31495 /* If the insn doesn't use VEX.vvvv then it must be all ones.
31496 Check this. */
31497 if (!uses_vvvv) {
31498 if (getVexNvvvv(pfx) != 0)
31499 goto decode_failure;
31500 }
sewardj80611e32012-01-20 13:07:24 +000031501 }
sewardjc4530ae2012-05-21 10:18:49 +000031502
sewardj80611e32012-01-20 13:07:24 +000031503 vassert(delta - delta_at_primary_opcode >= 0);
31504 vassert(delta - delta_at_primary_opcode < 16/*let's say*/);
31505
31506 /* Use delta == delta_at_primary_opcode to denote decode failure.
31507 This implies that any successful decode must use at least one
31508 byte up. */
31509 if (delta == delta_at_primary_opcode)
31510 goto decode_failure;
31511 else
31512 goto decode_success; /* \o/ */
31513
31514#if 0 /* XYZZY */
sewardja6b93d12005-02-17 09:28:28 +000031515
31516 /* ---------------------------------------------------- */
sewardj09717342005-05-05 21:34:02 +000031517 /* --- The SSE/SSE2 decoder. --- */
sewardja6b93d12005-02-17 09:28:28 +000031518 /* ---------------------------------------------------- */
31519
31520 /* What did I do to deserve SSE ? Perhaps I was really bad in a
31521 previous life? */
31522
sewardj09717342005-05-05 21:34:02 +000031523 /* Note, this doesn't handle SSE3 right now. All amd64s support
31524 SSE2 as a minimum so there is no point distinguishing SSE1 vs
31525 SSE2. */
31526
florian8462d112014-09-24 15:18:09 +000031527 insn = &guest_code[delta];
sewardja6b93d12005-02-17 09:28:28 +000031528
sewardj5abcfe62007-01-10 04:59:33 +000031529 /* FXSAVE is spuriously at the start here only because it is
31530 thusly placed in guest-x86/toIR.c. */
31531
sewardj5abcfe62007-01-10 04:59:33 +000031532 /* ------ SSE decoder main ------ */
sewardj432f8b62005-05-10 02:50:05 +000031533
sewardj5992bd02005-05-11 02:13:42 +000031534 /* ---------------------------------------------------- */
31535 /* --- end of the SSE decoder. --- */
31536 /* ---------------------------------------------------- */
31537
31538 /* ---------------------------------------------------- */
31539 /* --- start of the SSE2 decoder. --- */
31540 /* ---------------------------------------------------- */
sewardj4c328cf2005-05-05 12:05:54 +000031541
sewardjdf0e0022005-01-25 15:48:43 +000031542 /* ---------------------------------------------------- */
31543 /* --- end of the SSE/SSE2 decoder. --- */
31544 /* ---------------------------------------------------- */
31545
sewardjfcf21f32006-08-04 14:51:19 +000031546 /* ---------------------------------------------------- */
31547 /* --- start of the SSE3 decoder. --- */
31548 /* ---------------------------------------------------- */
31549
sewardjfcf21f32006-08-04 14:51:19 +000031550 /* ---------------------------------------------------- */
31551 /* --- end of the SSE3 decoder. --- */
31552 /* ---------------------------------------------------- */
31553
sewardjd166e282008-02-06 11:42:45 +000031554 /* ---------------------------------------------------- */
31555 /* --- start of the SSSE3 decoder. --- */
31556 /* ---------------------------------------------------- */
31557
sewardjd166e282008-02-06 11:42:45 +000031558 /* ---------------------------------------------------- */
31559 /* --- end of the SSSE3 decoder. --- */
31560 /* ---------------------------------------------------- */
31561
de5a70f5c2010-04-01 23:08:59 +000031562 /* ---------------------------------------------------- */
31563 /* --- start of the SSE4 decoder --- */
31564 /* ---------------------------------------------------- */
31565
de5a70f5c2010-04-01 23:08:59 +000031566 /* ---------------------------------------------------- */
31567 /* --- end of the SSE4 decoder --- */
31568 /* ---------------------------------------------------- */
31569
sewardj7a240552005-01-28 21:37:12 +000031570 /*after_sse_decoders:*/
sewardjdf0e0022005-01-25 15:48:43 +000031571
31572 /* Get the primary opcode. */
sewardj8c332e22005-01-28 01:36:56 +000031573 opc = getUChar(delta); delta++;
sewardjdf0e0022005-01-25 15:48:43 +000031574
31575 /* We get here if the current insn isn't SSE, or this CPU doesn't
31576 support SSE. */
31577
31578 switch (opc) {
31579
31580 /* ------------------------ Control flow --------------- */
31581
sewardj3ca55a12005-01-27 16:06:23 +000031582 /* ------------------------ CWD/CDQ -------------------- */
31583
sewardj8d965312005-02-25 02:48:47 +000031584 /* ------------------------ FPU ops -------------------- */
31585
sewardj4fa325a2005-11-03 13:27:24 +000031586 /* ------------------------ INT ------------------------ */
31587
31588 case 0xCD: { /* INT imm8 */
31589 IRJumpKind jk = Ijk_Boring;
31590 if (have66orF2orF3(pfx)) goto decode_failure;
31591 d64 = getUChar(delta); delta++;
31592 switch (d64) {
31593 case 32: jk = Ijk_Sys_int32; break;
31594 default: goto decode_failure;
31595 }
31596 guest_RIP_next_mustcheck = True;
31597 guest_RIP_next_assumed = guest_RIP_bbstart + delta;
31598 jmp_lit(jk, guest_RIP_next_assumed);
31599 /* It's important that all ArchRegs carry their up-to-date value
31600 at this point. So we declare an end-of-block here, which
31601 forces any TempRegs caching ArchRegs to be flushed. */
sewardjc6f970f2012-04-02 21:54:49 +000031602 vassert(dres.whatNext == Dis_StopHere);
sewardj4fa325a2005-11-03 13:27:24 +000031603 DIP("int $0x%02x\n", (UInt)d64);
31604 break;
31605 }
31606
sewardjf8c37f72005-02-07 18:55:29 +000031607 /* ------------------------ Jcond, byte offset --------- */
31608
sewardj32b2bbe2005-01-28 00:50:10 +000031609 /* ------------------------ IMUL ----------------------- */
31610
sewardj1389d4d2005-01-28 13:46:29 +000031611 /* ------------------------ MOV ------------------------ */
31612
sewardj5e525292005-01-28 15:13:10 +000031613 /* ------------------------ MOVx ------------------------ */
31614
sewardj4c328cf2005-05-05 12:05:54 +000031615 /* ------------------------ opl imm, A ----------------- */
31616
sewardj118b23e2005-01-29 02:14:44 +000031617 /* ------------------------ opl Ev, Gv ----------------- */
sewardj118b23e2005-01-29 02:14:44 +000031618
31619 /* ------------------------ opl Gv, Ev ----------------- */
31620
sewardj55dbb262005-01-28 16:36:51 +000031621 /* ------------------------ POP ------------------------ */
31622
sewardj55dbb262005-01-28 16:36:51 +000031623 /* ------------------------ PUSH ----------------------- */
31624
sewardj909c06d2005-02-19 22:47:41 +000031625 /* ------ AE: SCAS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000031626
sewardj909c06d2005-02-19 22:47:41 +000031627 /* ------ A6, A7: CMPS variants ------ */
sewardja6b93d12005-02-17 09:28:28 +000031628
sewardj909c06d2005-02-19 22:47:41 +000031629 /* ------ AA, AB: STOS variants ------ */
sewardjd0a12df2005-02-10 02:07:43 +000031630
sewardj909c06d2005-02-19 22:47:41 +000031631 /* ------ A4, A5: MOVS variants ------ */
sewardj7de0d3c2005-02-13 02:26:41 +000031632
31633 /* ------------------------ XCHG ----------------------- */
31634
sewardjbb4396c2007-11-20 17:29:08 +000031635 /* ------------------------ IN / OUT ----------------------- */
31636
sewardj3ca55a12005-01-27 16:06:23 +000031637 /* ------------------------ (Grp1 extensions) ---------- */
31638
sewardj118b23e2005-01-29 02:14:44 +000031639 /* ------------------------ (Grp2 extensions) ---------- */
sewardj03b07cc2005-01-31 18:09:43 +000031640
sewardj32b2bbe2005-01-28 00:50:10 +000031641 /* ------------------------ (Grp3 extensions) ---------- */
31642
sewardj03b07cc2005-01-31 18:09:43 +000031643 /* ------------------------ (Grp4 extensions) ---------- */
31644
sewardj354e5c62005-01-27 20:12:52 +000031645 /* ------------------------ (Grp5 extensions) ---------- */
31646
sewardj3ca55a12005-01-27 16:06:23 +000031647 /* ------------------------ Escapes to 2-byte opcodes -- */
31648
31649 case 0x0F: {
sewardj8c332e22005-01-28 01:36:56 +000031650 opc = getUChar(delta); delta++;
sewardj3ca55a12005-01-27 16:06:23 +000031651 switch (opc) {
31652
sewardj1d511802005-03-27 17:59:45 +000031653 /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */
31654
sewardjf53b7352005-04-06 20:01:56 +000031655 /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */
31656
sewardj82c9f2f2005-03-02 16:05:13 +000031657 /* =-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */
31658
sewardj9ed16802005-08-24 10:46:19 +000031659 /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */
31660
sewardj3ca55a12005-01-27 16:06:23 +000031661 /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */
sewardj3ca55a12005-01-27 16:06:23 +000031662
sewardja6b93d12005-02-17 09:28:28 +000031663 /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */
31664
sewardjd0a12df2005-02-10 02:07:43 +000031665 /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */
31666
sewardj5e525292005-01-28 15:13:10 +000031667 /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */
31668
sewardj32b2bbe2005-01-28 00:50:10 +000031669 /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */
31670
sewardjec387ca2006-08-01 18:36:25 +000031671 /* =-=-=-=-=-=-=-=-=- NOPs =-=-=-=-=-=-=-=-=-=-=-= */
31672
sewardj1389d4d2005-01-28 13:46:29 +000031673 /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */
sewardj1389d4d2005-01-28 13:46:29 +000031674
sewardjb04a47c2005-08-10 12:27:46 +000031675 /* =-=-=-=-=-=-=-=-=- PREFETCH =-=-=-=-=-=-=-=-=-= */
sewardjb04a47c2005-08-10 12:27:46 +000031676
sewardj31191072005-02-05 18:24:47 +000031677 /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000031678
31679 /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */
sewardj118b23e2005-01-29 02:14:44 +000031680
sewardj33ef9c22005-11-04 20:05:57 +000031681 /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */
31682
sewardje1698952005-02-08 15:02:39 +000031683 /* =-=-=-=-=-=-=-=-=- SYSCALL -=-=-=-=-=-=-=-=-=-= */
sewardje1698952005-02-08 15:02:39 +000031684
sewardjb4fd2e72005-03-23 13:34:11 +000031685 /* =-=-=-=-=-=-=-=-=- XADD -=-=-=-=-=-=-=-=-=-= */
31686
sewardjb9dc2432010-06-07 16:22:22 +000031687 /* =-=-=-=-=-=-=-=-=- SGDT and SIDT =-=-=-=-=-=-=-=-=-=-= */
sewardjb9dc2432010-06-07 16:22:22 +000031688
sewardj3ca55a12005-01-27 16:06:23 +000031689 /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */
31690
31691 default:
31692 goto decode_failure;
31693 } /* switch (opc) for the 2-byte opcodes */
31694 goto decode_success;
31695 } /* case 0x0F: of primary opcode */
sewardjdf0e0022005-01-25 15:48:43 +000031696
31697 /* ------------------------ ??? ------------------------ */
sewardj80611e32012-01-20 13:07:24 +000031698#endif /* XYZZY */
sewardjdf0e0022005-01-25 15:48:43 +000031699
sewardj80611e32012-01-20 13:07:24 +000031700 //default:
sewardjdf0e0022005-01-25 15:48:43 +000031701 decode_failure:
31702 /* All decode failures end up here. */
sewardj442e51a2012-12-06 18:08:04 +000031703 if (sigill_diag) {
31704 vex_printf("vex amd64->IR: unhandled instruction bytes: "
31705 "0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
31706 (Int)getUChar(delta_start+0),
31707 (Int)getUChar(delta_start+1),
31708 (Int)getUChar(delta_start+2),
31709 (Int)getUChar(delta_start+3),
31710 (Int)getUChar(delta_start+4),
31711 (Int)getUChar(delta_start+5),
31712 (Int)getUChar(delta_start+6),
31713 (Int)getUChar(delta_start+7) );
31714 vex_printf("vex amd64->IR: REX=%d REX.W=%d REX.R=%d REX.X=%d REX.B=%d\n",
31715 haveREX(pfx) ? 1 : 0, getRexW(pfx), getRexR(pfx),
31716 getRexX(pfx), getRexB(pfx));
31717 vex_printf("vex amd64->IR: VEX=%d VEX.L=%d VEX.nVVVV=0x%x ESC=%s\n",
31718 haveVEX(pfx) ? 1 : 0, getVexL(pfx),
31719 getVexNvvvv(pfx),
31720 esc==ESC_NONE ? "NONE" :
31721 esc==ESC_0F ? "0F" :
31722 esc==ESC_0F38 ? "0F38" :
31723 esc==ESC_0F3A ? "0F3A" : "???");
31724 vex_printf("vex amd64->IR: PFX.66=%d PFX.F2=%d PFX.F3=%d\n",
31725 have66(pfx) ? 1 : 0, haveF2(pfx) ? 1 : 0,
31726 haveF3(pfx) ? 1 : 0);
31727 }
sewardjdf0e0022005-01-25 15:48:43 +000031728
31729 /* Tell the dispatcher that this insn cannot be decoded, and so has
31730 not been executed, and (is currently) the next to be executed.
31731 RIP should be up-to-date since it is made so at the start of each
31732 insn, but nevertheless be paranoid and update it again right
31733 now. */
sewardj9e6491a2005-07-02 19:24:10 +000031734 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
sewardjc6f970f2012-04-02 21:54:49 +000031735 jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
31736 vassert(dres.whatNext == Dis_StopHere);
31737 dres.len = 0;
sewardje9d8a262009-07-01 08:06:34 +000031738 /* We also need to say that a CAS is not expected now, regardless
31739 of what it might have been set to at the start of the function,
31740 since the IR that we've emitted just above (to synthesise a
31741 SIGILL) does not involve any CAS, and presumably no other IR has
31742 been emitted for this (non-decoded) insn. */
31743 *expect_CAS = False;
sewardj9e6491a2005-07-02 19:24:10 +000031744 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000031745
sewardj80611e32012-01-20 13:07:24 +000031746 // } /* switch (opc) for the main (primary) opcode switch. */
sewardjdf0e0022005-01-25 15:48:43 +000031747
31748 decode_success:
31749 /* All decode successes end up here. */
sewardjc6f970f2012-04-02 21:54:49 +000031750 switch (dres.whatNext) {
31751 case Dis_Continue:
31752 stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
31753 break;
31754 case Dis_ResteerU:
31755 case Dis_ResteerC:
31756 stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
31757 break;
31758 case Dis_StopHere:
31759 break;
31760 default:
31761 vassert(0);
31762 }
31763
sewardjdf0e0022005-01-25 15:48:43 +000031764 DIP("\n");
sewardj9e6491a2005-07-02 19:24:10 +000031765 dres.len = (Int)toUInt(delta - delta_start);
31766 return dres;
sewardjdf0e0022005-01-25 15:48:43 +000031767}
31768
31769#undef DIP
31770#undef DIS
sewardjd20c8852005-01-20 20:04:07 +000031771
sewardj9e6491a2005-07-02 19:24:10 +000031772
31773/*------------------------------------------------------------*/
31774/*--- Top-level fn ---*/
31775/*------------------------------------------------------------*/
31776
31777/* Disassemble a single instruction into IR. The instruction
31778 is located in host memory at &guest_code[delta]. */
31779
sewardjdd40fdf2006-12-24 02:20:24 +000031780DisResult disInstr_AMD64 ( IRSB* irsb_IN,
sewardjc716aea2006-01-17 01:48:46 +000031781 Bool (*resteerOkFn) ( void*, Addr64 ),
sewardj984d9b12010-01-15 10:53:21 +000031782 Bool resteerCisOk,
sewardjc716aea2006-01-17 01:48:46 +000031783 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000031784 const UChar* guest_code_IN,
sewardj9e6491a2005-07-02 19:24:10 +000031785 Long delta,
31786 Addr64 guest_IP,
sewardja5f55da2006-04-30 23:37:32 +000031787 VexArch guest_arch,
sewardj9e6491a2005-07-02 19:24:10 +000031788 VexArchInfo* archinfo,
sewardjdd40fdf2006-12-24 02:20:24 +000031789 VexAbiInfo* abiinfo,
sewardj9b769162014-07-24 12:42:03 +000031790 VexEndness host_endness_IN,
sewardj442e51a2012-12-06 18:08:04 +000031791 Bool sigill_diag_IN )
sewardj9e6491a2005-07-02 19:24:10 +000031792{
sewardje9d8a262009-07-01 08:06:34 +000031793 Int i, x1, x2;
31794 Bool expect_CAS, has_CAS;
sewardj9e6491a2005-07-02 19:24:10 +000031795 DisResult dres;
31796
31797 /* Set globals (see top of this file) */
sewardja5f55da2006-04-30 23:37:32 +000031798 vassert(guest_arch == VexArchAMD64);
sewardj9e6491a2005-07-02 19:24:10 +000031799 guest_code = guest_code_IN;
sewardjdd40fdf2006-12-24 02:20:24 +000031800 irsb = irsb_IN;
sewardj9b769162014-07-24 12:42:03 +000031801 host_endness = host_endness_IN;
sewardj9e6491a2005-07-02 19:24:10 +000031802 guest_RIP_curr_instr = guest_IP;
31803 guest_RIP_bbstart = guest_IP - delta;
31804
31805 /* We'll consult these after doing disInstr_AMD64_WRK. */
31806 guest_RIP_next_assumed = 0;
31807 guest_RIP_next_mustcheck = False;
31808
sewardje9d8a262009-07-01 08:06:34 +000031809 x1 = irsb_IN->stmts_used;
31810 expect_CAS = False;
sewardjc6f970f2012-04-02 21:54:49 +000031811 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
sewardj984d9b12010-01-15 10:53:21 +000031812 resteerCisOk,
sewardje9d8a262009-07-01 08:06:34 +000031813 callback_opaque,
sewardj442e51a2012-12-06 18:08:04 +000031814 delta, archinfo, abiinfo, sigill_diag_IN );
sewardje9d8a262009-07-01 08:06:34 +000031815 x2 = irsb_IN->stmts_used;
31816 vassert(x2 >= x1);
sewardj9e6491a2005-07-02 19:24:10 +000031817
31818 /* If disInstr_AMD64_WRK tried to figure out the next rip, check it
31819 got it right. Failure of this assertion is serious and denotes
31820 a bug in disInstr. */
31821 if (guest_RIP_next_mustcheck
31822 && guest_RIP_next_assumed != guest_RIP_curr_instr + dres.len) {
31823 vex_printf("\n");
31824 vex_printf("assumed next %%rip = 0x%llx\n",
31825 guest_RIP_next_assumed );
31826 vex_printf(" actual next %%rip = 0x%llx\n",
31827 guest_RIP_curr_instr + dres.len );
sewardje9d8a262009-07-01 08:06:34 +000031828 vpanic("disInstr_AMD64: disInstr miscalculated next %rip");
31829 }
31830
31831 /* See comment at the top of disInstr_AMD64_WRK for meaning of
31832 expect_CAS. Here, we (sanity-)check for the presence/absence of
31833 IRCAS as directed by the returned expect_CAS value. */
31834 has_CAS = False;
31835 for (i = x1; i < x2; i++) {
31836 if (irsb_IN->stmts[i]->tag == Ist_CAS)
31837 has_CAS = True;
31838 }
31839
31840 if (expect_CAS != has_CAS) {
31841 /* inconsistency detected. re-disassemble the instruction so as
31842 to generate a useful error message; then assert. */
31843 vex_traceflags |= VEX_TRACE_FE;
sewardjc6f970f2012-04-02 21:54:49 +000031844 dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
sewardj984d9b12010-01-15 10:53:21 +000031845 resteerCisOk,
sewardje9d8a262009-07-01 08:06:34 +000031846 callback_opaque,
sewardj442e51a2012-12-06 18:08:04 +000031847 delta, archinfo, abiinfo, sigill_diag_IN );
sewardje9d8a262009-07-01 08:06:34 +000031848 for (i = x1; i < x2; i++) {
31849 vex_printf("\t\t");
31850 ppIRStmt(irsb_IN->stmts[i]);
31851 vex_printf("\n");
31852 }
31853 /* Failure of this assertion is serious and denotes a bug in
31854 disInstr. */
31855 vpanic("disInstr_AMD64: inconsistency in LOCK prefix handling");
sewardj9e6491a2005-07-02 19:24:10 +000031856 }
31857
31858 return dres;
31859}
31860
31861
sewardj9a660ea2010-07-29 11:34:38 +000031862/*------------------------------------------------------------*/
31863/*--- Unused stuff ---*/
31864/*------------------------------------------------------------*/
31865
31866// A potentially more Memcheck-friendly version of gen_LZCNT, if
31867// this should ever be needed.
31868//
31869//static IRTemp gen_LZCNT ( IRType ty, IRTemp src )
31870//{
31871// /* Scheme is simple: propagate the most significant 1-bit into all
31872// lower positions in the word. This gives a word of the form
31873// 0---01---1. Now invert it, giving a word of the form
31874// 1---10---0, then do a population-count idiom (to count the 1s,
31875// which is the number of leading zeroes, or the word size if the
31876// original word was 0).
31877// */
31878// Int i;
31879// IRTemp t[7];
31880// for (i = 0; i < 7; i++) {
31881// t[i] = newTemp(ty);
31882// }
31883// if (ty == Ity_I64) {
31884// assign(t[0], binop(Iop_Or64, mkexpr(src),
31885// binop(Iop_Shr64, mkexpr(src), mkU8(1))));
31886// assign(t[1], binop(Iop_Or64, mkexpr(t[0]),
31887// binop(Iop_Shr64, mkexpr(t[0]), mkU8(2))));
31888// assign(t[2], binop(Iop_Or64, mkexpr(t[1]),
31889// binop(Iop_Shr64, mkexpr(t[1]), mkU8(4))));
31890// assign(t[3], binop(Iop_Or64, mkexpr(t[2]),
31891// binop(Iop_Shr64, mkexpr(t[2]), mkU8(8))));
31892// assign(t[4], binop(Iop_Or64, mkexpr(t[3]),
31893// binop(Iop_Shr64, mkexpr(t[3]), mkU8(16))));
31894// assign(t[5], binop(Iop_Or64, mkexpr(t[4]),
31895// binop(Iop_Shr64, mkexpr(t[4]), mkU8(32))));
31896// assign(t[6], unop(Iop_Not64, mkexpr(t[5])));
31897// return gen_POPCOUNT(ty, t[6]);
31898// }
31899// if (ty == Ity_I32) {
31900// assign(t[0], binop(Iop_Or32, mkexpr(src),
31901// binop(Iop_Shr32, mkexpr(src), mkU8(1))));
31902// assign(t[1], binop(Iop_Or32, mkexpr(t[0]),
31903// binop(Iop_Shr32, mkexpr(t[0]), mkU8(2))));
31904// assign(t[2], binop(Iop_Or32, mkexpr(t[1]),
31905// binop(Iop_Shr32, mkexpr(t[1]), mkU8(4))));
31906// assign(t[3], binop(Iop_Or32, mkexpr(t[2]),
31907// binop(Iop_Shr32, mkexpr(t[2]), mkU8(8))));
31908// assign(t[4], binop(Iop_Or32, mkexpr(t[3]),
31909// binop(Iop_Shr32, mkexpr(t[3]), mkU8(16))));
31910// assign(t[5], unop(Iop_Not32, mkexpr(t[4])));
31911// return gen_POPCOUNT(ty, t[5]);
31912// }
31913// if (ty == Ity_I16) {
31914// assign(t[0], binop(Iop_Or16, mkexpr(src),
31915// binop(Iop_Shr16, mkexpr(src), mkU8(1))));
31916// assign(t[1], binop(Iop_Or16, mkexpr(t[0]),
31917// binop(Iop_Shr16, mkexpr(t[0]), mkU8(2))));
31918// assign(t[2], binop(Iop_Or16, mkexpr(t[1]),
31919// binop(Iop_Shr16, mkexpr(t[1]), mkU8(4))));
31920// assign(t[3], binop(Iop_Or16, mkexpr(t[2]),
31921// binop(Iop_Shr16, mkexpr(t[2]), mkU8(8))));
31922// assign(t[4], unop(Iop_Not16, mkexpr(t[3])));
31923// return gen_POPCOUNT(ty, t[4]);
31924// }
31925// vassert(0);
31926//}
31927
sewardj9e6491a2005-07-02 19:24:10 +000031928
sewardjd20c8852005-01-20 20:04:07 +000031929/*--------------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +000031930/*--- end guest_amd64_toIR.c ---*/
sewardjd20c8852005-01-20 20:04:07 +000031931/*--------------------------------------------------------------------*/