/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
   Following that, one of the following 4 are allowed
   (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
sewardj9b769162014-07-24 12:42:03 +0000122/* CONST: what is the host's endianness? We need to know this in
123 order to do sub-register accesses to the SIMD/FP registers
124 correctly. */
125static VexEndness host_endness;
sewardjbbcf1882014-01-12 12:49:10 +0000126
127/* CONST: The guest address for the instruction currently being
128 translated. */
129static Addr64 guest_PC_curr_instr;
130
131/* MOD: The IRSB* into which we're generating code. */
132static IRSB* irsb;
133
134
135/*------------------------------------------------------------*/
136/*--- Debugging output ---*/
137/*------------------------------------------------------------*/
138
/* Print a front-end trace line, but only when front-end tracing
   (VEX_TRACE_FE) is enabled.  Wrapped in do/while(0) so the macro is
   safe as the body of an unbraced if/else. */
#define DIP(format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_printf(format, ## args); \
   } while (0)

/* Like DIP, but formats into 'buf' instead of printing. */
#define DIS(buf, format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_sprintf(buf, format, ## args); \
   } while (0)
146
147
148/*------------------------------------------------------------*/
149/*--- Helper bits and pieces for deconstructing the ---*/
150/*--- arm insn stream. ---*/
151/*------------------------------------------------------------*/
152
153/* Do a little-endian load of a 32-bit word, regardless of the
154 endianness of the underlying host. */
155static inline UInt getUIntLittleEndianly ( UChar* p )
156{
157 UInt w = 0;
158 w = (w << 8) | p[3];
159 w = (w << 8) | p[2];
160 w = (w << 8) | p[1];
161 w = (w << 8) | p[0];
162 return w;
163}
164
165/* Sign extend a N-bit value up to 64 bits, by copying
166 bit N-1 into all higher positions. */
167static ULong sx_to_64 ( ULong x, UInt n )
168{
169 vassert(n > 1 && n < 64);
170 Long r = (Long)x;
171 r = (r << (64-n)) >> (64-n);
172 return (ULong)r;
173}
174
175//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
176//ZZ endianness of the underlying host. */
177//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
178//ZZ {
179//ZZ UShort w = 0;
180//ZZ w = (w << 8) | p[1];
181//ZZ w = (w << 8) | p[0];
182//ZZ return w;
183//ZZ }
184//ZZ
185//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
186//ZZ vassert(sh >= 0 && sh < 32);
187//ZZ if (sh == 0)
188//ZZ return x;
189//ZZ else
190//ZZ return (x << (32-sh)) | (x >> sh);
191//ZZ }
192//ZZ
193//ZZ static Int popcount32 ( UInt x )
194//ZZ {
195//ZZ Int res = 0, i;
196//ZZ for (i = 0; i < 32; i++) {
197//ZZ res += (x & 1);
198//ZZ x >>= 1;
199//ZZ }
200//ZZ return res;
201//ZZ }
202//ZZ
203//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
204//ZZ {
205//ZZ UInt mask = 1 << ix;
206//ZZ x &= ~mask;
207//ZZ x |= ((b << ix) & mask);
208//ZZ return x;
209//ZZ }
210
/* Build small bit-vector constants from individual bit arguments,
   most significant bit first.  E.g. BITS4(1,0,0,1) == 9. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

/* Convenient names for all four 2-bit values. */
#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

/* Produces _uint[_bMax:_bMin] -- the bit-slice of _uint from bit
   _bMax down to bit _bMin inclusive, right-justified. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
256
257
258/*------------------------------------------------------------*/
259/*--- Helper bits and pieces for creating IR fragments. ---*/
260/*------------------------------------------------------------*/
261
262static IRExpr* mkV128 ( UShort w )
263{
264 return IRExpr_Const(IRConst_V128(w));
265}
266
267static IRExpr* mkU64 ( ULong i )
268{
269 return IRExpr_Const(IRConst_U64(i));
270}
271
272static IRExpr* mkU32 ( UInt i )
273{
274 return IRExpr_Const(IRConst_U32(i));
275}
276
sewardj25523c42014-06-15 19:36:29 +0000277static IRExpr* mkU16 ( UInt i )
278{
279 vassert(i < 65536);
280 return IRExpr_Const(IRConst_U16(i));
281}
282
sewardjbbcf1882014-01-12 12:49:10 +0000283static IRExpr* mkU8 ( UInt i )
284{
285 vassert(i < 256);
286 return IRExpr_Const(IRConst_U8( (UChar)i ));
287}
288
289static IRExpr* mkexpr ( IRTemp tmp )
290{
291 return IRExpr_RdTmp(tmp);
292}
293
294static IRExpr* unop ( IROp op, IRExpr* a )
295{
296 return IRExpr_Unop(op, a);
297}
298
299static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
300{
301 return IRExpr_Binop(op, a1, a2);
302}
303
304static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
305{
306 return IRExpr_Triop(op, a1, a2, a3);
307}
308
309static IRExpr* loadLE ( IRType ty, IRExpr* addr )
310{
311 return IRExpr_Load(Iend_LE, ty, addr);
312}
313
314/* Add a statement to the list held by "irbb". */
315static void stmt ( IRStmt* st )
316{
317 addStmtToIRSB( irsb, st );
318}
319
320static void assign ( IRTemp dst, IRExpr* e )
321{
322 stmt( IRStmt_WrTmp(dst, e) );
323}
324
325static void storeLE ( IRExpr* addr, IRExpr* data )
326{
327 stmt( IRStmt_Store(Iend_LE, addr, data) );
328}
329
330//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
331//ZZ {
332//ZZ if (guardT == IRTemp_INVALID) {
333//ZZ /* unconditional */
334//ZZ storeLE(addr, data);
335//ZZ } else {
336//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
337//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
338//ZZ }
339//ZZ }
340//ZZ
341//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
342//ZZ IRExpr* addr, IRExpr* alt,
343//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
344//ZZ {
345//ZZ if (guardT == IRTemp_INVALID) {
346//ZZ /* unconditional */
347//ZZ IRExpr* loaded = NULL;
348//ZZ switch (cvt) {
349//ZZ case ILGop_Ident32:
350//ZZ loaded = loadLE(Ity_I32, addr); break;
351//ZZ case ILGop_8Uto32:
352//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
353//ZZ case ILGop_8Sto32:
354//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
355//ZZ case ILGop_16Uto32:
356//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
357//ZZ case ILGop_16Sto32:
358//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
359//ZZ default:
360//ZZ vassert(0);
361//ZZ }
362//ZZ vassert(loaded != NULL);
363//ZZ assign(dst, loaded);
364//ZZ } else {
365//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
366//ZZ loaded data before putting the data in 'dst'. If the load
367//ZZ does not take place, 'alt' is placed directly in 'dst'. */
368//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
369//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
370//ZZ }
371//ZZ }
372
373/* Generate a new temporary of the given type. */
374static IRTemp newTemp ( IRType ty )
375{
376 vassert(isPlausibleIRType(ty));
377 return newIRTemp( irsb->tyenv, ty );
378}
379
sewardj8e91fd42014-07-11 12:05:47 +0000380/* This is used in many places, so the brevity is an advantage. */
381static IRTemp newTempV128(void)
382{
383 return newTemp(Ity_V128);
384}
385
386/* Initialise V128 temporaries en masse. */
387static
sewardj51d012a2014-07-21 09:19:50 +0000388void newTempsV128_2(IRTemp* t1, IRTemp* t2)
389{
390 vassert(t1 && *t1 == IRTemp_INVALID);
391 vassert(t2 && *t2 == IRTemp_INVALID);
392 *t1 = newTempV128();
393 *t2 = newTempV128();
394}
395
sewardj51d012a2014-07-21 09:19:50 +0000396static
397void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
398{
399 vassert(t1 && *t1 == IRTemp_INVALID);
400 vassert(t2 && *t2 == IRTemp_INVALID);
401 vassert(t3 && *t3 == IRTemp_INVALID);
402 *t1 = newTempV128();
403 *t2 = newTempV128();
404 *t3 = newTempV128();
405}
406
sewardj54ffa1d2014-07-22 09:27:49 +0000407//static
408//void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
409//{
410// vassert(t1 && *t1 == IRTemp_INVALID);
411// vassert(t2 && *t2 == IRTemp_INVALID);
412// vassert(t3 && *t3 == IRTemp_INVALID);
413// vassert(t4 && *t4 == IRTemp_INVALID);
414// *t1 = newTempV128();
415// *t2 = newTempV128();
416// *t3 = newTempV128();
417// *t4 = newTempV128();
418//}
419
sewardj51d012a2014-07-21 09:19:50 +0000420static
sewardj8e91fd42014-07-11 12:05:47 +0000421void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
422 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
423{
424 vassert(t1 && *t1 == IRTemp_INVALID);
425 vassert(t2 && *t2 == IRTemp_INVALID);
426 vassert(t3 && *t3 == IRTemp_INVALID);
427 vassert(t4 && *t4 == IRTemp_INVALID);
428 vassert(t5 && *t5 == IRTemp_INVALID);
429 vassert(t6 && *t6 == IRTemp_INVALID);
430 vassert(t7 && *t7 == IRTemp_INVALID);
431 *t1 = newTempV128();
432 *t2 = newTempV128();
433 *t3 = newTempV128();
434 *t4 = newTempV128();
435 *t5 = newTempV128();
436 *t6 = newTempV128();
437 *t7 = newTempV128();
438}
439
sewardjbbcf1882014-01-12 12:49:10 +0000440//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
441//ZZ IRRoundingMode. */
442//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
443//ZZ {
444//ZZ return mkU32(Irrm_NEAREST);
445//ZZ }
446//ZZ
447//ZZ /* Generate an expression for SRC rotated right by ROT. */
448//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
449//ZZ {
450//ZZ vassert(rot >= 0 && rot < 32);
451//ZZ if (rot == 0)
452//ZZ return mkexpr(src);
453//ZZ return
454//ZZ binop(Iop_Or32,
455//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
456//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
457//ZZ }
458//ZZ
459//ZZ static IRExpr* mkU128 ( ULong i )
460//ZZ {
461//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
462//ZZ }
463//ZZ
464//ZZ /* Generate a 4-aligned version of the given expression if
465//ZZ the given condition is true. Else return it unchanged. */
466//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
467//ZZ {
468//ZZ if (b)
469//ZZ return binop(Iop_And32, e, mkU32(~3));
470//ZZ else
471//ZZ return e;
472//ZZ }
473
474/* Other IR construction helpers. */
475static IROp mkAND ( IRType ty ) {
476 switch (ty) {
477 case Ity_I32: return Iop_And32;
478 case Ity_I64: return Iop_And64;
479 default: vpanic("mkAND");
480 }
481}
482
483static IROp mkOR ( IRType ty ) {
484 switch (ty) {
485 case Ity_I32: return Iop_Or32;
486 case Ity_I64: return Iop_Or64;
487 default: vpanic("mkOR");
488 }
489}
490
491static IROp mkXOR ( IRType ty ) {
492 switch (ty) {
493 case Ity_I32: return Iop_Xor32;
494 case Ity_I64: return Iop_Xor64;
495 default: vpanic("mkXOR");
496 }
497}
498
499static IROp mkSHL ( IRType ty ) {
500 switch (ty) {
501 case Ity_I32: return Iop_Shl32;
502 case Ity_I64: return Iop_Shl64;
503 default: vpanic("mkSHL");
504 }
505}
506
507static IROp mkSHR ( IRType ty ) {
508 switch (ty) {
509 case Ity_I32: return Iop_Shr32;
510 case Ity_I64: return Iop_Shr64;
511 default: vpanic("mkSHR");
512 }
513}
514
515static IROp mkSAR ( IRType ty ) {
516 switch (ty) {
517 case Ity_I32: return Iop_Sar32;
518 case Ity_I64: return Iop_Sar64;
519 default: vpanic("mkSAR");
520 }
521}
522
523static IROp mkNOT ( IRType ty ) {
524 switch (ty) {
525 case Ity_I32: return Iop_Not32;
526 case Ity_I64: return Iop_Not64;
527 default: vpanic("mkNOT");
528 }
529}
530
531static IROp mkADD ( IRType ty ) {
532 switch (ty) {
533 case Ity_I32: return Iop_Add32;
534 case Ity_I64: return Iop_Add64;
535 default: vpanic("mkADD");
536 }
537}
538
539static IROp mkSUB ( IRType ty ) {
540 switch (ty) {
541 case Ity_I32: return Iop_Sub32;
542 case Ity_I64: return Iop_Sub64;
543 default: vpanic("mkSUB");
544 }
545}
546
547static IROp mkADDF ( IRType ty ) {
548 switch (ty) {
549 case Ity_F32: return Iop_AddF32;
550 case Ity_F64: return Iop_AddF64;
551 default: vpanic("mkADDF");
552 }
553}
554
555static IROp mkSUBF ( IRType ty ) {
556 switch (ty) {
557 case Ity_F32: return Iop_SubF32;
558 case Ity_F64: return Iop_SubF64;
559 default: vpanic("mkSUBF");
560 }
561}
562
563static IROp mkMULF ( IRType ty ) {
564 switch (ty) {
565 case Ity_F32: return Iop_MulF32;
566 case Ity_F64: return Iop_MulF64;
567 default: vpanic("mkMULF");
568 }
569}
570
571static IROp mkDIVF ( IRType ty ) {
572 switch (ty) {
573 case Ity_F32: return Iop_DivF32;
574 case Ity_F64: return Iop_DivF64;
575 default: vpanic("mkMULF");
576 }
577}
578
579static IROp mkNEGF ( IRType ty ) {
580 switch (ty) {
581 case Ity_F32: return Iop_NegF32;
582 case Ity_F64: return Iop_NegF64;
583 default: vpanic("mkNEGF");
584 }
585}
586
587static IROp mkABSF ( IRType ty ) {
588 switch (ty) {
589 case Ity_F32: return Iop_AbsF32;
590 case Ity_F64: return Iop_AbsF64;
591 default: vpanic("mkNEGF");
592 }
593}
594
595static IROp mkSQRTF ( IRType ty ) {
596 switch (ty) {
597 case Ity_F32: return Iop_SqrtF32;
598 case Ity_F64: return Iop_SqrtF64;
599 default: vpanic("mkNEGF");
600 }
601}
602
sewardja5a6b752014-06-30 07:33:56 +0000603static IROp mkVecADD ( UInt size ) {
604 const IROp ops[4]
605 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
606 vassert(size < 4);
607 return ops[size];
608}
609
610static IROp mkVecQADDU ( UInt size ) {
611 const IROp ops[4]
612 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
613 vassert(size < 4);
614 return ops[size];
615}
616
617static IROp mkVecQADDS ( UInt size ) {
618 const IROp ops[4]
619 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
620 vassert(size < 4);
621 return ops[size];
622}
623
sewardjf7003bc2014-08-18 12:28:02 +0000624static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
625 const IROp ops[4]
626 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
627 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
628 vassert(size < 4);
629 return ops[size];
630}
631
632static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
633 const IROp ops[4]
634 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
635 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
636 vassert(size < 4);
637 return ops[size];
638}
639
sewardja5a6b752014-06-30 07:33:56 +0000640static IROp mkVecSUB ( UInt size ) {
641 const IROp ops[4]
642 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
643 vassert(size < 4);
644 return ops[size];
645}
646
647static IROp mkVecQSUBU ( UInt size ) {
648 const IROp ops[4]
649 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
650 vassert(size < 4);
651 return ops[size];
652}
653
654static IROp mkVecQSUBS ( UInt size ) {
655 const IROp ops[4]
656 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
657 vassert(size < 4);
658 return ops[size];
659}
660
661static IROp mkVecSARN ( UInt size ) {
662 const IROp ops[4]
663 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
664 vassert(size < 4);
665 return ops[size];
666}
667
668static IROp mkVecSHRN ( UInt size ) {
669 const IROp ops[4]
670 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
671 vassert(size < 4);
672 return ops[size];
673}
674
675static IROp mkVecSHLN ( UInt size ) {
676 const IROp ops[4]
677 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
678 vassert(size < 4);
679 return ops[size];
680}
681
682static IROp mkVecCATEVENLANES ( UInt size ) {
683 const IROp ops[4]
684 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
685 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
686 vassert(size < 4);
687 return ops[size];
688}
689
690static IROp mkVecCATODDLANES ( UInt size ) {
691 const IROp ops[4]
692 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
693 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
694 vassert(size < 4);
695 return ops[size];
696}
697
sewardj487559e2014-07-10 14:22:45 +0000698static IROp mkVecINTERLEAVELO ( UInt size ) {
699 const IROp ops[4]
700 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
701 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
702 vassert(size < 4);
703 return ops[size];
704}
705
706static IROp mkVecINTERLEAVEHI ( UInt size ) {
707 const IROp ops[4]
708 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
709 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
710 vassert(size < 4);
711 return ops[size];
712}
713
sewardja5a6b752014-06-30 07:33:56 +0000714static IROp mkVecMAXU ( UInt size ) {
715 const IROp ops[4]
716 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
717 vassert(size < 4);
718 return ops[size];
719}
720
721static IROp mkVecMAXS ( UInt size ) {
722 const IROp ops[4]
723 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
724 vassert(size < 4);
725 return ops[size];
726}
727
728static IROp mkVecMINU ( UInt size ) {
729 const IROp ops[4]
730 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
731 vassert(size < 4);
732 return ops[size];
733}
734
735static IROp mkVecMINS ( UInt size ) {
736 const IROp ops[4]
737 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
738 vassert(size < 4);
739 return ops[size];
740}
741
sewardj487559e2014-07-10 14:22:45 +0000742static IROp mkVecMUL ( UInt size ) {
743 const IROp ops[4]
744 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
745 vassert(size < 3);
746 return ops[size];
747}
748
749static IROp mkVecMULLU ( UInt sizeNarrow ) {
750 const IROp ops[4]
751 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
752 vassert(sizeNarrow < 3);
753 return ops[sizeNarrow];
754}
755
756static IROp mkVecMULLS ( UInt sizeNarrow ) {
757 const IROp ops[4]
758 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
759 vassert(sizeNarrow < 3);
760 return ops[sizeNarrow];
761}
762
sewardj51d012a2014-07-21 09:19:50 +0000763static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
764 const IROp ops[4]
765 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
766 vassert(sizeNarrow < 3);
767 return ops[sizeNarrow];
768}
769
sewardj8e91fd42014-07-11 12:05:47 +0000770static IROp mkVecCMPEQ ( UInt size ) {
771 const IROp ops[4]
772 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
773 vassert(size < 4);
774 return ops[size];
775}
776
777static IROp mkVecCMPGTU ( UInt size ) {
778 const IROp ops[4]
779 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
780 vassert(size < 4);
781 return ops[size];
782}
783
784static IROp mkVecCMPGTS ( UInt size ) {
785 const IROp ops[4]
786 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
787 vassert(size < 4);
788 return ops[size];
789}
790
791static IROp mkVecABS ( UInt size ) {
792 const IROp ops[4]
793 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
794 vassert(size < 4);
795 return ops[size];
796}
797
798static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
799 const IROp ops[4]
800 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
801 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
802 vassert(size < 4);
803 return ops[size];
804}
805
sewardjbbcf1882014-01-12 12:49:10 +0000806static IRExpr* mkU ( IRType ty, ULong imm ) {
807 switch (ty) {
808 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
809 case Ity_I64: return mkU64(imm);
810 default: vpanic("mkU");
811 }
812}
813
sewardj54ffa1d2014-07-22 09:27:49 +0000814static IROp mkVecQDMULHIS ( UInt size ) {
815 const IROp ops[4]
816 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
817 vassert(size < 4);
818 return ops[size];
819}
820
821static IROp mkVecQRDMULHIS ( UInt size ) {
822 const IROp ops[4]
823 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
824 vassert(size < 4);
825 return ops[size];
826}
827
sewardjecedd982014-08-11 14:02:47 +0000828static IROp mkVecQANDUQSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000829 const IROp ops[4]
830 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
831 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
832 vassert(size < 4);
833 return ops[size];
834}
835
sewardjecedd982014-08-11 14:02:47 +0000836static IROp mkVecQANDSQSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000837 const IROp ops[4]
838 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
839 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
840 vassert(size < 4);
841 return ops[size];
842}
843
sewardjecedd982014-08-11 14:02:47 +0000844static IROp mkVecQANDUQRSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000845 const IROp ops[4]
846 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
847 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
848 vassert(size < 4);
849 return ops[size];
850}
851
sewardjecedd982014-08-11 14:02:47 +0000852static IROp mkVecQANDSQRSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000853 const IROp ops[4]
854 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
855 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
856 vassert(size < 4);
857 return ops[size];
858}
859
sewardja6b61f02014-08-17 18:32:14 +0000860static IROp mkVecSHU ( UInt size ) {
861 const IROp ops[4]
862 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
863 vassert(size < 4);
864 return ops[size];
865}
866
867static IROp mkVecSHS ( UInt size ) {
868 const IROp ops[4]
869 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
870 vassert(size < 4);
871 return ops[size];
872}
873
874static IROp mkVecRSHU ( UInt size ) {
875 const IROp ops[4]
876 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
877 vassert(size < 4);
878 return ops[size];
879}
880
881static IROp mkVecRSHS ( UInt size ) {
882 const IROp ops[4]
883 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
884 vassert(size < 4);
885 return ops[size];
886}
887
sewardjecedd982014-08-11 14:02:47 +0000888static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
889 const IROp ops[4]
890 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
891 Iop_NarrowUn64to32x2, Iop_INVALID };
892 vassert(sizeNarrow < 4);
893 return ops[sizeNarrow];
894}
895
896static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
897 const IROp ops[4]
898 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
899 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
900 vassert(sizeNarrow < 4);
901 return ops[sizeNarrow];
902}
903
904static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
905 const IROp ops[4]
906 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
907 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
908 vassert(sizeNarrow < 4);
909 return ops[sizeNarrow];
910}
911
912static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
913 const IROp ops[4]
914 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
915 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
916 vassert(sizeNarrow < 4);
917 return ops[sizeNarrow];
918}
919
920static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
921 const IROp ops[4]
922 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
923 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
924 vassert(sizeNarrow < 4);
925 return ops[sizeNarrow];
926}
927
928static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
929 const IROp ops[4]
930 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
931 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
932 vassert(sizeNarrow < 4);
933 return ops[sizeNarrow];
934}
935
936static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
937 const IROp ops[4]
938 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
939 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
940 vassert(sizeNarrow < 4);
941 return ops[sizeNarrow];
942}
943
944static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
945 const IROp ops[4]
946 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
947 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
948 vassert(sizeNarrow < 4);
949 return ops[sizeNarrow];
950}
951
952static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
953 const IROp ops[4]
954 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
955 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
956 vassert(sizeNarrow < 4);
957 return ops[sizeNarrow];
958}
959
960static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
961 const IROp ops[4]
962 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
963 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
964 vassert(sizeNarrow < 4);
965 return ops[sizeNarrow];
966}
967
sewardj1dd3ec12014-08-15 09:11:08 +0000968static IROp mkVecQSHLNSATUU ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000969 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000970 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
971 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000972 vassert(size < 4);
973 return ops[size];
974}
975
sewardj1dd3ec12014-08-15 09:11:08 +0000976static IROp mkVecQSHLNSATSS ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000977 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000978 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
979 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000980 vassert(size < 4);
981 return ops[size];
982}
983
sewardj1dd3ec12014-08-15 09:11:08 +0000984static IROp mkVecQSHLNSATSU ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000985 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000986 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
987 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000988 vassert(size < 4);
989 return ops[size];
990}
991
992
sewardjbbcf1882014-01-12 12:49:10 +0000993/* Generate IR to create 'arg rotated right by imm', for sane values
994 of 'ty' and 'imm'. */
995static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
996{
997 UInt w = 0;
998 if (ty == Ity_I64) {
999 w = 64;
1000 } else {
1001 vassert(ty == Ity_I32);
1002 w = 32;
1003 }
1004 vassert(w != 0);
1005 vassert(imm < w);
1006 if (imm == 0) {
1007 return arg;
1008 }
1009 IRTemp res = newTemp(ty);
1010 assign(res, binop(mkOR(ty),
1011 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1012 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1013 return res;
1014}
1015
1016/* Generate IR to set the returned temp to either all-zeroes or
1017 all ones, as a copy of arg<imm>. */
1018static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1019{
1020 UInt w = 0;
1021 if (ty == Ity_I64) {
1022 w = 64;
1023 } else {
1024 vassert(ty == Ity_I32);
1025 w = 32;
1026 }
1027 vassert(w != 0);
1028 vassert(imm < w);
1029 IRTemp res = newTemp(ty);
1030 assign(res, binop(mkSAR(ty),
1031 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1032 mkU8(w - 1)));
1033 return res;
1034}
1035
sewardj7d009132014-02-20 17:43:38 +00001036/* U-widen 8/16/32/64 bit int expr to 64. */
1037static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1038{
1039 switch (srcTy) {
1040 case Ity_I64: return e;
1041 case Ity_I32: return unop(Iop_32Uto64, e);
1042 case Ity_I16: return unop(Iop_16Uto64, e);
1043 case Ity_I8: return unop(Iop_8Uto64, e);
1044 default: vpanic("widenUto64(arm64)");
1045 }
1046}
1047
1048/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1049 of these combinations make sense. */
1050static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1051{
1052 switch (dstTy) {
1053 case Ity_I64: return e;
1054 case Ity_I32: return unop(Iop_64to32, e);
1055 case Ity_I16: return unop(Iop_64to16, e);
1056 case Ity_I8: return unop(Iop_64to8, e);
1057 default: vpanic("narrowFrom64(arm64)");
1058 }
1059}
1060
sewardjbbcf1882014-01-12 12:49:10 +00001061
1062/*------------------------------------------------------------*/
1063/*--- Helpers for accessing guest registers. ---*/
1064/*------------------------------------------------------------*/
1065
/* Byte offsets of fields within VexGuestARM64State, for use with
   IRStmt_Put / IRExpr_Get throughout this file. */

/* Integer registers X0..X30.  Encoding 31 has no slot of its own: it
   denotes either XZR or SP depending on context (see OFFB_XSP). */
#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

/* Stack pointer and program counter. */
#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

/* The lazily-evaluated condition-flag thunk: an operation tag plus
   two dependencies and one non-dependency (see setFlags_D1_D2_ND and
   the mk_arm64g_calculate_* helpers below). */
#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

/* Thread-pointer register, and the NRADDR pseudo-register
   (presumably for Valgrind's client-request machinery — not used in
   this part of the file). */
#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

/* SIMD/FP registers Q0..Q31, 128 bits each. */
#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29     offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

/* FP control register, and the sticky QC (cumulative saturation)
   flag (see mk_get_IR_rounding_mode for how FPCR is consumed). */
#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

/* NOTE(review): CMSTART/CMLEN look like the address range used when
   discarding translations — confirm against VexGuestARM64State docs. */
#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
sewardjbbcf1882014-01-12 12:49:10 +00001147
1148
1149/* ---------------- Integer registers ---------------- */
1150
1151static Int offsetIReg64 ( UInt iregNo )
1152{
1153 /* Do we care about endianness here? We do if sub-parts of integer
1154 registers are accessed. */
1155 switch (iregNo) {
1156 case 0: return OFFB_X0;
1157 case 1: return OFFB_X1;
1158 case 2: return OFFB_X2;
1159 case 3: return OFFB_X3;
1160 case 4: return OFFB_X4;
1161 case 5: return OFFB_X5;
1162 case 6: return OFFB_X6;
1163 case 7: return OFFB_X7;
1164 case 8: return OFFB_X8;
1165 case 9: return OFFB_X9;
1166 case 10: return OFFB_X10;
1167 case 11: return OFFB_X11;
1168 case 12: return OFFB_X12;
1169 case 13: return OFFB_X13;
1170 case 14: return OFFB_X14;
1171 case 15: return OFFB_X15;
1172 case 16: return OFFB_X16;
1173 case 17: return OFFB_X17;
1174 case 18: return OFFB_X18;
1175 case 19: return OFFB_X19;
1176 case 20: return OFFB_X20;
1177 case 21: return OFFB_X21;
1178 case 22: return OFFB_X22;
1179 case 23: return OFFB_X23;
1180 case 24: return OFFB_X24;
1181 case 25: return OFFB_X25;
1182 case 26: return OFFB_X26;
1183 case 27: return OFFB_X27;
1184 case 28: return OFFB_X28;
1185 case 29: return OFFB_X29;
1186 case 30: return OFFB_X30;
1187 /* but not 31 */
1188 default: vassert(0);
1189 }
1190}
1191
1192static Int offsetIReg64orSP ( UInt iregNo )
1193{
sewardj60687882014-01-15 10:25:21 +00001194 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001195}
1196
/* Printable name of a 64-bit integer register, with encoding 31
   rendered as the zero register "xzr". */
static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}
1207
1208static const HChar* nameIReg64orSP ( UInt iregNo )
1209{
1210 if (iregNo == 31) {
1211 return "sp";
1212 }
1213 vassert(iregNo < 31);
1214 return nameIReg64orZR(iregNo);
1215}
1216
1217static IRExpr* getIReg64orSP ( UInt iregNo )
1218{
1219 vassert(iregNo < 32);
1220 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1221}
1222
1223static IRExpr* getIReg64orZR ( UInt iregNo )
1224{
1225 if (iregNo == 31) {
1226 return mkU64(0);
1227 }
1228 vassert(iregNo < 31);
1229 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1230}
1231
1232static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1233{
1234 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1235 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1236}
1237
1238static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1239{
1240 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1241 if (iregNo == 31) {
1242 return;
1243 }
1244 vassert(iregNo < 31);
1245 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1246}
1247
/* Printable name of a 32-bit integer register view, with encoding 31
   rendered as the zero register "wzr". */
static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}
1258
1259static const HChar* nameIReg32orSP ( UInt iregNo )
1260{
1261 if (iregNo == 31) {
1262 return "wsp";
1263 }
1264 vassert(iregNo < 31);
1265 return nameIReg32orZR(iregNo);
1266}
1267
1268static IRExpr* getIReg32orSP ( UInt iregNo )
1269{
1270 vassert(iregNo < 32);
1271 return unop(Iop_64to32,
1272 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1273}
1274
1275static IRExpr* getIReg32orZR ( UInt iregNo )
1276{
1277 if (iregNo == 31) {
1278 return mkU32(0);
1279 }
1280 vassert(iregNo < 31);
1281 return unop(Iop_64to32,
1282 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1283}
1284
1285static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1286{
1287 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1288 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1289}
1290
1291static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1292{
1293 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1294 if (iregNo == 31) {
1295 return;
1296 }
1297 vassert(iregNo < 31);
1298 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1299}
1300
1301static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1302{
1303 vassert(is64 == True || is64 == False);
1304 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1305}
1306
1307static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1308{
1309 vassert(is64 == True || is64 == False);
1310 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1311}
1312
1313static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1314{
1315 vassert(is64 == True || is64 == False);
1316 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1317}
1318
1319static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1320{
1321 vassert(is64 == True || is64 == False);
1322 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1323}
1324
/* Write 'e' (which must be :: Ity_I64) to the guest program counter. */
static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
1330
1331
1332/* ---------------- Vector (Q) registers ---------------- */
1333
1334static Int offsetQReg128 ( UInt qregNo )
1335{
1336 /* We don't care about endianness at this point. It only becomes
1337 relevant when dealing with sections of these registers.*/
1338 switch (qregNo) {
1339 case 0: return OFFB_Q0;
1340 case 1: return OFFB_Q1;
1341 case 2: return OFFB_Q2;
1342 case 3: return OFFB_Q3;
1343 case 4: return OFFB_Q4;
1344 case 5: return OFFB_Q5;
1345 case 6: return OFFB_Q6;
1346 case 7: return OFFB_Q7;
1347 case 8: return OFFB_Q8;
1348 case 9: return OFFB_Q9;
1349 case 10: return OFFB_Q10;
1350 case 11: return OFFB_Q11;
1351 case 12: return OFFB_Q12;
1352 case 13: return OFFB_Q13;
1353 case 14: return OFFB_Q14;
1354 case 15: return OFFB_Q15;
1355 case 16: return OFFB_Q16;
1356 case 17: return OFFB_Q17;
1357 case 18: return OFFB_Q18;
1358 case 19: return OFFB_Q19;
1359 case 20: return OFFB_Q20;
1360 case 21: return OFFB_Q21;
1361 case 22: return OFFB_Q22;
1362 case 23: return OFFB_Q23;
1363 case 24: return OFFB_Q24;
1364 case 25: return OFFB_Q25;
1365 case 26: return OFFB_Q26;
1366 case 27: return OFFB_Q27;
1367 case 28: return OFFB_Q28;
1368 case 29: return OFFB_Q29;
1369 case 30: return OFFB_Q30;
1370 case 31: return OFFB_Q31;
1371 default: vassert(0);
1372 }
1373}
1374
sewardjbbcf1882014-01-12 12:49:10 +00001375/* Write to a complete Qreg. */
1376static void putQReg128 ( UInt qregNo, IRExpr* e )
1377{
1378 vassert(qregNo < 32);
1379 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1380 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1381}
1382
1383/* Read a complete Qreg. */
1384static IRExpr* getQReg128 ( UInt qregNo )
1385{
1386 vassert(qregNo < 32);
1387 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1388}
1389
1390/* Produce the IR type for some sub-part of a vector. For 32- and 64-
1391 bit sub-parts we can choose either integer or float types, and
1392 choose float on the basis that that is the common use case and so
1393 will give least interference with Put-to-Get forwarding later
1394 on. */
1395static IRType preferredVectorSubTypeFromSize ( UInt szB )
1396{
1397 switch (szB) {
1398 case 1: return Ity_I8;
1399 case 2: return Ity_I16;
1400 case 4: return Ity_I32; //Ity_F32;
1401 case 8: return Ity_F64;
1402 case 16: return Ity_V128;
1403 default: vassert(0);
1404 }
1405}
1406
sewardj606c4ba2014-01-26 19:11:14 +00001407/* Find the offset of the laneNo'th lane of type laneTy in the given
1408 Qreg. Since the host is little-endian, the least significant lane
1409 has the lowest offset. */
1410static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardjbbcf1882014-01-12 12:49:10 +00001411{
sewardj9b769162014-07-24 12:42:03 +00001412 vassert(host_endness == VexEndnessLE);
sewardjbbcf1882014-01-12 12:49:10 +00001413 Int base = offsetQReg128(qregNo);
sewardj606c4ba2014-01-26 19:11:14 +00001414 /* Since the host is little-endian, the least significant lane
1415 will be at the lowest address. */
1416 /* Restrict this to known types, so as to avoid silently accepting
1417 stupid types. */
1418 UInt laneSzB = 0;
1419 switch (laneTy) {
sewardj5860ec72014-03-01 11:19:45 +00001420 case Ity_I8: laneSzB = 1; break;
1421 case Ity_I16: laneSzB = 2; break;
sewardj606c4ba2014-01-26 19:11:14 +00001422 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1423 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1424 case Ity_V128: laneSzB = 16; break;
1425 default: break;
sewardjbbcf1882014-01-12 12:49:10 +00001426 }
sewardj606c4ba2014-01-26 19:11:14 +00001427 vassert(laneSzB > 0);
1428 UInt minOff = laneNo * laneSzB;
1429 UInt maxOff = minOff + laneSzB - 1;
1430 vassert(maxOff < 16);
1431 return base + minOff;
sewardjbbcf1882014-01-12 12:49:10 +00001432}
1433
sewardj606c4ba2014-01-26 19:11:14 +00001434/* Put to the least significant lane of a Qreg. */
1435static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001436{
1437 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001438 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001439 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001440 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1441 case Ity_F32: case Ity_F64: case Ity_V128:
1442 break;
1443 default:
1444 vassert(0); // Other cases are probably invalid
sewardjbbcf1882014-01-12 12:49:10 +00001445 }
1446 stmt(IRStmt_Put(off, e));
1447}
1448
sewardj606c4ba2014-01-26 19:11:14 +00001449/* Get from the least significant lane of a Qreg. */
1450static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardjbbcf1882014-01-12 12:49:10 +00001451{
sewardj606c4ba2014-01-26 19:11:14 +00001452 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001453 switch (ty) {
sewardjb3553472014-05-15 16:49:21 +00001454 case Ity_I8:
1455 case Ity_I16:
sewardj606c4ba2014-01-26 19:11:14 +00001456 case Ity_I32: case Ity_I64:
1457 case Ity_F32: case Ity_F64: case Ity_V128:
1458 break;
1459 default:
1460 vassert(0); // Other cases are ATC
sewardjbbcf1882014-01-12 12:49:10 +00001461 }
1462 return IRExpr_Get(off, ty);
1463}
1464
/* Printable name of vector register <qregNo> viewed at the width of
   'laneTy': b/h/s/d/q for 1/2/4/8/16-byte views respectively. */
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   /* Select the name table by the byte size of the requested view. */
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}
1503
/* Printable name of a vector register viewed as a full 128-bit "q"
   register. */
static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
1508
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   /* On the (asserted) little-endian host, lane 1 of the 64-bit lane
      view is bytes 8..15 of the register. */
   return offsetQRegLane(qregNo, Ity_I64, 1);
}
1515
/* Read the most significant 64 bits of a Qreg as an I64 expression. */
static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}
1520
sewardj606c4ba2014-01-26 19:11:14 +00001521static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001522{
1523 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001524 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001525 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001526 case Ity_I64: case Ity_F64:
1527 break;
1528 default:
1529 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001530 }
1531 stmt(IRStmt_Put(off, e));
1532}
1533
sewardj606c4ba2014-01-26 19:11:14 +00001534/* Put to a specified lane of a Qreg. */
1535static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1536{
1537 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1538 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1539 switch (laneTy) {
1540 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001541 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001542 case Ity_I16:
1543 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001544 break;
1545 default:
1546 vassert(0); // Other cases are ATC
1547 }
1548 stmt(IRStmt_Put(off, e));
1549}
1550
sewardj32d86752014-03-02 12:47:18 +00001551/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001552static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1553{
1554 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1555 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001556 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
sewardj85fbb022014-06-12 13:16:01 +00001557 case Ity_F64: case Ity_F32:
sewardj606c4ba2014-01-26 19:11:14 +00001558 break;
1559 default:
1560 vassert(0); // Other cases are ATC
1561 }
1562 return IRExpr_Get(off, laneTy);
1563}
1564
1565
sewardjbbcf1882014-01-12 12:49:10 +00001566//ZZ /* ---------------- Misc registers ---------------- */
1567//ZZ
1568//ZZ static void putMiscReg32 ( UInt gsoffset,
1569//ZZ IRExpr* e, /* :: Ity_I32 */
1570//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1571//ZZ {
1572//ZZ switch (gsoffset) {
1573//ZZ case OFFB_FPSCR: break;
1574//ZZ case OFFB_QFLAG32: break;
1575//ZZ case OFFB_GEFLAG0: break;
1576//ZZ case OFFB_GEFLAG1: break;
1577//ZZ case OFFB_GEFLAG2: break;
1578//ZZ case OFFB_GEFLAG3: break;
1579//ZZ default: vassert(0); /* awaiting more cases */
1580//ZZ }
1581//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1582//ZZ
1583//ZZ if (guardT == IRTemp_INVALID) {
1584//ZZ /* unconditional write */
1585//ZZ stmt(IRStmt_Put(gsoffset, e));
1586//ZZ } else {
1587//ZZ stmt(IRStmt_Put(
1588//ZZ gsoffset,
1589//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1590//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1591//ZZ ));
1592//ZZ }
1593//ZZ }
1594//ZZ
1595//ZZ static IRTemp get_ITSTATE ( void )
1596//ZZ {
1597//ZZ ASSERT_IS_THUMB;
1598//ZZ IRTemp t = newTemp(Ity_I32);
1599//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1600//ZZ return t;
1601//ZZ }
1602//ZZ
1603//ZZ static void put_ITSTATE ( IRTemp t )
1604//ZZ {
1605//ZZ ASSERT_IS_THUMB;
1606//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1607//ZZ }
1608//ZZ
1609//ZZ static IRTemp get_QFLAG32 ( void )
1610//ZZ {
1611//ZZ IRTemp t = newTemp(Ity_I32);
1612//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1613//ZZ return t;
1614//ZZ }
1615//ZZ
1616//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1617//ZZ {
1618//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1619//ZZ }
1620//ZZ
1621//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1622//ZZ Status Register) to indicate that overflow or saturation occurred.
1623//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1624//ZZ value to indicate saturation. */
1625//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1626//ZZ {
1627//ZZ IRTemp old = get_QFLAG32();
1628//ZZ IRTemp nyu = newTemp(Ity_I32);
1629//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1630//ZZ put_QFLAG32(nyu, condT);
1631//ZZ }
1632
1633
1634/* ---------------- FPCR stuff ---------------- */
1635
1636/* Generate IR to get hold of the rounding mode bits in FPCR, and
1637 convert them to IR format. Bind the final result to the
1638 returned temp. */
1639static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1640{
1641 /* The ARMvfp encoding for rounding mode bits is:
1642 00 to nearest
1643 01 to +infinity
1644 10 to -infinity
1645 11 to zero
1646 We need to convert that to the IR encoding:
1647 00 to nearest (the default)
1648 10 to +infinity
1649 01 to -infinity
1650 11 to zero
1651 Which can be done by swapping bits 0 and 1.
1652 The rmode bits are at 23:22 in FPSCR.
1653 */
1654 IRTemp armEncd = newTemp(Ity_I32);
1655 IRTemp swapped = newTemp(Ity_I32);
1656 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1657 we don't zero out bits 24 and above, since the assignment to
1658 'swapped' will mask them out anyway. */
1659 assign(armEncd,
1660 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1661 /* Now swap them. */
1662 assign(swapped,
1663 binop(Iop_Or32,
1664 binop(Iop_And32,
1665 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1666 mkU32(2)),
1667 binop(Iop_And32,
1668 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1669 mkU32(1))
1670 ));
1671 return swapped;
1672}
1673
1674
1675/*------------------------------------------------------------*/
1676/*--- Helpers for flag handling and conditional insns ---*/
1677/*------------------------------------------------------------*/
1678
/* Printable name of an ARM64 condition code, as used in disassembly
   printing. */
static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs";  // or 'hs'
      case ARM64CondCC: return "cc";  // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}
1701
/* A handy shorthand for nameARM64Condcode. */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1706
1707
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.
      (mcx_mask bit i set == "ignore argument i"; bits 0 and 3 are the
      cond|op word and NDEP respectively.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1749
1750
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
1764
1765
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2 (args 1 and 2; mcx_mask bit i set
      means "ignore argument i"). */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1788
1789
sewardjbbcf1882014-01-12 12:49:10 +00001790//ZZ /* Build IR to calculate just the overflow flag from stored
1791//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1792//ZZ Ity_I32. */
1793//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1794//ZZ {
1795//ZZ IRExpr** args
1796//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1797//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1798//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1799//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1800//ZZ IRExpr* call
1801//ZZ = mkIRExprCCall(
1802//ZZ Ity_I32,
1803//ZZ 0/*regparm*/,
1804//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1805//ZZ args
1806//ZZ );
1807//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1808//ZZ interested in DEP1 and DEP2. */
1809//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1810//ZZ return call;
1811//ZZ }
1812
1813
/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2 (args 1 and 2; mcx_mask bit i set
      means "ignore argument i"). */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1835
1836
1837/* Build IR to set the flags thunk, in the most general case. */
1838static
1839void setFlags_D1_D2_ND ( UInt cc_op,
1840 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1841{
1842 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1844 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1845 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1846 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1847 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1848 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1849 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1850}
1851
1852/* Build IR to set the flags thunk after ADD or SUB. */
1853static
1854void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1855{
1856 IRTemp argL64 = IRTemp_INVALID;
1857 IRTemp argR64 = IRTemp_INVALID;
1858 IRTemp z64 = newTemp(Ity_I64);
1859 if (is64) {
1860 argL64 = argL;
1861 argR64 = argR;
1862 } else {
1863 argL64 = newTemp(Ity_I64);
1864 argR64 = newTemp(Ity_I64);
1865 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1866 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1867 }
1868 assign(z64, mkU64(0));
1869 UInt cc_op = ARM64G_CC_OP_NUMBER;
1870 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1871 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1872 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1873 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1874 else { vassert(0); }
1875 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1876}
1877
sewardjdee30502014-06-04 13:09:44 +00001878/* Build IR to set the flags thunk after ADC or SBC. */
1879static
1880void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1881 IRTemp argL, IRTemp argR, IRTemp oldC )
1882{
1883 IRTemp argL64 = IRTemp_INVALID;
1884 IRTemp argR64 = IRTemp_INVALID;
1885 IRTemp oldC64 = IRTemp_INVALID;
1886 if (is64) {
1887 argL64 = argL;
1888 argR64 = argR;
1889 oldC64 = oldC;
1890 } else {
1891 argL64 = newTemp(Ity_I64);
1892 argR64 = newTemp(Ity_I64);
1893 oldC64 = newTemp(Ity_I64);
1894 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1895 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1896 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1897 }
1898 UInt cc_op = ARM64G_CC_OP_NUMBER;
1899 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1900 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1901 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1902 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1903 else { vassert(0); }
1904 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1905}
1906
sewardjbbcf1882014-01-12 12:49:10 +00001907/* Build IR to set the flags thunk after ADD or SUB, if the given
1908 condition evaluates to True at run time. If not, the flags are set
1909 to the specified NZCV value. */
1910static
1911void setFlags_ADD_SUB_conditionally (
1912 Bool is64, Bool isSUB,
1913 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1914 )
1915{
1916 /* Generate IR as follows:
1917 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1918 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1919 CC_DEP2 = ITE(cond, argR64, 0)
1920 CC_NDEP = 0
1921 */
1922
1923 IRTemp z64 = newTemp(Ity_I64);
1924 assign(z64, mkU64(0));
1925
1926 /* Establish the operation and operands for the True case. */
1927 IRTemp t_dep1 = IRTemp_INVALID;
1928 IRTemp t_dep2 = IRTemp_INVALID;
1929 UInt t_op = ARM64G_CC_OP_NUMBER;
1930 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1931 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1932 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1933 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1934 else { vassert(0); }
1935 /* */
1936 if (is64) {
1937 t_dep1 = argL;
1938 t_dep2 = argR;
1939 } else {
1940 t_dep1 = newTemp(Ity_I64);
1941 t_dep2 = newTemp(Ity_I64);
1942 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1943 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1944 }
1945
1946 /* Establish the operation and operands for the False case. */
1947 IRTemp f_dep1 = newTemp(Ity_I64);
1948 IRTemp f_dep2 = z64;
1949 UInt f_op = ARM64G_CC_OP_COPY;
1950 assign(f_dep1, mkU64(nzcv << 28));
1951
1952 /* Final thunk values */
1953 IRTemp dep1 = newTemp(Ity_I64);
1954 IRTemp dep2 = newTemp(Ity_I64);
1955 IRTemp op = newTemp(Ity_I64);
1956
1957 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1958 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1959 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1960
1961 /* finally .. */
1962 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1963 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1964 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1965 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1966}
1967
1968/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1969static
1970void setFlags_LOGIC ( Bool is64, IRTemp res )
1971{
1972 IRTemp res64 = IRTemp_INVALID;
1973 IRTemp z64 = newTemp(Ity_I64);
1974 UInt cc_op = ARM64G_CC_OP_NUMBER;
1975 if (is64) {
1976 res64 = res;
1977 cc_op = ARM64G_CC_OP_LOGIC64;
1978 } else {
1979 res64 = newTemp(Ity_I64);
1980 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1981 cc_op = ARM64G_CC_OP_LOGIC32;
1982 }
1983 assign(z64, mkU64(0));
1984 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1985}
1986
1987/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1988 located in bits 31:28 of the supplied value. */
1989static
1990void setFlags_COPY ( IRTemp nzcv_28x0 )
1991{
1992 IRTemp z64 = newTemp(Ity_I64);
1993 assign(z64, mkU64(0));
1994 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1995}
1996
1997
1998//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1999//ZZ sets it at all) */
2000//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2001//ZZ IRTemp t_dep2,
2002//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2003//ZZ {
2004//ZZ IRTemp z32 = newTemp(Ity_I32);
2005//ZZ assign( z32, mkU32(0) );
2006//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2007//ZZ }
2008//ZZ
2009//ZZ
2010//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2011//ZZ sets it at all) */
2012//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2013//ZZ IRTemp t_ndep,
2014//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2015//ZZ {
2016//ZZ IRTemp z32 = newTemp(Ity_I32);
2017//ZZ assign( z32, mkU32(0) );
2018//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2019//ZZ }
2020//ZZ
2021//ZZ
2022//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2023//ZZ sets them at all) */
2024//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2025//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2026//ZZ {
2027//ZZ IRTemp z32 = newTemp(Ity_I32);
2028//ZZ assign( z32, mkU32(0) );
2029//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2030//ZZ }
2031
2032
2033/*------------------------------------------------------------*/
2034/*--- Misc math helpers ---*/
2035/*------------------------------------------------------------*/
2036
sewardj32d86752014-03-02 12:47:18 +00002037/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2038static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00002039{
sewardj32d86752014-03-02 12:47:18 +00002040 IRTemp maskT = newTemp(Ity_I64);
2041 IRTemp res = newTemp(Ity_I64);
2042 vassert(sh >= 1 && sh <= 63);
2043 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00002044 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00002045 binop(Iop_Or64,
2046 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00002047 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2048 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00002049 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00002050 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2051 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00002052 )
2053 );
sewardjdc9259c2014-02-27 11:10:19 +00002054 return res;
2055}
2056
sewardj32d86752014-03-02 12:47:18 +00002057/* Generates byte swaps within 32-bit lanes. */
2058static IRTemp math_UINTSWAP64 ( IRTemp src )
2059{
2060 IRTemp res;
2061 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2062 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2063 return res;
2064}
2065
2066/* Generates byte swaps within 16-bit lanes. */
2067static IRTemp math_USHORTSWAP64 ( IRTemp src )
2068{
2069 IRTemp res;
2070 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2071 return res;
2072}
2073
2074/* Generates a 64-bit byte swap. */
2075static IRTemp math_BYTESWAP64 ( IRTemp src )
2076{
2077 IRTemp res;
2078 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2079 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2080 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2081 return res;
2082}
sewardjdc9259c2014-02-27 11:10:19 +00002083
2084/* Generates a 64-bit bit swap. */
2085static IRTemp math_BITSWAP64 ( IRTemp src )
2086{
sewardj32d86752014-03-02 12:47:18 +00002087 IRTemp res;
2088 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2089 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2090 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2091 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00002092}
2093
sewardj606c4ba2014-01-26 19:11:14 +00002094/* Duplicates the bits at the bottom of the given word to fill the
2095 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2096 except for the bottom bits. */
2097static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2098{
2099 if (srcTy == Ity_I8) {
2100 IRTemp t16 = newTemp(Ity_I64);
2101 assign(t16, binop(Iop_Or64, mkexpr(src),
2102 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2103 IRTemp t32 = newTemp(Ity_I64);
2104 assign(t32, binop(Iop_Or64, mkexpr(t16),
2105 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2106 IRTemp t64 = newTemp(Ity_I64);
2107 assign(t64, binop(Iop_Or64, mkexpr(t32),
2108 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2109 return t64;
2110 }
2111 if (srcTy == Ity_I16) {
2112 IRTemp t32 = newTemp(Ity_I64);
2113 assign(t32, binop(Iop_Or64, mkexpr(src),
2114 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2115 IRTemp t64 = newTemp(Ity_I64);
2116 assign(t64, binop(Iop_Or64, mkexpr(t32),
2117 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2118 return t64;
2119 }
2120 if (srcTy == Ity_I32) {
2121 IRTemp t64 = newTemp(Ity_I64);
2122 assign(t64, binop(Iop_Or64, mkexpr(src),
2123 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2124 return t64;
2125 }
2126 if (srcTy == Ity_I64) {
2127 return src;
2128 }
2129 vassert(0);
2130}
2131
2132
sewardj18bf5172014-06-14 18:05:30 +00002133/* Duplicates the src element exactly so as to fill a V128 value. */
sewardj85fbb022014-06-12 13:16:01 +00002134static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2135{
sewardj8e91fd42014-07-11 12:05:47 +00002136 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +00002137 if (srcTy == Ity_F64) {
2138 IRTemp i64 = newTemp(Ity_I64);
2139 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2140 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2141 return res;
2142 }
2143 if (srcTy == Ity_F32) {
2144 IRTemp i64a = newTemp(Ity_I64);
2145 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2146 IRTemp i64b = newTemp(Ity_I64);
2147 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2148 mkexpr(i64a)));
2149 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2150 return res;
2151 }
sewardj18bf5172014-06-14 18:05:30 +00002152 if (srcTy == Ity_I64) {
2153 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2154 return res;
2155 }
2156 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2157 IRTemp t1 = newTemp(Ity_I64);
2158 assign(t1, widenUto64(srcTy, mkexpr(src)));
2159 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2160 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2161 return res;
2162 }
sewardj85fbb022014-06-12 13:16:01 +00002163 vassert(0);
2164}
2165
2166
sewardjdf9d6d52014-06-27 10:43:22 +00002167/* |fullWidth| is a full V128 width result. Depending on bitQ,
2168 zero out the upper half. */
2169static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2170{
2171 if (bitQ == 1) return mkexpr(fullWidth);
2172 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2173 vassert(0);
2174}
2175
sewardja5a6b752014-06-30 07:33:56 +00002176/* The same, but from an expression instead. */
2177static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2178{
sewardj8e91fd42014-07-11 12:05:47 +00002179 IRTemp fullWidthT = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00002180 assign(fullWidthT, fullWidth);
2181 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2182}
2183
sewardjdf9d6d52014-06-27 10:43:22 +00002184
sewardjbbcf1882014-01-12 12:49:10 +00002185/*------------------------------------------------------------*/
2186/*--- FP comparison helpers ---*/
2187/*------------------------------------------------------------*/
2188
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Zero-widen the I32 comparison result so all the arithmetic
      below can be done at I64. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (bits 6:5 of irRes, keeping only bit 6) : (bit 0) */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   /* termR = 1 iff both bits of ix are set (the UN case). */
   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
2265
2266
2267/*------------------------------------------------------------*/
2268/*--- Data processing (immediate) ---*/
2269/*------------------------------------------------------------*/
2270
2271/* Helper functions for supporting "DecodeBitMasks" */
2272
2273static ULong dbm_ROR ( Int width, ULong x, Int rot )
2274{
2275 vassert(width > 0 && width <= 64);
2276 vassert(rot >= 0 && rot < width);
2277 if (rot == 0) return x;
2278 ULong res = x >> rot;
2279 res |= (x << (width - rot));
2280 if (width < 64)
2281 res &= ((1ULL << width) - 1);
2282 return res;
2283}
2284
2285static ULong dbm_RepTo64( Int esize, ULong x )
2286{
2287 switch (esize) {
2288 case 64:
2289 return x;
2290 case 32:
2291 x &= 0xFFFFFFFF; x |= (x << 32);
2292 return x;
2293 case 16:
2294 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2295 return x;
2296 case 8:
2297 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2298 return x;
2299 case 4:
2300 x &= 0xF; x |= (x << 4); x |= (x << 8);
2301 x |= (x << 16); x |= (x << 32);
2302 return x;
2303 case 2:
2304 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2305 x |= (x << 16); x |= (x << 32);
2306 return x;
2307 default:
2308 break;
2309 }
2310 vpanic("dbm_RepTo64");
2311 /*NOTREACHED*/
2312 return 0;
2313}
2314
2315static Int dbm_highestSetBit ( ULong x )
2316{
2317 Int i;
2318 for (i = 63; i >= 0; i--) {
2319 if (x & (1ULL << i))
2320 return i;
2321 }
2322 vassert(x == 0);
2323 return -1;
2324}
2325
/* Transliteration of the ARMv8 shared pseudocode DecodeBitMasks().
   Decodes the (immN, imms, immr) bitmask-immediate encoding into a
   wmask (the "logical immediate" / rotate mask) and a tmask (the
   bitfield top mask), each replicated to 64 bits.  Either output
   pointer may be NULL if that mask is not wanted.  |immediate| True
   rejects the all-ones element encoding (required for logical
   immediates).  |M| is the register width, 32 or 64.  Returns False
   for invalid (reserved) encodings, True on success. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = index of highest set bit of immN:NOT(imms); determines
      the element size (2^len). */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask = Replicate(ROR(Ones(S+1), R)); tmask = Replicate(Ones(d+1)) */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
2385
2386
/* Decode and translate one instruction from the ARM64 "Data
   Processing (immediate)" encoding group: ADD/SUB(imm), ADR/ADRP,
   logical(imm), MOVZ/MOVN/MOVK, bitfield moves and EXTR.  Emits IR
   (and a DIP disassembly line) and returns True if |insn| was
   recognised; returns False otherwise.  |dres| is accepted for
   interface uniformity with the sibling decoders; no case here
   writes it. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is shifted left by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting variant writes ZR-encoded Rd, not SP. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   /* ADR computes PC + simm21; ADRP computes (PC aligned down to a
      4KB boundary) + (simm21 << 12). */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      /* Expand the (N,immS,immR) triple into the actual 64/32-bit
         logical immediate. */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR: destination is SP-encoded and no flags. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS: destination is ZR-encoded and flags are set. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      /* inZero: insert into a zeroed destination (UBFM/SBFM) rather
         than into the old Rd (BFM).  extend: sign-extend above the
         field (SBFM only). */
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                      || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        /* Shift of zero: result is just the low source register;
           also avoids an out-of-range (szBits - 0) shift below. */
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2728
2729
2730/*------------------------------------------------------------*/
2731/*--- Data processing (register) instructions ---*/
2732/*------------------------------------------------------------*/
2733
2734static const HChar* nameSH ( UInt sh ) {
2735 switch (sh) {
2736 case 0: return "lsl";
2737 case 1: return "lsr";
2738 case 2: return "asr";
2739 case 3: return "ror";
2740 default: vassert(0);
2741 }
2742}
2743
2744/* Generate IR to get a register value, possibly shifted by an
2745 immediate. Returns either a 32- or 64-bit temporary holding the
2746 result. After the shift, the value can optionally be NOT-ed
2747 too.
2748
2749 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2750 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2751 isn't allowed, but it's the job of the caller to check that.
2752*/
2753static IRTemp getShiftedIRegOrZR ( Bool is64,
2754 UInt sh_how, UInt sh_amt, UInt regNo,
2755 Bool invert )
2756{
2757 vassert(sh_how < 4);
2758 vassert(sh_amt < (is64 ? 64 : 32));
2759 IRType ty = is64 ? Ity_I64 : Ity_I32;
2760 IRTemp t0 = newTemp(ty);
2761 assign(t0, getIRegOrZR(is64, regNo));
2762 IRTemp t1 = newTemp(ty);
2763 switch (sh_how) {
2764 case BITS2(0,0):
2765 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2766 break;
2767 case BITS2(0,1):
2768 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2769 break;
2770 case BITS2(1,0):
2771 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2772 break;
2773 case BITS2(1,1):
2774 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2775 break;
2776 default:
2777 vassert(0);
2778 }
2779 if (invert) {
2780 IRTemp t2 = newTemp(ty);
2781 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2782 return t2;
2783 } else {
2784 return t1;
2785 }
2786}
2787
2788
2789static
2790Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2791 UInt insn)
2792{
2793# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2794
2795 /* ------------------- ADD/SUB(reg) ------------------- */
2796 /* x==0 => 32 bit op x==1 => 64 bit op
2797 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2798
2799 31 30 29 28 23 21 20 15 9 4
2800 | | | | | | | | | |
2801 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2802 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2803 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2804 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2805 */
2806 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2807 UInt bX = INSN(31,31);
2808 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2809 UInt bS = INSN(29, 29); /* set flags? */
2810 UInt sh = INSN(23,22);
2811 UInt rM = INSN(20,16);
2812 UInt imm6 = INSN(15,10);
2813 UInt rN = INSN(9,5);
2814 UInt rD = INSN(4,0);
2815 Bool isSUB = bOP == 1;
2816 Bool is64 = bX == 1;
2817 IRType ty = is64 ? Ity_I64 : Ity_I32;
2818 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2819 /* invalid; fall through */
2820 } else {
2821 IRTemp argL = newTemp(ty);
2822 assign(argL, getIRegOrZR(is64, rN));
2823 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2824 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2825 IRTemp res = newTemp(ty);
2826 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2827 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2828 if (bS) {
2829 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2830 }
2831 DIP("%s%s %s, %s, %s, %s #%u\n",
2832 bOP ? "sub" : "add", bS ? "s" : "",
2833 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2834 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2835 return True;
2836 }
2837 }
2838
sewardjdee30502014-06-04 13:09:44 +00002839 /* ------------------- ADC/SBC(reg) ------------------- */
2840 /* x==0 => 32 bit op x==1 => 64 bit op
2841
2842 31 30 29 28 23 21 20 15 9 4
2843 | | | | | | | | | |
2844 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2845 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2846 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2847 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2848 */
2849
2850 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2851 UInt bX = INSN(31,31);
2852 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2853 UInt bS = INSN(29,29); /* set flags */
2854 UInt rM = INSN(20,16);
2855 UInt rN = INSN(9,5);
2856 UInt rD = INSN(4,0);
2857
2858 Bool isSUB = bOP == 1;
2859 Bool is64 = bX == 1;
2860 IRType ty = is64 ? Ity_I64 : Ity_I32;
2861
2862 IRTemp oldC = newTemp(ty);
2863 assign(oldC,
2864 is64 ? mk_arm64g_calculate_flag_c()
2865 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2866
2867 IRTemp argL = newTemp(ty);
2868 assign(argL, getIRegOrZR(is64, rN));
2869 IRTemp argR = newTemp(ty);
2870 assign(argR, getIRegOrZR(is64, rM));
2871
2872 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2873 IRTemp res = newTemp(ty);
2874 if (isSUB) {
2875 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2876 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2877 assign(res,
2878 binop(op,
2879 binop(op, mkexpr(argL), mkexpr(argR)),
2880 binop(xorOp, mkexpr(oldC), one)));
2881 } else {
2882 assign(res,
2883 binop(op,
2884 binop(op, mkexpr(argL), mkexpr(argR)),
2885 mkexpr(oldC)));
2886 }
2887
2888 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2889
2890 if (bS) {
2891 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2892 }
2893
2894 DIP("%s%s %s, %s, %s\n",
2895 bOP ? "sbc" : "adc", bS ? "s" : "",
2896 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2897 nameIRegOrZR(is64, rM));
2898 return True;
2899 }
2900
sewardjbbcf1882014-01-12 12:49:10 +00002901 /* -------------------- LOGIC(reg) -------------------- */
2902 /* x==0 => 32 bit op x==1 => 64 bit op
2903 N==0 => inv? is no-op (no inversion)
2904 N==1 => inv? is NOT
2905 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2906
2907 31 30 28 23 21 20 15 9 4
2908 | | | | | | | | |
2909 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2910 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2911 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2912 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2913 With N=1, the names are: BIC ORN EON BICS
2914 */
2915 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2916 UInt bX = INSN(31,31);
2917 UInt sh = INSN(23,22);
2918 UInt bN = INSN(21,21);
2919 UInt rM = INSN(20,16);
2920 UInt imm6 = INSN(15,10);
2921 UInt rN = INSN(9,5);
2922 UInt rD = INSN(4,0);
2923 Bool is64 = bX == 1;
2924 IRType ty = is64 ? Ity_I64 : Ity_I32;
2925 if (!is64 && imm6 > 31) {
         /* invalid; fall through */
2927 } else {
2928 IRTemp argL = newTemp(ty);
2929 assign(argL, getIRegOrZR(is64, rN));
2930 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2931 IROp op = Iop_INVALID;
2932 switch (INSN(30,29)) {
2933 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2934 case BITS2(0,1): op = mkOR(ty); break;
2935 case BITS2(1,0): op = mkXOR(ty); break;
2936 default: vassert(0);
2937 }
2938 IRTemp res = newTemp(ty);
2939 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2940 if (INSN(30,29) == BITS2(1,1)) {
2941 setFlags_LOGIC(is64, res);
2942 }
2943 putIRegOrZR(is64, rD, mkexpr(res));
2944
2945 static const HChar* names_op[8]
2946 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2947 vassert(((bN << 2) | INSN(30,29)) < 8);
2948 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2949 /* Special-case the printing of "MOV" */
2950 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2951 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2952 nameIRegOrZR(is64, rM));
2953 } else {
2954 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2955 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2956 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2957 }
2958 return True;
2959 }
2960 }
2961
2962 /* -------------------- {U,S}MULH -------------------- */
2963 /* 31 23 22 20 15 9 4
2964 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2965 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2966 */
2967 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
sewardj7fce7cc2014-05-07 09:41:40 +00002968 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
sewardjbbcf1882014-01-12 12:49:10 +00002969 Bool isU = INSN(23,23) == 1;
2970 UInt mm = INSN(20,16);
2971 UInt nn = INSN(9,5);
2972 UInt dd = INSN(4,0);
2973 putIReg64orZR(dd, unop(Iop_128HIto64,
2974 binop(isU ? Iop_MullU64 : Iop_MullS64,
2975 getIReg64orZR(nn), getIReg64orZR(mm))));
2976 DIP("%cmulh %s, %s, %s\n",
2977 isU ? 'u' : 's',
2978 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2979 return True;
2980 }
2981
2982 /* -------------------- M{ADD,SUB} -------------------- */
2983 /* 31 30 20 15 14 9 4
2984 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
      sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2986 */
2987 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2988 Bool is64 = INSN(31,31) == 1;
2989 UInt mm = INSN(20,16);
2990 Bool isAdd = INSN(15,15) == 0;
2991 UInt aa = INSN(14,10);
2992 UInt nn = INSN(9,5);
2993 UInt dd = INSN(4,0);
2994 if (is64) {
2995 putIReg64orZR(
2996 dd,
2997 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2998 getIReg64orZR(aa),
2999 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3000 } else {
3001 putIReg32orZR(
3002 dd,
3003 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3004 getIReg32orZR(aa),
3005 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3006 }
3007 DIP("%s %s, %s, %s, %s\n",
3008 isAdd ? "madd" : "msub",
3009 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3010 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3011 return True;
3012 }
3013
3014 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3015 /* 31 30 28 20 15 11 9 4
3016 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3017 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3018 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3019 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3020 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3021 */
3022 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3023 Bool is64 = INSN(31,31) == 1;
3024 UInt b30 = INSN(30,30);
3025 UInt mm = INSN(20,16);
3026 UInt cond = INSN(15,12);
3027 UInt b10 = INSN(10,10);
3028 UInt nn = INSN(9,5);
3029 UInt dd = INSN(4,0);
3030 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3031 IRType ty = is64 ? Ity_I64 : Ity_I32;
3032 IRExpr* argL = getIRegOrZR(is64, nn);
3033 IRExpr* argR = getIRegOrZR(is64, mm);
3034 switch (op) {
3035 case BITS2(0,0):
3036 break;
3037 case BITS2(0,1):
3038 argR = binop(mkADD(ty), argR, mkU(ty,1));
3039 break;
3040 case BITS2(1,0):
3041 argR = unop(mkNOT(ty), argR);
3042 break;
3043 case BITS2(1,1):
3044 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3045 break;
3046 default:
3047 vassert(0);
3048 }
3049 putIRegOrZR(
3050 is64, dd,
3051 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3052 argL, argR)
3053 );
3054 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3055 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3056 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3057 nameIRegOrZR(is64, mm), nameCC(cond));
3058 return True;
3059 }
3060
3061 /* -------------- ADD/SUB(extended reg) -------------- */
3062 /* 28 20 15 12 9 4
3063 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3064 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3065
3066 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3067 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3068
3069 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3070 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3071
3072 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3073 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3074
3075 The 'm' operand is extended per opt, thusly:
3076
3077 000 Xm & 0xFF UXTB
3078 001 Xm & 0xFFFF UXTH
3079 010 Xm & (2^32)-1 UXTW
3080 011 Xm UXTX
3081
3082 100 Xm sx from bit 7 SXTB
3083 101 Xm sx from bit 15 SXTH
3084 110 Xm sx from bit 31 SXTW
3085 111 Xm SXTX
3086
3087 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3088 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3089 are the identity operation on Wm.
3090
3091 After extension, the value is shifted left by imm3 bits, which
3092 may only be in the range 0 .. 4 inclusive.
3093 */
3094 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3095 Bool is64 = INSN(31,31) == 1;
3096 Bool isSub = INSN(30,30) == 1;
3097 Bool setCC = INSN(29,29) == 1;
3098 UInt mm = INSN(20,16);
3099 UInt opt = INSN(15,13);
3100 UInt imm3 = INSN(12,10);
3101 UInt nn = INSN(9,5);
3102 UInt dd = INSN(4,0);
3103 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3104 "sxtb", "sxth", "sxtw", "sxtx" };
3105 /* Do almost the same thing in the 32- and 64-bit cases. */
3106 IRTemp xN = newTemp(Ity_I64);
3107 IRTemp xM = newTemp(Ity_I64);
3108 assign(xN, getIReg64orSP(nn));
3109 assign(xM, getIReg64orZR(mm));
3110 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3111 Int shSX = 0;
3112 /* widen Xm .. */
3113 switch (opt) {
3114 case BITS3(0,0,0): // UXTB
3115 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3116 case BITS3(0,0,1): // UXTH
3117 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3118 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3119 if (is64) {
3120 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3121 }
3122 break;
3123 case BITS3(0,1,1): // UXTX -- always a noop
3124 break;
3125 case BITS3(1,0,0): // SXTB
3126 shSX = 56; goto sxTo64;
3127 case BITS3(1,0,1): // SXTH
3128 shSX = 48; goto sxTo64;
3129 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3130 if (is64) {
3131 shSX = 32; goto sxTo64;
3132 }
3133 break;
3134 case BITS3(1,1,1): // SXTX -- always a noop
3135 break;
3136 sxTo64:
3137 vassert(shSX >= 32);
3138 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3139 mkU8(shSX));
3140 break;
3141 default:
3142 vassert(0);
3143 }
3144 /* and now shift */
3145 IRTemp argL = xN;
3146 IRTemp argR = newTemp(Ity_I64);
3147 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3148 IRTemp res = newTemp(Ity_I64);
3149 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3150 mkexpr(argL), mkexpr(argR)));
3151 if (is64) {
3152 if (setCC) {
3153 putIReg64orZR(dd, mkexpr(res));
3154 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3155 } else {
3156 putIReg64orSP(dd, mkexpr(res));
3157 }
3158 } else {
3159 if (setCC) {
3160 IRTemp argL32 = newTemp(Ity_I32);
3161 IRTemp argR32 = newTemp(Ity_I32);
3162 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3163 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3164 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3165 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3166 } else {
3167 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3168 }
3169 }
3170 DIP("%s%s %s, %s, %s %s lsl %u\n",
3171 isSub ? "sub" : "add", setCC ? "s" : "",
3172 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3173 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3174 nameExt[opt], imm3);
3175 return True;
3176 }
3177
3178 /* ---------------- CCMP/CCMN(imm) ---------------- */
3179 /* Bizarrely, these appear in the "data processing register"
3180 category, even though they are operations against an
3181 immediate. */
3182 /* 31 29 20 15 11 9 3
3183 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3184 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3185
3186 Operation is:
3187 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3188 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3189 */
3190 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3191 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3192 Bool is64 = INSN(31,31) == 1;
3193 Bool isSUB = INSN(30,30) == 1;
3194 UInt imm5 = INSN(20,16);
3195 UInt cond = INSN(15,12);
3196 UInt nn = INSN(9,5);
3197 UInt nzcv = INSN(3,0);
3198
3199 IRTemp condT = newTemp(Ity_I1);
3200 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3201
3202 IRType ty = is64 ? Ity_I64 : Ity_I32;
3203 IRTemp argL = newTemp(ty);
3204 IRTemp argR = newTemp(ty);
3205
3206 if (is64) {
3207 assign(argL, getIReg64orZR(nn));
3208 assign(argR, mkU64(imm5));
3209 } else {
3210 assign(argL, getIReg32orZR(nn));
3211 assign(argR, mkU32(imm5));
3212 }
3213 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3214
3215 DIP("ccm%c %s, #%u, #%u, %s\n",
3216 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3217 imm5, nzcv, nameCC(cond));
3218 return True;
3219 }
3220
3221 /* ---------------- CCMP/CCMN(reg) ---------------- */
3222 /* 31 29 20 15 11 9 3
3223 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3224 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3225 Operation is:
3226 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3227 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3228 */
3229 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3230 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3231 Bool is64 = INSN(31,31) == 1;
3232 Bool isSUB = INSN(30,30) == 1;
3233 UInt mm = INSN(20,16);
3234 UInt cond = INSN(15,12);
3235 UInt nn = INSN(9,5);
3236 UInt nzcv = INSN(3,0);
3237
3238 IRTemp condT = newTemp(Ity_I1);
3239 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3240
3241 IRType ty = is64 ? Ity_I64 : Ity_I32;
3242 IRTemp argL = newTemp(ty);
3243 IRTemp argR = newTemp(ty);
3244
3245 if (is64) {
3246 assign(argL, getIReg64orZR(nn));
3247 assign(argR, getIReg64orZR(mm));
3248 } else {
3249 assign(argL, getIReg32orZR(nn));
3250 assign(argR, getIReg32orZR(mm));
3251 }
3252 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3253
3254 DIP("ccm%c %s, %s, #%u, %s\n",
3255 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3256 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3257 return True;
3258 }
3259
3260
3261 /* -------------- REV/REV16/REV32/RBIT -------------- */
3262 /* 31 30 28 20 15 11 9 4
3263
sewardj32d86752014-03-02 12:47:18 +00003264 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3265 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003266
sewardj32d86752014-03-02 12:47:18 +00003267 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3268 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003269
sewardjdc9259c2014-02-27 11:10:19 +00003270 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3271 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003272
sewardjdc9259c2014-02-27 11:10:19 +00003273 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
sewardjbbcf1882014-01-12 12:49:10 +00003274 */
sewardjbbcf1882014-01-12 12:49:10 +00003275 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
sewardjdc9259c2014-02-27 11:10:19 +00003276 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3277 UInt b31 = INSN(31,31);
3278 UInt opc = INSN(11,10);
3279
3280 UInt ix = 0;
3281 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3282 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3283 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3284 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3285 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3286 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3287 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
sewardj32d86752014-03-02 12:47:18 +00003288 if (ix >= 1 && ix <= 7) {
3289 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
sewardjdc9259c2014-02-27 11:10:19 +00003290 UInt nn = INSN(9,5);
3291 UInt dd = INSN(4,0);
3292 IRTemp src = newTemp(Ity_I64);
3293 IRTemp dst = IRTemp_INVALID;
sewardj32d86752014-03-02 12:47:18 +00003294 IRTemp (*math)(IRTemp) = NULL;
3295 switch (ix) {
3296 case 1: case 2: math = math_BYTESWAP64; break;
3297 case 3: case 4: math = math_BITSWAP64; break;
3298 case 5: case 6: math = math_USHORTSWAP64; break;
3299 case 7: math = math_UINTSWAP64; break;
3300 default: vassert(0);
3301 }
3302 const HChar* names[7]
3303 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3304 const HChar* nm = names[ix-1];
3305 vassert(math);
3306 if (ix == 6) {
3307 /* This has to be special cased, since the logic below doesn't
3308 handle it correctly. */
sewardjdc9259c2014-02-27 11:10:19 +00003309 assign(src, getIReg64orZR(nn));
sewardj32d86752014-03-02 12:47:18 +00003310 dst = math(src);
3311 putIReg64orZR(dd,
3312 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3313 } else if (is64) {
3314 assign(src, getIReg64orZR(nn));
3315 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003316 putIReg64orZR(dd, mkexpr(dst));
3317 } else {
3318 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
sewardj32d86752014-03-02 12:47:18 +00003319 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003320 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3321 }
sewardj32d86752014-03-02 12:47:18 +00003322 DIP("%s %s, %s\n", nm,
sewardjdc9259c2014-02-27 11:10:19 +00003323 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3324 return True;
sewardjbbcf1882014-01-12 12:49:10 +00003325 }
sewardjdc9259c2014-02-27 11:10:19 +00003326 /* else fall through */
sewardjbbcf1882014-01-12 12:49:10 +00003327 }
3328
3329 /* -------------------- CLZ/CLS -------------------- */
3330 /* 30 28 24 20 15 9 4
3331 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3332 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3333 */
3334 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3335 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3336 Bool is64 = INSN(31,31) == 1;
3337 Bool isCLS = INSN(10,10) == 1;
3338 UInt nn = INSN(9,5);
3339 UInt dd = INSN(4,0);
3340 IRTemp src = newTemp(Ity_I64);
3341 IRTemp dst = newTemp(Ity_I64);
3342 if (!isCLS) { // CLS not yet supported
3343 if (is64) {
3344 assign(src, getIReg64orZR(nn));
3345 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3346 mkU64(64),
3347 unop(Iop_Clz64, mkexpr(src))));
3348 putIReg64orZR(dd, mkexpr(dst));
3349 } else {
3350 assign(src, binop(Iop_Shl64,
3351 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3352 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3353 mkU64(32),
3354 unop(Iop_Clz64, mkexpr(src))));
3355 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3356 }
3357 DIP("cl%c %s, %s\n",
3358 isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3359 return True;
3360 }
3361 }
3362
3363 /* -------------------- LSLV/LSRV/ASRV -------------------- */
3364 /* 30 28 20 15 11 9 4
3365 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3366 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3367 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3368 */
3369 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3370 && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
3371 Bool is64 = INSN(31,31) == 1;
3372 UInt mm = INSN(20,16);
3373 UInt op = INSN(11,10);
3374 UInt nn = INSN(9,5);
3375 UInt dd = INSN(4,0);
3376 IRType ty = is64 ? Ity_I64 : Ity_I32;
3377 IRTemp srcL = newTemp(ty);
3378 IRTemp srcR = newTemp(Ity_I8);
3379 IRTemp res = newTemp(ty);
3380 IROp iop = Iop_INVALID;
3381 assign(srcL, getIRegOrZR(is64, nn));
3382 assign(srcR,
3383 unop(Iop_64to8,
3384 binop(Iop_And64,
3385 getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
3386 switch (op) {
3387 case BITS2(0,0): iop = mkSHL(ty); break;
3388 case BITS2(0,1): iop = mkSHR(ty); break;
3389 case BITS2(1,0): iop = mkSAR(ty); break;
3390 default: vassert(0);
3391 }
3392 assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
3393 putIRegOrZR(is64, dd, mkexpr(res));
3394 vassert(op < 3);
3395 const HChar* names[3] = { "lslv", "lsrv", "asrv" };
3396 DIP("%s %s, %s, %s\n",
3397 names[op], nameIRegOrZR(is64,dd),
3398 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3399 return True;
3400 }
3401
3402 /* -------------------- SDIV/UDIV -------------------- */
3403 /* 30 28 20 15 10 9 4
3404 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3405 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3406 */
3407 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3408 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3409 Bool is64 = INSN(31,31) == 1;
3410 UInt mm = INSN(20,16);
3411 Bool isS = INSN(10,10) == 1;
3412 UInt nn = INSN(9,5);
3413 UInt dd = INSN(4,0);
3414 if (isS) {
3415 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3416 getIRegOrZR(is64, nn),
3417 getIRegOrZR(is64, mm)));
3418 } else {
3419 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3420 getIRegOrZR(is64, nn),
3421 getIRegOrZR(is64, mm)));
3422 }
3423 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3424 nameIRegOrZR(is64, dd),
3425 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3426 return True;
3427 }
3428
3429 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3430 /* 31 23 20 15 14 9 4
3431 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3432 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3433 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3434 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3435 with operation
3436 Xd = Xa +/- (Wn *u/s Wm)
3437 */
3438 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3439 Bool isU = INSN(23,23) == 1;
3440 UInt mm = INSN(20,16);
3441 Bool isAdd = INSN(15,15) == 0;
3442 UInt aa = INSN(14,10);
3443 UInt nn = INSN(9,5);
3444 UInt dd = INSN(4,0);
3445 IRTemp wN = newTemp(Ity_I32);
3446 IRTemp wM = newTemp(Ity_I32);
3447 IRTemp xA = newTemp(Ity_I64);
3448 IRTemp muld = newTemp(Ity_I64);
3449 IRTemp res = newTemp(Ity_I64);
3450 assign(wN, getIReg32orZR(nn));
3451 assign(wM, getIReg32orZR(mm));
3452 assign(xA, getIReg64orZR(aa));
3453 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3454 mkexpr(wN), mkexpr(wM)));
3455 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3456 mkexpr(xA), mkexpr(muld)));
3457 putIReg64orZR(dd, mkexpr(res));
3458 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3459 nameIReg64orZR(dd), nameIReg32orZR(nn),
3460 nameIReg32orZR(mm), nameIReg64orZR(aa));
3461 return True;
3462 }
3463 vex_printf("ARM64 front end: data_processing_register\n");
3464 return False;
3465# undef INSN
3466}
3467
3468
3469/*------------------------------------------------------------*/
3470/*--- Load and Store instructions ---*/
3471/*------------------------------------------------------------*/
3472
3473/* Generate the EA for a "reg + reg" style amode. This is done from
3474 parts of the insn, but for sanity checking sake it takes the whole
3475 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
3476 and S=insn[12]:
3477
3478 The possible forms, along with their opt:S values, are:
3479 011:0 Xn|SP + Xm
3480 111:0 Xn|SP + Xm
3481 011:1 Xn|SP + Xm * transfer_szB
3482 111:1 Xn|SP + Xm * transfer_szB
3483 010:0 Xn|SP + 32Uto64(Wm)
3484 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
3485 110:0 Xn|SP + 32Sto64(Wm)
3486 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
3487
3488 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
3489 the transfer size is insn[23,31,30]. For integer loads/stores,
3490 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
3491
3492 If the decoding fails, it returns IRTemp_INVALID.
3493
3494 isInt is True iff this is decoding is for transfers to/from integer
3495 registers. If False it is for transfers to/from vector registers.
3496*/
3497static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
3498{
3499 UInt optS = SLICE_UInt(insn, 15, 12);
3500 UInt mm = SLICE_UInt(insn, 20, 16);
3501 UInt nn = SLICE_UInt(insn, 9, 5);
3502 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3503 | SLICE_UInt(insn, 31, 30); // Log2 of the size
3504
3505 buf[0] = 0;
3506
3507 /* Sanity checks, that this really is a load/store insn. */
3508 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3509 goto fail;
3510
3511 if (isInt
3512 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3513 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3514 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3515 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3516 goto fail;
3517
3518 if (!isInt
3519 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3520 goto fail;
3521
3522 /* Throw out non-verified but possibly valid cases. */
3523 switch (szLg2) {
3524 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
3525 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
3526 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
3527 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
3528 case BITS3(1,0,0): // can only ever be valid for the vector case
3529 if (isInt) goto fail; else goto fail;
3530 case BITS3(1,0,1): // these sizes are never valid
3531 case BITS3(1,1,0):
3532 case BITS3(1,1,1): goto fail;
3533
3534 default: vassert(0);
3535 }
3536
3537 IRExpr* rhs = NULL;
3538 switch (optS) {
3539 case BITS4(1,1,1,0): goto fail; //ATC
3540 case BITS4(0,1,1,0):
3541 rhs = getIReg64orZR(mm);
3542 vex_sprintf(buf, "[%s, %s]",
3543 nameIReg64orZR(nn), nameIReg64orZR(mm));
3544 break;
3545 case BITS4(1,1,1,1): goto fail; //ATC
3546 case BITS4(0,1,1,1):
3547 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
3548 vex_sprintf(buf, "[%s, %s lsl %u]",
3549 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
3550 break;
3551 case BITS4(0,1,0,0):
3552 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
3553 vex_sprintf(buf, "[%s, %s uxtx]",
3554 nameIReg64orZR(nn), nameIReg32orZR(mm));
3555 break;
3556 case BITS4(0,1,0,1):
3557 rhs = binop(Iop_Shl64,
3558 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
3559 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
3560 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3561 break;
3562 case BITS4(1,1,0,0):
3563 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
3564 vex_sprintf(buf, "[%s, %s sxtx]",
3565 nameIReg64orZR(nn), nameIReg32orZR(mm));
3566 break;
3567 case BITS4(1,1,0,1):
3568 rhs = binop(Iop_Shl64,
3569 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
3570 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
3571 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3572 break;
3573 default:
3574 /* The rest appear to be genuinely invalid */
3575 goto fail;
3576 }
3577
3578 vassert(rhs);
3579 IRTemp res = newTemp(Ity_I64);
3580 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
3581 return res;
3582
3583 fail:
3584 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
3585 return IRTemp_INVALID;
3586}
3587
3588
3589/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3590 bits of DATAE :: Ity_I64. */
3591static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3592{
3593 IRExpr* addrE = mkexpr(addr);
3594 switch (szB) {
3595 case 8:
3596 storeLE(addrE, dataE);
3597 break;
3598 case 4:
3599 storeLE(addrE, unop(Iop_64to32, dataE));
3600 break;
3601 case 2:
3602 storeLE(addrE, unop(Iop_64to16, dataE));
3603 break;
3604 case 1:
3605 storeLE(addrE, unop(Iop_64to8, dataE));
3606 break;
3607 default:
3608 vassert(0);
3609 }
3610}
3611
3612
3613/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3614 placing the result in an Ity_I64 temporary. */
3615static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3616{
3617 IRTemp res = newTemp(Ity_I64);
3618 IRExpr* addrE = mkexpr(addr);
3619 switch (szB) {
3620 case 8:
3621 assign(res, loadLE(Ity_I64,addrE));
3622 break;
3623 case 4:
3624 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3625 break;
3626 case 2:
3627 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3628 break;
3629 case 1:
3630 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3631 break;
3632 default:
3633 vassert(0);
3634 }
3635 return res;
3636}
3637
3638
sewardj18bf5172014-06-14 18:05:30 +00003639/* Generate a "standard 7" name, from bitQ and size. But also
3640 allow ".1d" since that's occasionally useful. */
3641static
3642const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
3643{
3644 vassert(bitQ <= 1 && size <= 3);
3645 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00003646 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00003647 UInt ix = (bitQ << 2) | size;
3648 vassert(ix < 8);
3649 return nms[ix];
3650}
3651
3652
sewardjbbcf1882014-01-12 12:49:10 +00003653static
3654Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3655{
3656# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3657
3658 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3659 /* uimm12 is scaled by the transfer size
3660
3661 31 29 26 21 9 4
3662 | | | | | |
3663 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3664 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3665
3666 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3667 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3668
3669 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3670 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3671
3672 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3673 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3674 */
3675 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3676 UInt szLg2 = INSN(31,30);
3677 UInt szB = 1 << szLg2;
3678 Bool isLD = INSN(22,22) == 1;
3679 UInt offs = INSN(21,10) * szB;
3680 UInt nn = INSN(9,5);
3681 UInt tt = INSN(4,0);
3682 IRTemp ta = newTemp(Ity_I64);
3683 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3684 if (nn == 31) { /* FIXME generate stack alignment check */ }
3685 vassert(szLg2 < 4);
3686 if (isLD) {
3687 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3688 } else {
3689 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3690 }
3691 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3692 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3693 DIP("%s %s, [%s, #%u]\n",
3694 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3695 nameIReg64orSP(nn), offs);
3696 return True;
3697 }
3698
3699 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3700 /*
3701 31 29 26 20 11 9 4
3702 | | | | | | |
3703 (at-Rn-then-Rn=EA) | | |
3704 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3705 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3706
3707 (at-EA-then-Rn=EA)
3708 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3709 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3710
3711 (at-EA)
3712 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3713 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3714
3715 simm9 is unscaled.
3716
3717 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3718 load case this is because would create two competing values for
3719 Rt. In the store case the reason is unclear, but the spec
3720 disallows it anyway.
3721
3722 Stores are narrowing, loads are unsigned widening. sz encodes
3723 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3724 */
/* Decoder arm: LDUR/STUR and pre/post-indexed LDR/STR, integer regs,
   with a 9-bit signed immediate.  Loads are zero-widening into Xt,
   stores are narrowing from Xt.  szLg2 (bits 31:30) gives the transfer
   size: 00=>1, 01=>2, 10=>4, 11=>8 bytes.  Bits 11:10 ("how") select
   the addressing mode: 00 = no writeback (LDUR/STUR), 01 = post-index,
   11 = pre-index; 10 belongs to a different encoding group. */
3725 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3726 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3727 UInt szLg2 = INSN(31,30);
3728 UInt szB = 1 << szLg2;
3729 Bool isLoad = INSN(22,22) == 1;
3730 UInt imm9 = INSN(20,12);
3731 UInt nn = INSN(9,5);
3732 UInt tt = INSN(4,0);
3733 Bool wBack = INSN(10,10) == 1;
3734 UInt how = INSN(11,10);
/* Reject the 'how==10' encoding, and writeback forms where Rn == Rt
   (other than register index 31). */
3735 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3736 /* undecodable; fall through */
3737 } else {
3738 if (nn == 31) { /* FIXME generate stack alignment check */ }
3739
3740 // Compute the transfer address TA and the writeback address WA.
3741 IRTemp tRN = newTemp(Ity_I64);
3742 assign(tRN, getIReg64orSP(nn));
3743 IRTemp tEA = newTemp(Ity_I64);
3744 Long simm9 = (Long)sx_to_64(imm9, 9);
3745 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3746
/* Post-index transfers at Rn; pre-index and no-writeback transfer at
   the computed EA.  tWA (== tEA when used) is the written-back value. */
3747 IRTemp tTA = newTemp(Ity_I64);
3748 IRTemp tWA = newTemp(Ity_I64);
3749 switch (how) {
3750 case BITS2(0,1):
3751 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3752 case BITS2(1,1):
3753 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3754 case BITS2(0,0):
3755 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3756 default:
3757 vassert(0); /* NOTREACHED */
3758 }
3759
sewardje0bff8b2014-03-09 09:40:23 +00003760 /* Normally rN would be updated after the transfer. However, in
3761 the special case typifed by
3762 str x30, [sp,#-16]!
3763 it is necessary to update SP before the transfer, (1)
3764 because Memcheck will otherwise complain about a write
3765 below the stack pointer, and (2) because the segfault
3766 stack extension mechanism will otherwise extend the stack
3767 only down to SP before the instruction, which might not be
3768 far enough, if the -16 bit takes the actual access
3769 address to the next page.
3770 */
/* Early writeback is only applied to 8-byte pre-indexed stores through
   SP with a negative offset (the push-like case described above). */
3771 Bool earlyWBack
3772 = wBack && simm9 < 0 && szB == 8
3773 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3774
3775 if (wBack && earlyWBack)
3776 putIReg64orSP(nn, mkexpr(tEA));
3777
sewardjbbcf1882014-01-12 12:49:10 +00003778 if (isLoad) {
3779 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3780 } else {
3781 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3782 }
3783
sewardje0bff8b2014-03-09 09:40:23 +00003784 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003785 putIReg64orSP(nn, mkexpr(tEA));
3786
/* Disassembly trace only: choose the mnemonic by size, and the format
   string (operand syntax) by addressing mode. */
3787 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3788 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3789 const HChar* fmt_str = NULL;
3790 switch (how) {
3791 case BITS2(0,1):
3792 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3793 break;
3794 case BITS2(1,1):
3795 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3796 break;
3797 case BITS2(0,0):
3798 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3799 break;
3800 default:
3801 vassert(0);
3802 }
3803 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3804 nameIRegOrZR(szB == 8, tt),
3805 nameIReg64orSP(nn), simm9);
3806 return True;
3807 }
3808 }
3809
3810 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3811 /* L==1 => mm==LD
3812 L==0 => mm==ST
3813 x==0 => 32 bit transfers, and zero extended loads
3814 x==1 => 64 bit transfers
3815 simm7 is scaled by the (single-register) transfer size
3816
3817 (at-Rn-then-Rn=EA)
3818 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3819
3820 (at-EA-then-Rn=EA)
3821 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3822
3823 (at-EA)
3824 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3825 */
3826
/* Decoder arm: LDP/STP (signed, scaled 7-bit immediate), integer regs.
   bX selects 32- vs 64-bit transfers; bits 24:23 select post-index,
   pre-index or signed-offset addressing (see the decode table above). */
3827 UInt insn_30_23 = INSN(30,23);
3828 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3829 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3830 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3831 UInt bL = INSN(22,22);
3832 UInt bX = INSN(31,31);
3833 UInt bWBack = INSN(23,23);
3834 UInt rT1 = INSN(4,0);
3835 UInt rN = INSN(9,5);
3836 UInt rT2 = INSN(14,10);
3837 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
/* Reject writeback with Rn aliasing either destination (unless Rn is
   31), and loads where both destinations are the same register. */
3838 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3839 || (bL && rT1 == rT2)) {
3840 /* undecodable; fall through */
3841 } else {
3842 if (rN == 31) { /* FIXME generate stack alignment check */ }
3843
3844 // Compute the transfer address TA and the writeback address WA.
3845 IRTemp tRN = newTemp(Ity_I64);
3846 assign(tRN, getIReg64orSP(rN));
3847 IRTemp tEA = newTemp(Ity_I64);
/* imm7 is scaled by the single-register transfer size (4 or 8). */
3848 simm7 = (bX ? 8 : 4) * simm7;
3849 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3850
3851 IRTemp tTA = newTemp(Ity_I64);
3852 IRTemp tWA = newTemp(Ity_I64);
3853 switch (INSN(24,23)) {
3854 case BITS2(0,1):
3855 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3856 case BITS2(1,1):
3857 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3858 case BITS2(1,0):
3859 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3860 default:
3861 vassert(0); /* NOTREACHED */
3862 }
3863
3864 /* Normally rN would be updated after the transfer. However, in
3865 the special case typifed by
3866 stp x29, x30, [sp,#-112]!
3867 it is necessary to update SP before the transfer, (1)
3868 because Memcheck will otherwise complain about a write
3869 below the stack pointer, and (2) because the segfault
3870 stack extension mechanism will otherwise extend the stack
3871 only down to SP before the instruction, which might not be
3872 far enough, if the -112 bit takes the actual access
3873 address to the next page.
3874 */
3875 Bool earlyWBack
3876 = bWBack && simm7 < 0
3877 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3878
3879 if (bWBack && earlyWBack)
3880 putIReg64orSP(rN, mkexpr(tEA));
3881
/* The two registers are transferred from consecutive addresses,
   second one at TA + (per-register size). */
3882 /**/ if (bL == 1 && bX == 1) {
3883 // 64 bit load
3884 putIReg64orZR(rT1, loadLE(Ity_I64,
3885 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3886 putIReg64orZR(rT2, loadLE(Ity_I64,
3887 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3888 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003889 // 32 bit load
3890 putIReg32orZR(rT1, loadLE(Ity_I32,
3891 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3892 putIReg32orZR(rT2, loadLE(Ity_I32,
3893 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3894 } else if (bL == 0 && bX == 1) {
3895 // 64 bit store
3896 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3897 getIReg64orZR(rT1));
3898 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3899 getIReg64orZR(rT2));
3900 } else {
3901 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003902 // 32 bit store
3903 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3904 getIReg32orZR(rT1));
3905 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3906 getIReg32orZR(rT2));
3907 }
3908
3909 if (bWBack && !earlyWBack)
3910 putIReg64orSP(rN, mkexpr(tEA));
3911
/* Disassembly trace only. */
3912 const HChar* fmt_str = NULL;
3913 switch (INSN(24,23)) {
3914 case BITS2(0,1):
3915 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3916 break;
3917 case BITS2(1,1):
3918 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3919 break;
3920 case BITS2(1,0):
3921 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3922 break;
3923 default:
3924 vassert(0);
3925 }
3926 DIP(fmt_str, bL == 0 ? "st" : "ld",
3927 nameIRegOrZR(bX == 1, rT1),
3928 nameIRegOrZR(bX == 1, rT2),
3929 nameIReg64orSP(rN), simm7);
3930 return True;
3931 }
3932 }
3933
3934 /* ---------------- LDR (literal, int reg) ---------------- */
3935 /* 31 29 23 4
3936 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3937 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3938 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3939 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3940 Just handles the first two cases for now.
3941 */
3942 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3943 UInt imm19 = INSN(23,5);
3944 UInt rT = INSN(4,0);
3945 UInt bX = INSN(30,30);
3946 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3947 if (bX) {
3948 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3949 } else {
3950 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3951 }
3952 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3953 return True;
3954 }
3955
3956 /* -------------- {LD,ST}R (integer register) --------------- */
3957 /* 31 29 20 15 12 11 9 4
3958 | | | | | | | |
3959 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3960 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3961 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3962 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3963
3964 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3965 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3966 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3967 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3968 */
3969 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3970 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3971 HChar dis_buf[64];
3972 UInt szLg2 = INSN(31,30);
3973 Bool isLD = INSN(22,22) == 1;
3974 UInt tt = INSN(4,0);
3975 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3976 if (ea != IRTemp_INVALID) {
3977 switch (szLg2) {
3978 case 3: /* 64 bit */
3979 if (isLD) {
3980 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3981 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3982 } else {
3983 storeLE(mkexpr(ea), getIReg64orZR(tt));
3984 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3985 }
3986 break;
3987 case 2: /* 32 bit */
3988 if (isLD) {
3989 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3990 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3991 } else {
3992 storeLE(mkexpr(ea), getIReg32orZR(tt));
3993 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3994 }
3995 break;
3996 case 1: /* 16 bit */
3997 if (isLD) {
3998 putIReg64orZR(tt, unop(Iop_16Uto64,
3999 loadLE(Ity_I16, mkexpr(ea))));
4000 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4001 } else {
4002 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
4003 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4004 }
4005 break;
4006 case 0: /* 8 bit */
4007 if (isLD) {
4008 putIReg64orZR(tt, unop(Iop_8Uto64,
4009 loadLE(Ity_I8, mkexpr(ea))));
4010 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
4011 } else {
4012 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
4013 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4014 }
4015 break;
4016 default:
4017 vassert(0);
4018 }
4019 return True;
4020 }
4021 }
4022
4023 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
4024 /* 31 29 26 23 21 9 4
4025 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
4026 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
4027 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
4028 where
4029 Rt is Wt when x==1, Xt when x==0
4030 */
4031 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
4032 /* Further checks on bits 31:30 and 22 */
4033 Bool valid = False;
4034 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4035 case BITS3(1,0,0):
4036 case BITS3(0,1,0): case BITS3(0,1,1):
4037 case BITS3(0,0,0): case BITS3(0,0,1):
4038 valid = True;
4039 break;
4040 }
4041 if (valid) {
4042 UInt szLg2 = INSN(31,30);
4043 UInt bitX = INSN(22,22);
4044 UInt imm12 = INSN(21,10);
4045 UInt nn = INSN(9,5);
4046 UInt tt = INSN(4,0);
4047 UInt szB = 1 << szLg2;
4048 IRExpr* ea = binop(Iop_Add64,
4049 getIReg64orSP(nn), mkU64(imm12 * szB));
4050 switch (szB) {
4051 case 4:
4052 vassert(bitX == 0);
4053 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
4054 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
4055 nameIReg64orSP(nn), imm12 * szB);
4056 break;
4057 case 2:
4058 if (bitX == 1) {
4059 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
4060 } else {
4061 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
4062 }
4063 DIP("ldrsh %s, [%s, #%u]\n",
4064 nameIRegOrZR(bitX == 0, tt),
4065 nameIReg64orSP(nn), imm12 * szB);
4066 break;
4067 case 1:
4068 if (bitX == 1) {
4069 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
4070 } else {
4071 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
4072 }
4073 DIP("ldrsb %s, [%s, #%u]\n",
4074 nameIRegOrZR(bitX == 0, tt),
4075 nameIReg64orSP(nn), imm12 * szB);
4076 break;
4077 default:
4078 vassert(0);
4079 }
4080 return True;
4081 }
4082 /* else fall through */
4083 }
4084
4085 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
4086 /* (at-Rn-then-Rn=EA)
4087 31 29 23 21 20 11 9 4
4088 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
4089 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
4090 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
4091
4092 (at-EA-then-Rn=EA)
4093 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
4094 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
4095 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
4096 where
4097 Rt is Wt when x==1, Xt when x==0
4098 transfer-at-Rn when [11]==0, at EA when [11]==1
4099 */
4100 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4101 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4102 /* Further checks on bits 31:30 and 22 */
4103 Bool valid = False;
4104 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4105 case BITS3(1,0,0): // LDRSW Xt
4106 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
4107 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
4108 valid = True;
4109 break;
4110 }
4111 if (valid) {
4112 UInt szLg2 = INSN(31,30);
4113 UInt imm9 = INSN(20,12);
4114 Bool atRN = INSN(11,11) == 0;
4115 UInt nn = INSN(9,5);
4116 UInt tt = INSN(4,0);
4117 IRTemp tRN = newTemp(Ity_I64);
4118 IRTemp tEA = newTemp(Ity_I64);
4119 IRTemp tTA = IRTemp_INVALID;
4120 ULong simm9 = sx_to_64(imm9, 9);
4121 Bool is64 = INSN(22,22) == 0;
4122 assign(tRN, getIReg64orSP(nn));
4123 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4124 tTA = atRN ? tRN : tEA;
4125 HChar ch = '?';
4126 /* There are 5 cases:
4127 byte load, SX to 64
4128 byte load, SX to 32, ZX to 64
4129 halfword load, SX to 64
4130 halfword load, SX to 32, ZX to 64
4131 word load, SX to 64
4132 The ifs below handle them in the listed order.
4133 */
4134 if (szLg2 == 0) {
4135 ch = 'b';
4136 if (is64) {
4137 putIReg64orZR(tt, unop(Iop_8Sto64,
4138 loadLE(Ity_I8, mkexpr(tTA))));
4139 } else {
4140 putIReg32orZR(tt, unop(Iop_8Sto32,
4141 loadLE(Ity_I8, mkexpr(tTA))));
4142 }
4143 }
4144 else if (szLg2 == 1) {
4145 ch = 'h';
4146 if (is64) {
4147 putIReg64orZR(tt, unop(Iop_16Sto64,
4148 loadLE(Ity_I16, mkexpr(tTA))));
4149 } else {
4150 putIReg32orZR(tt, unop(Iop_16Sto32,
4151 loadLE(Ity_I16, mkexpr(tTA))));
4152 }
4153 }
4154 else if (szLg2 == 2 && is64) {
4155 ch = 'w';
4156 putIReg64orZR(tt, unop(Iop_32Sto64,
4157 loadLE(Ity_I32, mkexpr(tTA))));
4158 }
4159 else {
4160 vassert(0);
4161 }
4162 putIReg64orSP(nn, mkexpr(tEA));
4163 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
4164 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4165 return True;
4166 }
4167 /* else fall through */
4168 }
4169
4170 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
4171 /* 31 29 23 21 20 11 9 4
4172 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
4173 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
4174 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
4175 where
4176 Rt is Wt when x==1, Xt when x==0
4177 */
4178 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4179 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4180 /* Further checks on bits 31:30 and 22 */
4181 Bool valid = False;
4182 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4183 case BITS3(1,0,0): // LDURSW Xt
4184 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
4185 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
4186 valid = True;
4187 break;
4188 }
4189 if (valid) {
4190 UInt szLg2 = INSN(31,30);
4191 UInt imm9 = INSN(20,12);
4192 UInt nn = INSN(9,5);
4193 UInt tt = INSN(4,0);
4194 IRTemp tRN = newTemp(Ity_I64);
4195 IRTemp tEA = newTemp(Ity_I64);
4196 ULong simm9 = sx_to_64(imm9, 9);
4197 Bool is64 = INSN(22,22) == 0;
4198 assign(tRN, getIReg64orSP(nn));
4199 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4200 HChar ch = '?';
4201 /* There are 5 cases:
4202 byte load, SX to 64
4203 byte load, SX to 32, ZX to 64
4204 halfword load, SX to 64
4205 halfword load, SX to 32, ZX to 64
4206 word load, SX to 64
4207 The ifs below handle them in the listed order.
4208 */
4209 if (szLg2 == 0) {
4210 ch = 'b';
4211 if (is64) {
4212 putIReg64orZR(tt, unop(Iop_8Sto64,
4213 loadLE(Ity_I8, mkexpr(tEA))));
4214 } else {
4215 putIReg32orZR(tt, unop(Iop_8Sto32,
4216 loadLE(Ity_I8, mkexpr(tEA))));
4217 }
4218 }
4219 else if (szLg2 == 1) {
4220 ch = 'h';
4221 if (is64) {
4222 putIReg64orZR(tt, unop(Iop_16Sto64,
4223 loadLE(Ity_I16, mkexpr(tEA))));
4224 } else {
4225 putIReg32orZR(tt, unop(Iop_16Sto32,
4226 loadLE(Ity_I16, mkexpr(tEA))));
4227 }
4228 }
4229 else if (szLg2 == 2 && is64) {
4230 ch = 'w';
4231 putIReg64orZR(tt, unop(Iop_32Sto64,
4232 loadLE(Ity_I32, mkexpr(tEA))));
4233 }
4234 else {
4235 vassert(0);
4236 }
4237 DIP("ldurs%c %s, [%s, #%lld]",
4238 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4239 return True;
4240 }
4241 /* else fall through */
4242 }
4243
4244 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
4245 /* L==1 => mm==LD
4246 L==0 => mm==ST
4247 sz==00 => 32 bit (S) transfers
4248 sz==01 => 64 bit (D) transfers
4249 sz==10 => 128 bit (Q) transfers
4250 sz==11 isn't allowed
4251 simm7 is scaled by the (single-register) transfer size
4252
4253 31 29 22 21 14 9 4
4254 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
4255 (at-Rn-then-Rn=EA)
4256
4257 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
4258 (at-EA-then-Rn=EA)
4259
4260 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
4261 (at-EA)
4262 */
4263
/* Decoder arm: LDP/STP (signed, scaled 7-bit immediate) for FP/vector
   registers.  szSlg2 selects S (4), D (8) or Q (16) byte transfers;
   bits 24:23 select post-index, pre-index or signed-offset mode. */
4264 UInt insn_29_23 = INSN(29,23);
4265 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
4266 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
4267 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
4268 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
4269 Bool isLD = INSN(22,22) == 1;
4270 Bool wBack = INSN(23,23) == 1;
4271 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4272 UInt tt2 = INSN(14,10);
4273 UInt nn = INSN(9,5);
4274 UInt tt1 = INSN(4,0);
/* sz==11 is not allocated; loads with identical destinations are
   rejected too. */
4275 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
4276 /* undecodable; fall through */
4277 } else {
4278 if (nn == 31) { /* FIXME generate stack alignment check */ }
4279
4280 // Compute the transfer address TA and the writeback address WA.
4281 UInt szB = 4 << szSlg2; /* szB is the per-register size */
4282 IRTemp tRN = newTemp(Ity_I64);
4283 assign(tRN, getIReg64orSP(nn));
4284 IRTemp tEA = newTemp(Ity_I64);
4285 simm7 = szB * simm7;
4286 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4287
4288 IRTemp tTA = newTemp(Ity_I64);
4289 IRTemp tWA = newTemp(Ity_I64);
4290 switch (INSN(24,23)) {
4291 case BITS2(0,1):
4292 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4293 case BITS2(1,1):
4294 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4295 case BITS2(1,0):
4296 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4297 default:
4298 vassert(0); /* NOTREACHED */
4299 }
4300
/* Per-register IR type used for the lower-lane accesses below. */
4301 IRType ty = Ity_INVALID;
4302 switch (szB) {
4303 case 4: ty = Ity_F32; break;
4304 case 8: ty = Ity_F64; break;
4305 case 16: ty = Ity_V128; break;
4306 default: vassert(0);
4307 }
4308
sewardje0bff8b2014-03-09 09:40:23 +00004309 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00004310 the special cases typifed by
sewardje0bff8b2014-03-09 09:40:23 +00004311 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00004312 stp d0, d1, [sp,#-512]!
4313 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00004314 it is necessary to update SP before the transfer, (1)
4315 because Memcheck will otherwise complain about a write
4316 below the stack pointer, and (2) because the segfault
4317 stack extension mechanism will otherwise extend the stack
4318 only down to SP before the instruction, which might not be
4319 far enough, if the -512 bit takes the actual access
4320 address to the next page.
4321 */
4322 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00004323 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00004324 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
4325
4326 if (wBack && earlyWBack)
4327 putIReg64orSP(nn, mkexpr(tEA));
4328
/* For sub-128-bit loads, zero the whole Q register before writing the
   low lane, so the upper lanes are cleared as required. */
sewardjbbcf1882014-01-12 12:49:10 +00004329 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00004330 if (szB < 16) {
4331 putQReg128(tt1, mkV128(0x0000));
4332 }
sewardj606c4ba2014-01-26 19:11:14 +00004333 putQRegLO(tt1,
4334 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00004335 if (szB < 16) {
4336 putQReg128(tt2, mkV128(0x0000));
4337 }
sewardj606c4ba2014-01-26 19:11:14 +00004338 putQRegLO(tt2,
4339 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00004340 } else {
4341 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00004342 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004343 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00004344 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004345 }
4346
sewardje0bff8b2014-03-09 09:40:23 +00004347 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004348 putIReg64orSP(nn, mkexpr(tEA));
4349
/* Disassembly trace only. */
4350 const HChar* fmt_str = NULL;
4351 switch (INSN(24,23)) {
4352 case BITS2(0,1):
4353 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4354 break;
4355 case BITS2(1,1):
4356 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4357 break;
4358 case BITS2(1,0):
4359 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4360 break;
4361 default:
4362 vassert(0);
4363 }
4364 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00004365 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00004366 nameIReg64orSP(nn), simm7);
4367 return True;
4368 }
4369 }
4370
4371 /* -------------- {LD,ST}R (vector register) --------------- */
4372 /* 31 29 23 20 15 12 11 9 4
4373 | | | | | | | | |
4374 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
4375 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
4376 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
4377 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
4378 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
4379
4380 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
4381 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
4382 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
4383 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
4384 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
4385 */
/* Decoder arm: register-offset loads/stores to/from FP/vector regs.
   szLg2 (bit 23 : bits 31:30) selects B/H/S/D/Q; loads zero the whole
   Q register before writing the low lane.  The "//ATC" vasserts mark
   store paths that have never been exercised ("ain't tested code"). */
4386 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4387 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4388 HChar dis_buf[64];
4389 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4390 Bool isLD = INSN(22,22) == 1;
4391 UInt tt = INSN(4,0);
4392 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
/* gen_indexed_EA decodes Rm/option/S; IRTemp_INVALID => undecodable. */
4393 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
4394 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
4395 switch (szLg2) {
4396 case 0: /* 8 bit */
4397 if (isLD) {
4398 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004399 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
4400 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004401 } else {
4402 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004403 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
4404 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004405 }
4406 break;
4407 case 1:
4408 if (isLD) {
4409 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004410 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
4411 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004412 } else {
4413 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004414 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
4415 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004416 }
4417 break;
4418 case 2: /* 32 bit */
4419 if (isLD) {
4420 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004421 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
4422 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004423 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004424 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
4425 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004426 }
4427 break;
4428 case 3: /* 64 bit */
4429 if (isLD) {
4430 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004431 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
4432 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004433 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004434 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
4435 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004436 }
4437 break;
/* Q-register (szLg2==4) variants are not handled yet. */
4438 case 4: return False; //ATC
4439 default: vassert(0);
4440 }
4441 return True;
4442 }
4443 after_LDR_STR_vector_register:
4444
4445 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
4446 /* 31 29 22 20 15 12 11 9 4
4447 | | | | | | | | |
4448 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
4449
4450 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
4451 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
4452
4453 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
4454 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
4455 */
/* Decoder arm: LDRS{B,H,W} with a register offset.  Bit 22 == 0 means
   sign-extend to 64 bits (Xt); bit 22 == 1 means sign-extend to 32 and
   zero-extend to 64 (Wt).  LDRSW only exists in the Xt form. */
4456 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4457 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4458 HChar dis_buf[64];
4459 UInt szLg2 = INSN(31,30);
4460 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
4461 UInt tt = INSN(4,0);
/* szLg2 == 3 (64-bit) has no sign-extending form. */
4462 if (szLg2 == 3) goto after_LDRS_integer_register;
/* gen_indexed_EA decodes Rm/option/S; IRTemp_INVALID => undecodable. */
4463 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4464 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
4465 /* Enumerate the 5 variants explicitly. */
4466 if (szLg2 == 2/*32 bit*/ && sxTo64) {
4467 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
4468 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
4469 return True;
4470 }
4471 else
4472 if (szLg2 == 1/*16 bit*/) {
4473 if (sxTo64) {
4474 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
4475 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
4476 } else {
4477 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
4478 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4479 }
4480 return True;
4481 }
4482 else
4483 if (szLg2 == 0/*8 bit*/) {
4484 if (sxTo64) {
4485 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
4486 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
4487 } else {
4488 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
4489 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4490 }
4491 return True;
4492 }
/* szLg2 == 2 with bit 22 == 1 falls out here without decoding. */
4493 /* else it's an invalid combination */
4494 }
4495 after_LDRS_integer_register:
4496
4497 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
4498 /* This is the Unsigned offset variant only. The Post-Index and
4499 Pre-Index variants are below.
4500
4501 31 29 23 21 9 4
4502 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
4503 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
4504 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
4505 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
4506 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
4507
4508 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
4509 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
4510 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
4511 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
4512 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
4513 */
/* Decoder arm: LDR/STR (SIMD&FP) with a scaled, unsigned 12-bit
   offset.  szLg2 (bit 23 : bits 31:30) selects B/H/S/D/Q; values
   above 4 are excluded by the decode condition. */
4514 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
4515 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
4516 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4517 Bool isLD = INSN(22,22) == 1;
/* pimm12 is scaled by the transfer size. */
4518 UInt pimm12 = INSN(21,10) << szLg2;
4519 UInt nn = INSN(9,5);
4520 UInt tt = INSN(4,0);
4521 IRTemp tEA = newTemp(Ity_I64);
4522 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4523 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
4524 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
4525 if (szLg2 < 4) {
4526 putQReg128(tt, mkV128(0x0000));
4527 }
sewardj606c4ba2014-01-26 19:11:14 +00004528 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004529 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004530 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004531 }
4532 DIP("%s %s, [%s, #%u]\n",
4533 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004534 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00004535 return True;
4536 }
4537
4538 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
4539 /* These are the Post-Index and Pre-Index variants.
4540
4541 31 29 23 20 11 9 4
4542 (at-Rn-then-Rn=EA)
4543 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
4544 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
4545 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
4546 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
4547 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
4548
4549 (at-EA-then-Rn=EA)
4550 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
4551 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
4552 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
4553 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
4554 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
4555
4556 Stores are the same except with bit 22 set to 0.
4557 */
/* Decoder arm: LDR/STR (SIMD&FP), post-index (bit 11 == 0, transfer
   at Rn) and pre-index (bit 11 == 1, transfer at EA) variants; both
   write the updated address back to Rn. */
4558 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4559 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4560 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4561 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4562 Bool isLD = INSN(22,22) == 1;
4563 UInt imm9 = INSN(20,12);
4564 Bool atRN = INSN(11,11) == 0;
4565 UInt nn = INSN(9,5);
4566 UInt tt = INSN(4,0);
4567 IRTemp tRN = newTemp(Ity_I64);
4568 IRTemp tEA = newTemp(Ity_I64);
4569 IRTemp tTA = IRTemp_INVALID;
4570 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4571 ULong simm9 = sx_to_64(imm9, 9);
4572 assign(tRN, getIReg64orSP(nn));
4573 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
/* Transfer address: Rn for post-index, updated EA for pre-index. */
4574 tTA = atRN ? tRN : tEA;
4575 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
4576 if (szLg2 < 4) {
4577 putQReg128(tt, mkV128(0x0000));
4578 }
sewardj606c4ba2014-01-26 19:11:14 +00004579 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00004580 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004581 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004582 }
4583 putIReg64orSP(nn, mkexpr(tEA));
4584 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
4585 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004586 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004587 return True;
4588 }
4589
4590 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
4591 /* 31 29 23 20 11 9 4
4592 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
4593 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
4594 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
4595 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
4596 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
4597
4598 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
4599 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
4600 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
4601 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
4602 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
4603 */
/* Decoder arm: LDUR/STUR (SIMD&FP) -- unscaled 9-bit signed offset,
   no writeback. */
4604 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4605 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4606 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4607 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4608 Bool isLD = INSN(22,22) == 1;
4609 UInt imm9 = INSN(20,12);
4610 UInt nn = INSN(9,5);
4611 UInt tt = INSN(4,0);
4612 ULong simm9 = sx_to_64(imm9, 9);
4613 IRTemp tEA = newTemp(Ity_I64);
4614 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4615 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4616 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
sewardj606c4ba2014-01-26 19:11:14 +00004617 if (szLg2 < 4) {
4618 putQReg128(tt, mkV128(0x0000));
4619 }
4620 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004621 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004622 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004623 }
4624 DIP("%s %s, [%s, #%lld]\n",
4625 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004626 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004627 return True;
4628 }
4629
4630 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4631 /* 31 29 23 4
4632 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4633 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4634 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4635 */
/* Decoder arm: LDR (literal, SIMD&FP).  PC-relative load of an S, D
   or Q register; the sz==11 encoding is excluded by the decode
   condition.  The full Q register is zeroed before the low lane is
   written. */
4636 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4637 UInt szB = 4 << INSN(31,30);
4638 UInt imm19 = INSN(23,5);
4639 UInt tt = INSN(4,0);
/* Literal address: PC + sign-extended, word-scaled offset. */
4640 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4641 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004642 putQReg128(tt, mkV128(0x0000));
4643 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4644 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004645 return True;
4646 }
4647
sewardj606c4ba2014-01-26 19:11:14 +00004648 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004649 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004650 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4651 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4652 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4653 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4654 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4655 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004656 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4657 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004658 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004659 */
sewardj606c4ba2014-01-26 19:11:14 +00004660 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4661 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004662 ) {
4663 Bool isLD = INSN(22,22) == 1;
4664 UInt rN = INSN(9,5);
4665 UInt vT = INSN(4,0);
4666 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004667 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4668 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004669 assign(tEA, getIReg64orSP(rN));
4670 if (rN == 31) { /* FIXME generate stack alignment check */ }
4671 if (isLD) {
4672 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4673 } else {
4674 storeLE(mkexpr(tEA), getQReg128(vT));
4675 }
4676 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004677 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004678 return True;
4679 }
4680
sewardj606c4ba2014-01-26 19:11:14 +00004681 /* 31 23
4682 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4683 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4684 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4685 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4686 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4687 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4688 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4689 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4690 FIXME does this assume that the host is little endian?
4691 */
4692 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4693 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4694 ) {
4695 Bool isLD = INSN(22,22) == 1;
4696 UInt rN = INSN(9,5);
4697 UInt vT = INSN(4,0);
4698 IRTemp tEA = newTemp(Ity_I64);
4699 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4700 const HChar* name = names[INSN(11,10)];
4701 assign(tEA, getIReg64orSP(rN));
4702 if (rN == 31) { /* FIXME generate stack alignment check */ }
4703 if (isLD) {
4704 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4705 putQRegLane(vT, 1, mkU64(0));
4706 } else {
4707 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4708 }
4709 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4710 vT, name, nameIReg64orSP(rN));
4711 return True;
4712 }
4713
4714 /* ---------- LD1/ST1 (multi 1-elem structs, 1 reg, post index) ---------- */
4715 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004716 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4717 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4718 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4719 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4720 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4721 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4722 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004723 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004724 Note that #16 is implied and cannot be any other value.
4725 FIXME does this assume that the host is little endian?
4726 */
sewardj7d009132014-02-20 17:43:38 +00004727 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4728 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004729 ) {
4730 Bool isLD = INSN(22,22) == 1;
4731 UInt rN = INSN(9,5);
4732 UInt vT = INSN(4,0);
4733 IRTemp tEA = newTemp(Ity_I64);
4734 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4735 const HChar* name = names[INSN(11,10)];
4736 assign(tEA, getIReg64orSP(rN));
4737 if (rN == 31) { /* FIXME generate stack alignment check */ }
4738 if (isLD) {
4739 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4740 } else {
4741 storeLE(mkexpr(tEA), getQReg128(vT));
4742 }
4743 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4744 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4745 vT, name, nameIReg64orSP(rN));
4746 return True;
4747 }
4748
sewardj950ca7a2014-04-03 23:03:32 +00004749 /* 31 23
4750 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4751 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004752 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004753 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004754 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004755 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4756 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4757 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004758 Note that #8 is implied and cannot be any other value.
4759 FIXME does this assume that the host is little endian?
4760 */
sewardj950ca7a2014-04-03 23:03:32 +00004761 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4762 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004763 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004764 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004765 UInt rN = INSN(9,5);
4766 UInt vT = INSN(4,0);
4767 IRTemp tEA = newTemp(Ity_I64);
4768 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4769 const HChar* name = names[INSN(11,10)];
4770 assign(tEA, getIReg64orSP(rN));
4771 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004772 if (isLD) {
4773 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4774 putQRegLane(vT, 1, mkU64(0));
4775 } else {
4776 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4777 }
sewardj606c4ba2014-01-26 19:11:14 +00004778 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004779 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4780 vT, name, nameIReg64orSP(rN));
4781 return True;
4782 }
4783
sewardj18bf5172014-06-14 18:05:30 +00004784 /* ---------- LD1R (single structure, replicate) ---------- */
4785 /* 31 29 22 20 15 11 9 4
4786 0q 001 1010 10 00000 110 0 sz n t LD1R Vt.T, [Xn|SP]
4787 0q 001 1011 10 m 110 0 sz n t LD1R Vt.T, [Xn|SP], #sz (m=11111)
4788 , Xm (m!=11111)
4789 */
4790 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
4791 && INSN(22,21) == BITS2(1,0) && INSN(15,12) == BITS4(1,1,0,0)) {
sewardjdf9d6d52014-06-27 10:43:22 +00004792 UInt bitQ = INSN(30,30);
sewardj18bf5172014-06-14 18:05:30 +00004793 Bool isPX = INSN(23,23) == 1;
4794 UInt mm = INSN(20,16);
4795 UInt sz = INSN(11,10);
4796 UInt nn = INSN(9,5);
4797 UInt tt = INSN(4,0);
4798 IRType ty = integerIRTypeOfSize(1 << sz);
4799 IRTemp tEA = newTemp(Ity_I64);
4800 assign(tEA, getIReg64orSP(nn));
4801 if (nn == 31) { /* FIXME generate stack alignment check */ }
4802 IRTemp loaded = newTemp(ty);
4803 assign(loaded, loadLE(ty, mkexpr(tEA)));
4804 IRTemp dupd = math_DUP_TO_V128(loaded, ty);
sewardjdf9d6d52014-06-27 10:43:22 +00004805 putQReg128(tt, math_MAYBE_ZERO_HI64(bitQ, dupd));
4806 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
sewardj18bf5172014-06-14 18:05:30 +00004807 /* Deal with the writeback, if any. */
4808 if (!isPX && mm == BITS5(0,0,0,0,0)) {
4809 /* No writeback. */
4810 DIP("ld1r v%u.%s, [%s]\n", tt, arr, nameIReg64orSP(nn));
4811 return True;
4812 }
4813 if (isPX) {
4814 putIReg64orSP(nn, binop(Iop_Add64, mkexpr(tEA),
4815 mm == BITS5(1,1,1,1,1) ? mkU64(1 << sz)
4816 : getIReg64orZR(mm)));
4817 if (mm == BITS5(1,1,1,1,1)) {
4818 DIP("ld1r v%u.%s, [%s], %s\n", tt, arr,
4819 nameIReg64orSP(nn), nameIReg64orZR(mm));
4820 } else {
4821 DIP("ld1r v%u.%s, [%s], #%u\n", tt, arr,
4822 nameIReg64orSP(nn), 1 << sz);
4823 }
4824 return True;
4825 }
4826 return False;
4827 }
4828
sewardj168c8bd2014-06-25 13:05:23 +00004829 /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004830 /* Only a very few cases. */
4831 /* 31 23 11 9 4
4832 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4833 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4834 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4835 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4836 */
4837 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4838 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4839 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4840 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4841 ) {
4842 Bool isLD = INSN(22,22) == 1;
4843 UInt rN = INSN(9,5);
4844 UInt vT = INSN(4,0);
4845 IRTemp tEA = newTemp(Ity_I64);
4846 UInt sz = INSN(11,10);
4847 const HChar* name = "??";
4848 assign(tEA, getIReg64orSP(rN));
4849 if (rN == 31) { /* FIXME generate stack alignment check */ }
4850 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4851 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4852 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4853 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4854 if (sz == BITS2(1,1)) {
4855 name = "2d";
4856 if (isLD) {
4857 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4858 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4859 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4860 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4861 } else {
4862 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4863 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4864 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4865 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4866 }
4867 }
4868 else if (sz == BITS2(1,0)) {
4869 /* Uh, this is ugly. TODO: better. */
4870 name = "4s";
4871 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4872 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4873 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4874 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4875 if (isLD) {
4876 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4877 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4878 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4879 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4880 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4881 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4882 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4883 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4884 } else {
4885 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4886 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4887 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4888 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4889 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4890 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4891 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4892 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4893 }
4894 }
4895 else {
4896 vassert(0); // Can't happen.
4897 }
4898 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4899 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4900 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4901 return True;
4902 }
4903
sewardj39f754d2014-06-24 10:26:52 +00004904 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, no offset) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004905 /* Only a very few cases. */
4906 /* 31 23
4907 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4908 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4909 */
4910 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4911 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4912 ) {
4913 Bool isLD = INSN(22,22) == 1;
4914 UInt rN = INSN(9,5);
4915 UInt vT = INSN(4,0);
4916 IRTemp tEA = newTemp(Ity_I64);
4917 const HChar* name = "16b";
4918 assign(tEA, getIReg64orSP(rN));
4919 if (rN == 31) { /* FIXME generate stack alignment check */ }
4920 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4921 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4922 if (isLD) {
4923 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4924 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4925 } else {
4926 storeLE(tEA_0, getQReg128((vT+0) % 32));
4927 storeLE(tEA_16, getQReg128((vT+1) % 32));
4928 }
sewardj8a5ed542014-07-15 11:08:42 +00004929 DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4930 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4931 return True;
4932 }
4933
4934 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, post index) -------- */
4935 /* Only a very few cases. */
4936 /* 31 23
4937 0100 1100 1101 1111 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4938 0100 1100 1001 1111 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4939 */
4940 if ( (insn & 0xFFFFFC00) == 0x4CDFA000 // LD1
4941 || (insn & 0xFFFFFC00) == 0x4C9FA000 // ST1
4942 ) {
4943 Bool isLD = INSN(22,22) == 1;
4944 UInt rN = INSN(9,5);
4945 UInt vT = INSN(4,0);
4946 IRTemp tEA = newTemp(Ity_I64);
4947 const HChar* name = "16b";
4948 assign(tEA, getIReg64orSP(rN));
4949 if (rN == 31) { /* FIXME generate stack alignment check */ }
4950 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4951 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4952 if (isLD) {
4953 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4954 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4955 } else {
4956 storeLE(tEA_0, getQReg128((vT+0) % 32));
4957 storeLE(tEA_16, getQReg128((vT+1) % 32));
4958 }
4959 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
sewardj950ca7a2014-04-03 23:03:32 +00004960 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4961 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004962 return True;
4963 }
4964
sewardj39f754d2014-06-24 10:26:52 +00004965 /* -------- LD1/ST1 (multi 1-elem structs, 3 regs, no offset) -------- */
4966 /* Only a very few cases. */
4967 /* 31 23
4968 0100 1100 0100 0000 0110 00 n t LD1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4969 0100 1100 0000 0000 0110 00 n t ST1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4970 */
4971 if ( (insn & 0xFFFFFC00) == 0x4C406000 // LD1
4972 || (insn & 0xFFFFFC00) == 0x4C006000 // ST1
4973 ) {
4974 Bool isLD = INSN(22,22) == 1;
4975 UInt rN = INSN(9,5);
4976 UInt vT = INSN(4,0);
4977 IRTemp tEA = newTemp(Ity_I64);
4978 const HChar* name = "16b";
4979 assign(tEA, getIReg64orSP(rN));
4980 if (rN == 31) { /* FIXME generate stack alignment check */ }
4981 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4982 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4983 IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
4984 if (isLD) {
4985 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4986 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4987 putQReg128((vT+2) % 32, loadLE(Ity_V128, tEA_32));
4988 } else {
4989 storeLE(tEA_0, getQReg128((vT+0) % 32));
4990 storeLE(tEA_16, getQReg128((vT+1) % 32));
4991 storeLE(tEA_32, getQReg128((vT+2) % 32));
4992 }
4993 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
4994 isLD ? "ld1" : "st1",
4995 (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
4996 nameIReg64orSP(rN));
4997 return True;
4998 }
4999
sewardj168c8bd2014-06-25 13:05:23 +00005000 /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
5001 /* Only a very few cases. */
5002 /* 31 23 11 9 4
5003 0100 1100 1101 1111 0100 11 n t LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
5004 0100 1100 1001 1111 0100 11 n t ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
5005 */
5006 if ( (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
5007 || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
5008 ) {
5009 Bool isLD = INSN(22,22) == 1;
5010 UInt rN = INSN(9,5);
5011 UInt vT = INSN(4,0);
5012 IRTemp tEA = newTemp(Ity_I64);
5013 UInt sz = INSN(11,10);
5014 const HChar* name = "??";
5015 assign(tEA, getIReg64orSP(rN));
5016 if (rN == 31) { /* FIXME generate stack alignment check */ }
5017 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
5018 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
5019 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
5020 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
5021 IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
5022 IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
5023 if (sz == BITS2(1,1)) {
5024 name = "2d";
5025 if (isLD) {
5026 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
5027 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
5028 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
5029 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
5030 putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
5031 putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
5032 } else {
5033 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
5034 storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
5035 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
5036 storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
5037 storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
5038 storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
5039 }
5040 }
5041 else {
5042 vassert(0); // Can't happen.
5043 }
5044 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48)));
5045 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
5046 isLD ? "ld3" : "st3",
5047 (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
5048 nameIReg64orSP(rN));
5049 return True;
5050 }
5051
sewardj7d009132014-02-20 17:43:38 +00005052 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
5053 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
5054 /* 31 29 23 20 14 9 4
5055 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
5056 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
5057 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
5058 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00005059 */
sewardj7d009132014-02-20 17:43:38 +00005060 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
5061 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
5062 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00005063 UInt szBlg2 = INSN(31,30);
5064 Bool isLD = INSN(22,22) == 1;
5065 Bool isAcqOrRel = INSN(15,15) == 1;
5066 UInt ss = INSN(20,16);
5067 UInt nn = INSN(9,5);
5068 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00005069
sewardjdc9259c2014-02-27 11:10:19 +00005070 vassert(szBlg2 < 4);
5071 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
5072 IRType ty = integerIRTypeOfSize(szB);
5073 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00005074
sewardjdc9259c2014-02-27 11:10:19 +00005075 IRTemp ea = newTemp(Ity_I64);
5076 assign(ea, getIReg64orSP(nn));
5077 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00005078
sewardjdc9259c2014-02-27 11:10:19 +00005079 if (isLD && ss == BITS5(1,1,1,1,1)) {
5080 IRTemp res = newTemp(ty);
5081 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
5082 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
5083 if (isAcqOrRel) {
5084 stmt(IRStmt_MBE(Imbe_Fence));
5085 }
5086 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
5087 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5088 return True;
5089 }
5090 if (!isLD) {
5091 if (isAcqOrRel) {
5092 stmt(IRStmt_MBE(Imbe_Fence));
5093 }
5094 IRTemp res = newTemp(Ity_I1);
5095 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
5096 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
5097 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
5098 Need to set rS to 1 on failure, 0 on success. */
5099 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
5100 mkU64(1)));
5101 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
5102 nameIRegOrZR(False, ss),
5103 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5104 return True;
5105 }
5106 /* else fall through */
5107 }
5108
5109 /* ------------------ LDA{R,RH,RB} ------------------ */
5110 /* ------------------ STL{R,RH,RB} ------------------ */
5111 /* 31 29 23 20 14 9 4
5112 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
5113 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
5114 */
5115 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
5116 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
5117 UInt szBlg2 = INSN(31,30);
5118 Bool isLD = INSN(22,22) == 1;
5119 UInt nn = INSN(9,5);
5120 UInt tt = INSN(4,0);
5121
5122 vassert(szBlg2 < 4);
5123 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
5124 IRType ty = integerIRTypeOfSize(szB);
5125 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
5126
5127 IRTemp ea = newTemp(Ity_I64);
5128 assign(ea, getIReg64orSP(nn));
5129 /* FIXME generate check that ea is szB-aligned */
5130
5131 if (isLD) {
5132 IRTemp res = newTemp(ty);
5133 assign(res, loadLE(ty, mkexpr(ea)));
5134 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
5135 stmt(IRStmt_MBE(Imbe_Fence));
5136 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
5137 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5138 } else {
5139 stmt(IRStmt_MBE(Imbe_Fence));
5140 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
5141 storeLE(mkexpr(ea), data);
5142 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
5143 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5144 }
5145 return True;
sewardjbbcf1882014-01-12 12:49:10 +00005146 }
5147
5148 vex_printf("ARM64 front end: load_store\n");
5149 return False;
5150# undef INSN
5151}
5152
5153
5154/*------------------------------------------------------------*/
5155/*--- Control flow and misc instructions ---*/
5156/*------------------------------------------------------------*/
5157
5158static
sewardj65902992014-05-03 21:20:56 +00005159Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
5160 VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00005161{
5162# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5163
5164 /* ---------------------- B cond ----------------------- */
5165 /* 31 24 4 3
5166 0101010 0 imm19 0 cond */
5167 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
5168 UInt cond = INSN(3,0);
5169 ULong uimm64 = INSN(23,5) << 2;
5170 Long simm64 = (Long)sx_to_64(uimm64, 21);
5171 vassert(dres->whatNext == Dis_Continue);
5172 vassert(dres->len == 4);
5173 vassert(dres->continueAt == 0);
5174 vassert(dres->jk_StopHere == Ijk_INVALID);
5175 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
5176 Ijk_Boring,
5177 IRConst_U64(guest_PC_curr_instr + simm64),
5178 OFFB_PC) );
5179 putPC(mkU64(guest_PC_curr_instr + 4));
5180 dres->whatNext = Dis_StopHere;
5181 dres->jk_StopHere = Ijk_Boring;
5182 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
5183 return True;
5184 }
5185
5186 /* -------------------- B{L} uncond -------------------- */
5187 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
5188 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
5189 100101 imm26 B (PC + sxTo64(imm26 << 2))
5190 */
5191 UInt bLink = INSN(31,31);
5192 ULong uimm64 = INSN(25,0) << 2;
5193 Long simm64 = (Long)sx_to_64(uimm64, 28);
5194 if (bLink) {
5195 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
5196 }
5197 putPC(mkU64(guest_PC_curr_instr + simm64));
5198 dres->whatNext = Dis_StopHere;
5199 dres->jk_StopHere = Ijk_Call;
5200 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
5201 guest_PC_curr_instr + simm64);
5202 return True;
5203 }
5204
5205 /* --------------------- B{L} reg --------------------- */
5206 /* 31 24 22 20 15 9 4
5207 1101011 00 10 11111 000000 nn 00000 RET Rn
5208 1101011 00 01 11111 000000 nn 00000 CALL Rn
5209 1101011 00 00 11111 000000 nn 00000 JMP Rn
5210 */
5211 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
5212 && INSN(20,16) == BITS5(1,1,1,1,1)
5213 && INSN(15,10) == BITS6(0,0,0,0,0,0)
5214 && INSN(4,0) == BITS5(0,0,0,0,0)) {
5215 UInt branch_type = INSN(22,21);
5216 UInt nn = INSN(9,5);
5217 if (branch_type == BITS2(1,0) /* RET */) {
5218 putPC(getIReg64orZR(nn));
5219 dres->whatNext = Dis_StopHere;
5220 dres->jk_StopHere = Ijk_Ret;
5221 DIP("ret %s\n", nameIReg64orZR(nn));
5222 return True;
5223 }
5224 if (branch_type == BITS2(0,1) /* CALL */) {
sewardj702054e2014-05-07 11:09:28 +00005225 IRTemp dst = newTemp(Ity_I64);
5226 assign(dst, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005227 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
sewardj702054e2014-05-07 11:09:28 +00005228 putPC(mkexpr(dst));
sewardjbbcf1882014-01-12 12:49:10 +00005229 dres->whatNext = Dis_StopHere;
5230 dres->jk_StopHere = Ijk_Call;
5231 DIP("blr %s\n", nameIReg64orZR(nn));
5232 return True;
5233 }
5234 if (branch_type == BITS2(0,0) /* JMP */) {
5235 putPC(getIReg64orZR(nn));
5236 dres->whatNext = Dis_StopHere;
5237 dres->jk_StopHere = Ijk_Boring;
5238 DIP("jmp %s\n", nameIReg64orZR(nn));
5239 return True;
5240 }
5241 }
5242
5243 /* -------------------- CB{N}Z -------------------- */
5244 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
5245 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
5246 */
5247 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
5248 Bool is64 = INSN(31,31) == 1;
5249 Bool bIfZ = INSN(24,24) == 0;
5250 ULong uimm64 = INSN(23,5) << 2;
5251 UInt rT = INSN(4,0);
5252 Long simm64 = (Long)sx_to_64(uimm64, 21);
5253 IRExpr* cond = NULL;
5254 if (is64) {
5255 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
5256 getIReg64orZR(rT), mkU64(0));
5257 } else {
5258 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
5259 getIReg32orZR(rT), mkU32(0));
5260 }
5261 stmt( IRStmt_Exit(cond,
5262 Ijk_Boring,
5263 IRConst_U64(guest_PC_curr_instr + simm64),
5264 OFFB_PC) );
5265 putPC(mkU64(guest_PC_curr_instr + 4));
5266 dres->whatNext = Dis_StopHere;
5267 dres->jk_StopHere = Ijk_Boring;
5268 DIP("cb%sz %s, 0x%llx\n",
5269 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
5270 guest_PC_curr_instr + simm64);
5271 return True;
5272 }
5273
5274 /* -------------------- TB{N}Z -------------------- */
5275 /* 31 30 24 23 18 5 4
5276 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
5277 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
5278 */
5279 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
5280 UInt b5 = INSN(31,31);
5281 Bool bIfZ = INSN(24,24) == 0;
5282 UInt b40 = INSN(23,19);
5283 UInt imm14 = INSN(18,5);
5284 UInt tt = INSN(4,0);
5285 UInt bitNo = (b5 << 5) | b40;
5286 ULong uimm64 = imm14 << 2;
5287 Long simm64 = sx_to_64(uimm64, 16);
5288 IRExpr* cond
5289 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
5290 binop(Iop_And64,
5291 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
5292 mkU64(1)),
5293 mkU64(0));
5294 stmt( IRStmt_Exit(cond,
5295 Ijk_Boring,
5296 IRConst_U64(guest_PC_curr_instr + simm64),
5297 OFFB_PC) );
5298 putPC(mkU64(guest_PC_curr_instr + 4));
5299 dres->whatNext = Dis_StopHere;
5300 dres->jk_StopHere = Ijk_Boring;
5301 DIP("tb%sz %s, #%u, 0x%llx\n",
5302 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
5303 guest_PC_curr_instr + simm64);
5304 return True;
5305 }
5306
5307 /* -------------------- SVC -------------------- */
5308 /* 11010100 000 imm16 000 01
5309 Don't bother with anything except the imm16==0 case.
5310 */
5311 if (INSN(31,0) == 0xD4000001) {
5312 putPC(mkU64(guest_PC_curr_instr + 4));
5313 dres->whatNext = Dis_StopHere;
5314 dres->jk_StopHere = Ijk_Sys_syscall;
5315 DIP("svc #0\n");
5316 return True;
5317 }
5318
5319 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00005320 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00005321 0xD51BD0 010 Rt MSR tpidr_el0, rT
5322 0xD53BD0 010 Rt MRS rT, tpidr_el0
5323 */
5324 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
5325 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
5326 Bool toSys = INSN(21,21) == 0;
5327 UInt tt = INSN(4,0);
5328 if (toSys) {
5329 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
5330 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
5331 } else {
5332 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
5333 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
5334 }
5335 return True;
5336 }
sewardj6eb5ef82014-07-14 20:39:23 +00005337 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00005338 0xD51B44 000 Rt MSR fpcr, rT
5339 0xD53B44 000 Rt MRS rT, fpcr
5340 */
5341 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
5342 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
5343 Bool toSys = INSN(21,21) == 0;
5344 UInt tt = INSN(4,0);
5345 if (toSys) {
5346 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
5347 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
5348 } else {
5349 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
5350 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
5351 }
5352 return True;
5353 }
sewardj6eb5ef82014-07-14 20:39:23 +00005354 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00005355 0xD51B44 001 Rt MSR fpsr, rT
5356 0xD53B44 001 Rt MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00005357 The only part of this we model is FPSR.QC. All other bits
5358 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00005359 */
5360 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
5361 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
5362 Bool toSys = INSN(21,21) == 0;
5363 UInt tt = INSN(4,0);
5364 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00005365 /* Just deal with FPSR.QC. Make up a V128 value which is
5366 zero if Xt[27] is zero and any other value if Xt[27] is
5367 nonzero. */
5368 IRTemp qc64 = newTemp(Ity_I64);
5369 assign(qc64, binop(Iop_And64,
5370 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
5371 mkU64(1)));
5372 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
5373 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00005374 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
5375 } else {
sewardja0645d52014-06-28 22:11:16 +00005376 /* Generate a value which is all zeroes except for bit 27,
5377 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00005378 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00005379 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
5380 IRTemp qc64 = newTemp(Ity_I64);
5381 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
5382 unop(Iop_V128to64, mkexpr(qcV128))));
5383 IRExpr* res = binop(Iop_Shl64,
5384 unop(Iop_1Uto64,
5385 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
5386 mkU8(27));
5387 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00005388 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
5389 }
5390 return True;
5391 }
sewardj6eb5ef82014-07-14 20:39:23 +00005392 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00005393 D51B42 000 Rt MSR nzcv, rT
5394 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00005395 The only parts of NZCV that actually exist are bits 31:28, which
5396 are the N Z C and V bits themselves. Hence the flags thunk provides
5397 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00005398 */
5399 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
5400 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
5401 Bool toSys = INSN(21,21) == 0;
5402 UInt tt = INSN(4,0);
5403 if (toSys) {
5404 IRTemp t = newTemp(Ity_I64);
5405 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
5406 setFlags_COPY(t);
5407 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
5408 } else {
5409 IRTemp res = newTemp(Ity_I64);
5410 assign(res, mk_arm64g_calculate_flags_nzcv());
5411 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
5412 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
5413 }
5414 return True;
5415 }
sewardj6eb5ef82014-07-14 20:39:23 +00005416 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00005417 Don't support arbitrary reads and writes to this register. Just
5418 return the value 16, which indicates that the DC ZVA instruction
5419 is not permitted, so we don't have to emulate it.
5420 D5 3B 00 111 Rt MRS rT, dczid_el0
5421 */
5422 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
5423 UInt tt = INSN(4,0);
5424 putIReg64orZR(tt, mkU64(1<<4));
5425 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
5426 return True;
5427 }
sewardj6eb5ef82014-07-14 20:39:23 +00005428 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00005429 We just handle reads, and make up a value from the D and I line
5430 sizes in the VexArchInfo we are given, and patch in the following
5431 fields that the Foundation model gives ("natively"):
5432 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
5433 D5 3B 00 001 Rt MRS rT, dczid_el0
5434 */
5435 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
5436 UInt tt = INSN(4,0);
5437 /* Need to generate a value from dMinLine_lg2_szB and
5438 dMinLine_lg2_szB. The value in the register is in 32-bit
5439 units, so need to subtract 2 from the values in the
5440 VexArchInfo. We can assume that the values here are valid --
5441 disInstr_ARM64 checks them -- so there's no need to deal with
5442 out-of-range cases. */
5443 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
5444 && archinfo->arm64_dMinLine_lg2_szB <= 17
5445 && archinfo->arm64_iMinLine_lg2_szB >= 2
5446 && archinfo->arm64_iMinLine_lg2_szB <= 17);
5447 UInt val
5448 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
5449 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
5450 putIReg64orZR(tt, mkU64(val));
5451 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
5452 return True;
5453 }
sewardj6eb5ef82014-07-14 20:39:23 +00005454 /* ---- Cases for CNTVCT_EL0 ----
5455 This is a timestamp counter of some sort. Support reads of it only
5456 by passing through to the host.
5457 D5 3B E0 010 Rt MRS Xt, cntvct_el0
5458 */
5459 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
5460 UInt tt = INSN(4,0);
5461 IRTemp val = newTemp(Ity_I64);
5462 IRExpr** args = mkIRExprVec_0();
5463 IRDirty* d = unsafeIRDirty_1_N (
5464 val,
5465 0/*regparms*/,
5466 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
5467 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
5468 args
5469 );
5470 /* execute the dirty call, dumping the result in val. */
5471 stmt( IRStmt_Dirty(d) );
5472 putIReg64orZR(tt, mkexpr(val));
5473 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
5474 return True;
5475 }
sewardjbbcf1882014-01-12 12:49:10 +00005476
sewardj65902992014-05-03 21:20:56 +00005477 /* ------------------ IC_IVAU ------------------ */
5478 /* D5 0B 75 001 Rt ic ivau, rT
5479 */
5480 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
5481 /* We will always be provided with a valid iMinLine value. */
5482 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
5483 && archinfo->arm64_iMinLine_lg2_szB <= 17);
5484 /* Round the requested address, in rT, down to the start of the
5485 containing block. */
5486 UInt tt = INSN(4,0);
5487 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
5488 IRTemp addr = newTemp(Ity_I64);
5489 assign( addr, binop( Iop_And64,
5490 getIReg64orZR(tt),
5491 mkU64(~(lineszB - 1))) );
5492 /* Set the invalidation range, request exit-and-invalidate, with
5493 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00005494 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
5495 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00005496 /* be paranoid ... */
5497 stmt( IRStmt_MBE(Imbe_Fence) );
5498 putPC(mkU64( guest_PC_curr_instr + 4 ));
5499 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00005500 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00005501 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
5502 return True;
5503 }
5504
5505 /* ------------------ DC_CVAU ------------------ */
5506 /* D5 0B 7B 001 Rt dc cvau, rT
5507 */
5508 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
5509 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00005510 dMinLine size, and request an Ijk_FlushDCache instead of
5511 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00005512 /* We will always be provided with a valid dMinLine value. */
5513 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
5514 && archinfo->arm64_dMinLine_lg2_szB <= 17);
5515 /* Round the requested address, in rT, down to the start of the
5516 containing block. */
5517 UInt tt = INSN(4,0);
5518 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
5519 IRTemp addr = newTemp(Ity_I64);
5520 assign( addr, binop( Iop_And64,
5521 getIReg64orZR(tt),
5522 mkU64(~(lineszB - 1))) );
5523 /* Set the flush range, request exit-and-flush, with
5524 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00005525 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
5526 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00005527 /* be paranoid ... */
5528 stmt( IRStmt_MBE(Imbe_Fence) );
5529 putPC(mkU64( guest_PC_curr_instr + 4 ));
5530 dres->whatNext = Dis_StopHere;
5531 dres->jk_StopHere = Ijk_FlushDCache;
5532 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
5533 return True;
5534 }
5535
5536 /* ------------------ ISB, DMB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00005537 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00005538 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00005539 DIP("isb\n");
5540 return True;
5541 }
5542 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00005543 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00005544 DIP("dmb ish\n");
5545 return True;
5546 }
sewardjab102bd2014-06-04 11:44:45 +00005547 if (INSN(31,0) == 0xD5033ABF) {
5548 stmt(IRStmt_MBE(Imbe_Fence));
5549 DIP("dmb ishst\n");
5550 return True;
5551 }
sewardj65902992014-05-03 21:20:56 +00005552 if (INSN(31,0) == 0xD5033B9F) {
5553 stmt(IRStmt_MBE(Imbe_Fence));
5554 DIP("dsb ish\n");
5555 return True;
5556 }
sewardjbbcf1882014-01-12 12:49:10 +00005557
sewardjdc9259c2014-02-27 11:10:19 +00005558 /* -------------------- NOP -------------------- */
5559 if (INSN(31,0) == 0xD503201F) {
5560 DIP("nop\n");
5561 return True;
5562 }
5563
sewardjbbcf1882014-01-12 12:49:10 +00005564 //fail:
5565 vex_printf("ARM64 front end: branch_etc\n");
5566 return False;
5567# undef INSN
5568}
5569
5570
5571/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00005572/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00005573/*------------------------------------------------------------*/
5574
sewardjd96daf62014-06-15 08:17:35 +00005575/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00005576
sewardjd96daf62014-06-15 08:17:35 +00005577static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
5578 // returns a0 b0
5579 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
5580}
sewardje520bb32014-02-17 11:00:53 +00005581
sewardjd96daf62014-06-15 08:17:35 +00005582static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
5583 // returns a1 b1
5584 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
5585}
sewardje520bb32014-02-17 11:00:53 +00005586
sewardjd96daf62014-06-15 08:17:35 +00005587static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
5588 // returns a2 a0 b2 b0
5589 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
5590}
5591
5592static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
5593 // returns a3 a1 b3 b1
5594 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
5595}
5596
5597static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
5598 // returns a1 b1 a0 b0
5599 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
5600}
5601
5602static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
5603 // returns a3 b3 a2 b2
5604 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
5605}
5606
5607static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5608 // returns a6 a4 a2 a0 b6 b4 b2 b0
5609 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
5610}
5611
5612static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5613 // returns a7 a5 a3 a1 b7 b5 b3 b1
5614 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
5615}
5616
5617static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5618 // returns a3 b3 a2 b2 a1 b1 a0 b0
5619 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
5620}
5621
5622static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5623 // returns a7 b7 a6 b6 a5 b5 a4 b4
5624 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
5625}
5626
5627static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
5628 IRTemp bFEDCBA9876543210 ) {
5629 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
5630 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
5631 mkexpr(bFEDCBA9876543210));
5632}
5633
5634static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
5635 IRTemp bFEDCBA9876543210 ) {
5636 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
5637 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
5638 mkexpr(bFEDCBA9876543210));
5639}
5640
5641static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
5642 IRTemp bFEDCBA9876543210 ) {
5643 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
5644 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
5645 mkexpr(bFEDCBA9876543210));
5646}
5647
5648static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
5649 IRTemp bFEDCBA9876543210 ) {
5650 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
5651 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
5652 mkexpr(bFEDCBA9876543210));
5653}
sewardjecde6972014-02-05 11:01:19 +00005654
sewardjbbcf1882014-01-12 12:49:10 +00005655/* Generate N copies of |bit| in the bottom of a ULong. */
5656static ULong Replicate ( ULong bit, Int N )
5657{
sewardj606c4ba2014-01-26 19:11:14 +00005658 vassert(bit <= 1 && N >= 1 && N < 64);
5659 if (bit == 0) {
5660 return 0;
5661 } else {
5662 /* Careful. This won't work for N == 64. */
5663 return (1ULL << N) - 1;
5664 }
sewardjbbcf1882014-01-12 12:49:10 +00005665}
5666
sewardjfab09142014-02-10 10:28:13 +00005667static ULong Replicate32x2 ( ULong bits32 )
5668{
5669 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
5670 return (bits32 << 32) | bits32;
5671}
5672
5673static ULong Replicate16x4 ( ULong bits16 )
5674{
5675 vassert(0 == (bits16 & ~0xFFFFULL));
5676 return Replicate32x2((bits16 << 16) | bits16);
5677}
5678
5679static ULong Replicate8x8 ( ULong bits8 )
5680{
5681 vassert(0 == (bits8 & ~0xFFULL));
5682 return Replicate16x4((bits8 << 8) | bits8);
5683}
5684
5685/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
5686 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
5687 is 64. In the former case, the upper 32 bits of the returned value
5688 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00005689static ULong VFPExpandImm ( ULong imm8, Int N )
5690{
sewardj606c4ba2014-01-26 19:11:14 +00005691 vassert(imm8 <= 0xFF);
5692 vassert(N == 32 || N == 64);
5693 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
5694 Int F = N - E - 1;
5695 ULong imm8_6 = (imm8 >> 6) & 1;
5696 /* sign: 1 bit */
5697 /* exp: E bits */
5698 /* frac: F bits */
5699 ULong sign = (imm8 >> 7) & 1;
5700 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
5701 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
5702 vassert(sign < (1ULL << 1));
5703 vassert(exp < (1ULL << E));
5704 vassert(frac < (1ULL << F));
5705 vassert(1 + E + F == N);
5706 ULong res = (sign << (E+F)) | (exp << F) | frac;
5707 return res;
sewardjbbcf1882014-01-12 12:49:10 +00005708}
5709
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.
   |op|, |cmode| and |imm8| are the instruction fields of those names;
   on success the expanded immediate is written to *|res| and True is
   returned.  On failure *|res| is left at zero. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   /* Dispatch on cmode<3:1>; cmode<0> (and op, for case 7) selects
      variants within each case.  'testimm8' records whether the
      encoding is invalid when imm8 == 0 (those shifted forms would
      otherwise just duplicate other encodings). */
   switch (cmode >> 1) {
      case 0: /* 32-bit lanes: 000000000000000000000000abcdefgh */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1: /* 32-bit lanes, imm8 shifted left by 8 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2: /* 32-bit lanes, imm8 shifted left by 16 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3: /* 32-bit lanes, imm8 shifted left by 24 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4: /* 16-bit lanes: 00000000abcdefgh */
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5: /* 16-bit lanes, imm8 shifted left by 8 */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* 32-bit "shifting ones" forms: imm8:0xFF or imm8:0xFFFF */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         /* cmode<0>=0, op=0: imm8 replicated into all 8 byte lanes */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* cmode<0>=0, op=1: each bit of imm8 expands to a whole byte
            of 0xFF or 0x00 (a per-byte mask) */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;  imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* cmode<0>=1, op=0: 32-bit FP immediate (VFPExpandImm-style
            sign/exponent/fraction fields), replicated to both halves */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* cmode<0>=1, op=1: 64-bit FP immediate */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
        vassert(0);
   }

   /* Shifted forms are invalid with imm8 == 0 -- reject them. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
5790
sewardj606c4ba2014-01-26 19:11:14 +00005791/* Help a bit for decoding laneage for vector operations that can be
5792 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
5793 and SZ bits, typically for vector floating point. */
5794static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
5795 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
5796 /*OUT*/const HChar** arrSpec,
5797 Bool bitQ, Bool bitSZ )
5798{
5799 vassert(bitQ == True || bitQ == False);
5800 vassert(bitSZ == True || bitSZ == False);
5801 if (bitQ && bitSZ) { // 2x64
5802 if (tyI) *tyI = Ity_I64;
5803 if (tyF) *tyF = Ity_F64;
5804 if (nLanes) *nLanes = 2;
5805 if (zeroUpper) *zeroUpper = False;
5806 if (arrSpec) *arrSpec = "2d";
5807 return True;
5808 }
5809 if (bitQ && !bitSZ) { // 4x32
5810 if (tyI) *tyI = Ity_I32;
5811 if (tyF) *tyF = Ity_F32;
5812 if (nLanes) *nLanes = 4;
5813 if (zeroUpper) *zeroUpper = False;
5814 if (arrSpec) *arrSpec = "4s";
5815 return True;
5816 }
5817 if (!bitQ && !bitSZ) { // 2x32
5818 if (tyI) *tyI = Ity_I32;
5819 if (tyF) *tyF = Ity_F32;
5820 if (nLanes) *nLanes = 2;
5821 if (zeroUpper) *zeroUpper = True;
5822 if (arrSpec) *arrSpec = "2s";
5823 return True;
5824 }
5825 // Else impliedly 1x64, which isn't allowed.
5826 return False;
5827}
5828
sewardje520bb32014-02-17 11:00:53 +00005829/* Helper for decoding laneage for shift-style vector operations
5830 that involve an immediate shift amount. */
5831static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
5832 UInt immh, UInt immb )
5833{
5834 vassert(immh < (1<<4));
5835 vassert(immb < (1<<3));
5836 UInt immhb = (immh << 3) | immb;
5837 if (immh & 8) {
5838 if (shift) *shift = 128 - immhb;
5839 if (szBlg2) *szBlg2 = 3;
5840 return True;
5841 }
5842 if (immh & 4) {
5843 if (shift) *shift = 64 - immhb;
5844 if (szBlg2) *szBlg2 = 2;
5845 return True;
5846 }
5847 if (immh & 2) {
5848 if (shift) *shift = 32 - immhb;
5849 if (szBlg2) *szBlg2 = 1;
5850 return True;
5851 }
5852 if (immh & 1) {
5853 if (shift) *shift = 16 - immhb;
5854 if (szBlg2) *szBlg2 = 0;
5855 return True;
5856 }
5857 return False;
5858}
5859
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero.
   'op' must be one of the binary min/max/add operators handled in the
   switch below; the lane width of 'op' determines how many folding
   stages are needed (log2 of the lane count). */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: split into two vectors, each holding one 64-bit
            half duplicated. */
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: four vectors, each a 32-bit quarter duplicated. */
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: eight vectors, each a 16-bit chunk duplicated. */
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: sixteen vectors, each one byte lane of 'src'
            duplicated into all sixteen lanes. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Now fold the sixteen "broadcast" vectors pairwise with 'op'
            down to a single vector (despite the 'max' naming, 'op' may
            equally be a min or add). */
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Finally, keep only the least significant 8-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         /* Same scheme as the 8x16 case, but 8 lanes of 16 bits, so
            one fewer duplication/folding stage is needed. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Each xNNNNNNNN holds lane N of 'src' in all 8 positions. */
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Pairwise fold with 'op' down to one vector. */
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the least significant 16-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         /* 4 lanes of 32 bits: two duplication stages, then fold. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         /* Each xNNNN holds lane N of 'src' in all 4 positions. */
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the least significant 32-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         /* 2 lanes of 64 bits: one duplication stage, one fold. */
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         /* Keep only the least significant 64-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}
6062
6063
sewardj92d0ae32014-04-03 13:48:54 +00006064/* Generate IR for TBL and TBX. This deals with the 128 bit case
6065 only. */
6066static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
6067 IRTemp oor_values )
6068{
6069 vassert(len >= 0 && len <= 3);
6070
6071 /* Generate some useful constants as concisely as possible. */
6072 IRTemp half15 = newTemp(Ity_I64);
6073 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
6074 IRTemp half16 = newTemp(Ity_I64);
6075 assign(half16, mkU64(0x1010101010101010ULL));
6076
6077 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00006078 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006079 assign(allZero, mkV128(0x0000));
6080 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006081 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006082 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
6083 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006084 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006085 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
6086 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006087 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006088 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
6089 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006090 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006091 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
6092 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006093 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006094 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
6095
6096 /* Group the 16/32/48/64 vectors so as to be indexable. */
6097 IRTemp allXX[4] = { all16, all32, all48, all64 };
6098
6099 /* Compute the result for each table vector, with zeroes in places
6100 where the index values are out of range, and OR them into the
6101 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00006102 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006103 assign(running_result, mkV128(0));
6104
6105 UInt tabent;
6106 for (tabent = 0; tabent <= len; tabent++) {
6107 vassert(tabent >= 0 && tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00006108 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006109 assign(bias,
6110 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00006111 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006112 assign(biased_indices,
6113 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00006114 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006115 assign(valid_mask,
6116 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00006117 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006118 assign(safe_biased_indices,
6119 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00006120 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006121 assign(results_or_junk,
6122 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
6123 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00006124 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006125 assign(results_or_zero,
6126 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
6127 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00006128 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006129 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
6130 mkexpr(running_result)));
6131 running_result = tmp;
6132 }
6133
6134 /* So now running_result holds the overall result where the indices
6135 are in range, and zero in out-of-range lanes. Now we need to
6136 compute an overall validity mask and use this to copy in the
6137 lanes in the oor_values for out of range indices. This is
6138 unnecessary for TBL but will get folded out by iropt, so we lean
6139 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00006140 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006141 assign(overall_valid_mask,
6142 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00006143 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006144 assign(result,
6145 binop(Iop_OrV128,
6146 mkexpr(running_result),
6147 binop(Iop_AndV128,
6148 mkexpr(oor_values),
6149 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
6150 return result;
6151}
6152
6153
sewardj31b5a952014-06-26 07:41:14 +00006154/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
6155 an op which takes two I64s and produces a V128. That is, a widening
6156 operator. Generate IR which applies |opI64x2toV128| to either the
6157 lower (if |is2| is False) or upper (if |is2| is True) halves of
6158 |argL| and |argR|, and return the value in a new IRTemp.
6159*/
6160static
6161IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
6162 IRExpr* argL, IRExpr* argR )
6163{
sewardj8e91fd42014-07-11 12:05:47 +00006164 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00006165 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
6166 assign(res, binop(opI64x2toV128, unop(slice, argL),
6167 unop(slice, argR)));
6168 return res;
6169}
6170
6171
sewardjdf9d6d52014-06-27 10:43:22 +00006172/* Generate signed/unsigned absolute difference vector IR. */
6173static
6174IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
6175{
sewardj6f312d02014-06-28 12:21:37 +00006176 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00006177 IRTemp argL = newTempV128();
6178 IRTemp argR = newTempV128();
6179 IRTemp msk = newTempV128();
6180 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00006181 assign(argL, argLE);
6182 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00006183 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00006184 mkexpr(argL), mkexpr(argR)));
6185 assign(res,
6186 binop(Iop_OrV128,
6187 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00006188 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00006189 mkexpr(msk)),
6190 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00006191 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00006192 unop(Iop_NotV128, mkexpr(msk)))));
6193 return res;
6194}
6195
6196
sewardj6f312d02014-06-28 12:21:37 +00006197/* Generate IR that takes a V128 and sign- or zero-widens
6198 either the lower or upper set of lanes to twice-as-wide,
6199 resulting in a new V128 value. */
6200static
sewardja5a6b752014-06-30 07:33:56 +00006201IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
6202 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00006203{
sewardj8e91fd42014-07-11 12:05:47 +00006204 IRTemp src = newTempV128();
6205 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00006206 assign(src, srcE);
6207 switch (sizeNarrow) {
6208 case X10:
6209 assign(res,
6210 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
6211 binop(fromUpperHalf ? Iop_InterleaveHI32x4
6212 : Iop_InterleaveLO32x4,
6213 mkexpr(src),
6214 mkexpr(src)),
6215 mkU8(32)));
6216 break;
6217 case X01:
6218 assign(res,
6219 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
6220 binop(fromUpperHalf ? Iop_InterleaveHI16x8
6221 : Iop_InterleaveLO16x8,
6222 mkexpr(src),
6223 mkexpr(src)),
6224 mkU8(16)));
6225 break;
6226 case X00:
6227 assign(res,
6228 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
6229 binop(fromUpperHalf ? Iop_InterleaveHI8x16
6230 : Iop_InterleaveLO8x16,
6231 mkexpr(src),
6232 mkexpr(src)),
6233 mkU8(8)));
6234 break;
6235 default:
6236 vassert(0);
6237 }
6238 return res;
6239}
6240
6241
sewardja5a6b752014-06-30 07:33:56 +00006242/* Generate IR that takes a V128 and sign- or zero-widens
6243 either the even or odd lanes to twice-as-wide,
6244 resulting in a new V128 value. */
6245static
6246IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
6247 UInt sizeNarrow, IRExpr* srcE )
6248{
sewardj8e91fd42014-07-11 12:05:47 +00006249 IRTemp src = newTempV128();
6250 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00006251 IROp opSAR = mkVecSARN(sizeNarrow+1);
6252 IROp opSHR = mkVecSHRN(sizeNarrow+1);
6253 IROp opSHL = mkVecSHLN(sizeNarrow+1);
6254 IROp opSxR = zWiden ? opSHR : opSAR;
6255 UInt amt = 0;
6256 switch (sizeNarrow) {
6257 case X10: amt = 32; break;
6258 case X01: amt = 16; break;
6259 case X00: amt = 8; break;
6260 default: vassert(0);
6261 }
6262 assign(src, srcE);
6263 if (fromOdd) {
6264 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
6265 } else {
6266 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
6267 mkU8(amt)));
6268 }
6269 return res;
6270}
6271
6272
6273/* Generate IR that takes two V128s and narrows (takes lower half)
6274 of each lane, producing a single V128 value. */
6275static
6276IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
6277{
sewardj8e91fd42014-07-11 12:05:47 +00006278 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00006279 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
6280 mkexpr(argHi), mkexpr(argLo)));
6281 return res;
6282}
6283
6284
/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo].  The dup is built from a
   log2-style cascade of lane-concatenation ops: each op in ops[]
   doubles the number of copies of the selected lane, working from
   64-bit granularity (ops[3]) down to the requested lane size. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   /* Each case deliberately falls through to the next: a B-lane dup
      needs all four selection steps, an H-lane dup only the last
      three, and so on.  Entries not reached stay Iop_INVALID. */
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   /* Apply the selection ops widest-first; the first Iop_INVALID
      marks the point below which no further selection is needed. */
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
6327
6328
6329/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
6330 selector encoded as shown below. Return a new V128 holding the
6331 selected lane from |srcV| dup'd out to V128, and also return the
6332 lane number, log2 of the lane size in bytes, and width-character via
6333 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
6334 is an invalid selector, in which case return
6335 IRTemp_INVALID, 0, 0 and '?' respectively.
6336
6337 imm5 = xxxx1 signifies .b[xxxx]
6338 = xxx10 .h[xxx]
6339 = xx100 .s[xx]
6340 = x1000 .d[x]
6341 otherwise invalid
6342*/
6343static
6344IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
6345 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
6346 IRExpr* srcV, UInt imm5 )
6347{
6348 *laneNo = 0;
6349 *laneSzLg2 = 0;
6350 *laneCh = '?';
6351
6352 if (imm5 & 1) {
6353 *laneNo = (imm5 >> 1) & 15;
6354 *laneSzLg2 = 0;
6355 *laneCh = 'b';
6356 }
6357 else if (imm5 & 2) {
6358 *laneNo = (imm5 >> 2) & 7;
6359 *laneSzLg2 = 1;
6360 *laneCh = 'h';
6361 }
6362 else if (imm5 & 4) {
6363 *laneNo = (imm5 >> 3) & 3;
6364 *laneSzLg2 = 2;
6365 *laneCh = 's';
6366 }
6367 else if (imm5 & 8) {
6368 *laneNo = (imm5 >> 4) & 1;
6369 *laneSzLg2 = 3;
6370 *laneCh = 'd';
6371 }
6372 else {
6373 /* invalid */
6374 return IRTemp_INVALID;
6375 }
6376
6377 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
6378}
6379
6380
6381/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
6382static
6383IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
6384{
6385 IRType ty = Ity_INVALID;
6386 IRTemp rcS = IRTemp_INVALID;
6387 switch (size) {
6388 case X01:
6389 vassert(imm <= 0xFFFFULL);
6390 ty = Ity_I16;
6391 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
6392 break;
6393 case X10:
6394 vassert(imm <= 0xFFFFFFFFULL);
6395 ty = Ity_I32;
6396 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
6397 break;
6398 case X11:
6399 ty = Ity_I64;
6400 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
6401 default:
6402 vassert(0);
6403 }
6404 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
6405 return rcV;
6406}
6407
6408
sewardj25523c42014-06-15 19:36:29 +00006409/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
6410 and the upper can contain any value -- it is ignored. If |is2| is False,
6411 generate IR to put |new64| in the lower half of vector reg |dd| and zero
6412 the upper half. If |is2| is True, generate IR to put |new64| in the upper
6413 half of vector reg |dd| and leave the lower half unchanged. This
6414 simulates the behaviour of the "foo/foo2" instructions in which the
6415 destination is half the width of sources, for example addhn/addhn2.
6416*/
6417static
6418void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
6419{
6420 if (is2) {
6421 /* Get the old contents of Vdd, zero the upper half, and replace
6422 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00006423 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006424 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00006425 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006426 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
6427 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00006428 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006429 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
6430 mkexpr(t_newHI_zero)));
6431 putQReg128(dd, mkexpr(res));
6432 } else {
6433 /* This is simple. */
6434 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
6435 }
6436}
6437
6438
sewardj8e91fd42014-07-11 12:05:47 +00006439/* Compute vector SQABS at lane size |size| for |srcE|, returning
6440 the q result in |*qabs| and the normal result in |*nabs|. */
6441static
6442void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
6443 IRExpr* srcE, UInt size )
6444{
6445 IRTemp src, mask, maskn, nsub, qsub;
6446 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
6447 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
6448 assign(src, srcE);
6449 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
6450 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
6451 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
6452 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
6453 assign(*nabs, binop(Iop_OrV128,
6454 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
6455 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
6456 assign(*qabs, binop(Iop_OrV128,
6457 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
6458 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
6459}
6460
6461
sewardj51d012a2014-07-21 09:19:50 +00006462/* Compute vector SQNEG at lane size |size| for |srcE|, returning
6463 the q result in |*qneg| and the normal result in |*nneg|. */
6464static
6465void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
6466 IRExpr* srcE, UInt size )
6467{
6468 IRTemp src = IRTemp_INVALID;
6469 newTempsV128_3(&src, nneg, qneg);
6470 assign(src, srcE);
6471 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
6472 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
6473}
6474
6475
sewardjecedd982014-08-11 14:02:47 +00006476/* Zero all except the least significant lane of |srcE|, where |size|
6477 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00006478static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00006479{
6480 vassert(size < 4);
6481 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00006482 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00006483 return t;
6484}
6485
6486
sewardj51d012a2014-07-21 09:19:50 +00006487/* Generate IR to compute vector widening MULL from either the lower
6488 (is2==False) or upper (is2==True) halves of vecN and vecM. The
6489 widening multiplies are unsigned when isU==True and signed when
6490 isU==False. |size| is the narrow lane size indication. Optionally,
6491 the product may be added to or subtracted from vecD, at the wide lane
6492 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
6493 is 'm' (only multiply) then the accumulate part does not happen, and
6494 |vecD| is expected to == IRTemp_INVALID.
6495
6496 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
6497 are allowed. The result is returned in a new IRTemp, which is
6498 returned in *res. */
6499static
6500void math_MULL_ACC ( /*OUT*/IRTemp* res,
6501 Bool is2, Bool isU, UInt size, HChar mas,
6502 IRTemp vecN, IRTemp vecM, IRTemp vecD )
6503{
6504 vassert(res && *res == IRTemp_INVALID);
6505 vassert(size <= 2);
6506 vassert(mas == 'm' || mas == 'a' || mas == 's');
6507 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
6508 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
6509 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
6510 : (mas == 's' ? mkVecSUB(size+1)
6511 : Iop_INVALID);
6512 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
6513 mkexpr(vecN), mkexpr(vecM));
6514 *res = newTempV128();
6515 assign(*res, mas == 'm' ? mkexpr(mul)
6516 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
6517}
6518
6519
/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.d[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   /* tq is the saturating double-multiply (QDMULL does the doubling
      itself); tn is the plain widening multiply, doubled below by
      adding it to itself. */
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
6573
6574
sewardj54ffa1d2014-07-22 09:27:49 +00006575/* Generate IR for widening signed vector multiplies. The operands
6576 have their lane width signedly widened, and they are then multiplied
6577 at the wider width, returning results in two new IRTemps. */
sewardja5a6b752014-06-30 07:33:56 +00006578static
sewardj54ffa1d2014-07-22 09:27:49 +00006579void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
6580 UInt sizeNarrow, IRTemp argL, IRTemp argR )
6581{
6582 vassert(sizeNarrow <= 2);
6583 newTempsV128_2(resHI, resLO);
6584 IRTemp argLhi = newTemp(Ity_I64);
6585 IRTemp argLlo = newTemp(Ity_I64);
6586 IRTemp argRhi = newTemp(Ity_I64);
6587 IRTemp argRlo = newTemp(Ity_I64);
6588 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
6589 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
6590 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
6591 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
6592 IROp opMulls = mkVecMULLS(sizeNarrow);
6593 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
6594 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
6595}
6596
6597
sewardj257e99f2014-08-03 12:45:19 +00006598/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
6599 double that, possibly add a rounding constant (R variants), and take
6600 the high half. */
sewardj54ffa1d2014-07-22 09:27:49 +00006601static
6602void math_SQDMULH ( /*OUT*/IRTemp* res,
6603 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
6604 Bool isR, UInt size, IRTemp vN, IRTemp vM )
6605{
6606 vassert(size == X01 || size == X10); /* s or h only */
6607
6608 newTempsV128_3(res, sat1q, sat1n);
6609
6610 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
6611 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
6612
6613 IRTemp addWide = mkVecADD(size+1);
6614
6615 if (isR) {
6616 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
6617
6618 Int rcShift = size == X01 ? 15 : 31;
6619 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
6620 assign(*sat1n,
6621 binop(mkVecCATODDLANES(size),
6622 binop(addWide,
6623 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
6624 mkexpr(roundConst)),
6625 binop(addWide,
6626 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
6627 mkexpr(roundConst))));
6628 } else {
6629 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
6630
6631 assign(*sat1n,
6632 binop(mkVecCATODDLANES(size),
6633 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
6634 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
6635 }
6636
6637 assign(*res, mkexpr(*sat1q));
6638}
6639
6640
sewardja97dddf2014-08-14 22:26:52 +00006641/* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
6642 a new temp in *res, and the Q difference pair in new temps in
6643 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
6644 three operations it is. */
6645static
6646void math_QSHL_IMM ( /*OUT*/IRTemp* res,
6647 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
6648 IRTemp src, UInt size, UInt shift, const HChar* nm )
6649{
6650 vassert(size <= 3);
6651 UInt laneBits = 8 << size;
6652 vassert(shift < laneBits);
6653 newTempsV128_3(res, qDiff1, qDiff2);
6654 IRTemp z128 = newTempV128();
6655 assign(z128, mkV128(0x0000));
6656
6657 /* UQSHL */
6658 if (vex_streq(nm, "uqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006659 IROp qop = mkVecQSHLNSATUU(size);
sewardja97dddf2014-08-14 22:26:52 +00006660 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
6661 if (shift == 0) {
6662 /* No shift means no saturation. */
6663 assign(*qDiff1, mkexpr(z128));
6664 assign(*qDiff2, mkexpr(z128));
6665 } else {
6666 /* Saturation has occurred if any of the shifted-out bits are
6667 nonzero. We get the shifted-out bits by right-shifting the
6668 original value. */
6669 UInt rshift = laneBits - shift;
6670 vassert(rshift >= 1 && rshift < laneBits);
6671 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6672 assign(*qDiff2, mkexpr(z128));
6673 }
6674 return;
6675 }
6676
6677 /* SQSHL */
6678 if (vex_streq(nm, "sqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006679 IROp qop = mkVecQSHLNSATSS(size);
sewardja97dddf2014-08-14 22:26:52 +00006680 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
6681 if (shift == 0) {
6682 /* No shift means no saturation. */
6683 assign(*qDiff1, mkexpr(z128));
6684 assign(*qDiff2, mkexpr(z128));
6685 } else {
6686 /* Saturation has occurred if any of the shifted-out bits are
6687 different from the top bit of the original value. */
6688 UInt rshift = laneBits - 1 - shift;
6689 vassert(rshift >= 0 && rshift < laneBits-1);
6690 /* qDiff1 is the shifted out bits, and the top bit of the original
6691 value, preceded by zeroes. */
6692 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6693 /* qDiff2 is the top bit of the original value, cloned the
6694 correct number of times. */
6695 assign(*qDiff2, binop(mkVecSHRN(size),
6696 binop(mkVecSARN(size), mkexpr(src),
6697 mkU8(laneBits-1)),
6698 mkU8(rshift)));
6699 /* This also succeeds in comparing the top bit of the original
6700 value to itself, which is a bit stupid, but not wrong. */
6701 }
6702 return;
6703 }
6704
6705 /* SQSHLU */
6706 if (vex_streq(nm, "sqshlu")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006707 IROp qop = mkVecQSHLNSATSU(size);
sewardja97dddf2014-08-14 22:26:52 +00006708 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
sewardjacc29642014-08-15 05:35:35 +00006709 if (shift == 0) {
6710 /* If there's no shift, saturation depends on the top bit
6711 of the source. */
6712 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
6713 assign(*qDiff2, mkexpr(z128));
6714 } else {
6715 /* Saturation has occurred if any of the shifted-out bits are
6716 nonzero. We get the shifted-out bits by right-shifting the
6717 original value. */
6718 UInt rshift = laneBits - shift;
6719 vassert(rshift >= 1 && rshift < laneBits);
6720 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6721 assign(*qDiff2, mkexpr(z128));
6722 }
sewardja97dddf2014-08-14 22:26:52 +00006723 return;
6724 }
6725
6726 vassert(0);
6727}
6728
6729
sewardj62ece662014-08-17 19:59:09 +00006730/* Generate IR to do SRHADD and URHADD. */
6731static
6732IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
6733{
6734 /* Generate this:
6735 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
6736 */
6737 vassert(size <= 3);
6738 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
6739 IROp opADD = mkVecADD(size);
6740 /* The only tricky bit is to generate the correct vector 1 constant. */
6741 const ULong ones64[4]
6742 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
6743 0x0000000100000001ULL, 0x0000000000000001ULL };
6744 IRTemp imm64 = newTemp(Ity_I64);
6745 assign(imm64, mkU64(ones64[size]));
6746 IRTemp vecOne = newTempV128();
6747 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
6748 IRTemp scaOne = newTemp(Ity_I8);
6749 assign(scaOne, mkU8(1));
6750 IRTemp res = newTempV128();
6751 assign(res,
6752 binop(opADD,
6753 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
6754 binop(opADD,
6755 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
6756 binop(opSHR,
6757 binop(opADD,
6758 binop(opADD,
6759 binop(Iop_AndV128, mkexpr(aa),
6760 mkexpr(vecOne)),
6761 binop(Iop_AndV128, mkexpr(bb),
6762 mkexpr(vecOne))
6763 ),
6764 mkexpr(vecOne)
6765 ),
6766 mkexpr(scaOne)
6767 )
6768 )
6769 )
6770 );
6771 return res;
6772}
6773
6774
sewardj54ffa1d2014-07-22 09:27:49 +00006775/* QCFLAG tracks the SIMD sticky saturation status. Update the status
6776 thusly: if, after application of |opZHI| to both |qres| and |nres|,
6777 they have the same value, leave QCFLAG unchanged. Otherwise, set it
6778 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
6779 operators, or Iop_INVALID, in which case |qres| and |nres| are used
6780 unmodified. The presence |opZHI| means this function can be used to
6781 generate QCFLAG update code for both scalar and vector SIMD operations.
6782*/
6783static
6784void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
sewardja5a6b752014-06-30 07:33:56 +00006785{
sewardj8e91fd42014-07-11 12:05:47 +00006786 IRTemp diff = newTempV128();
6787 IRTemp oldQCFLAG = newTempV128();
6788 IRTemp newQCFLAG = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00006789 if (opZHI == Iop_INVALID) {
6790 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
6791 } else {
sewardj257e99f2014-08-03 12:45:19 +00006792 vassert(opZHI == Iop_ZeroHI64ofV128
6793 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
sewardj54ffa1d2014-07-22 09:27:49 +00006794 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
6795 }
sewardja5a6b752014-06-30 07:33:56 +00006796 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
6797 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
6798 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
6799}
6800
6801
/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   /* Iop_INVALID requests no lane masking: all 128 bits compared. */
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
6810
6811
sewardj8e91fd42014-07-11 12:05:47 +00006812/*------------------------------------------------------------*/
6813/*--- SIMD and FP instructions ---*/
6814/*------------------------------------------------------------*/
6815
static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD EXT (vector extract) instruction group.
      Returns True iff |insn| was decoded and IR emitted.

      31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      /* EXT extracts 16 (or 8) consecutive bytes starting at byte
         |imm4| from the concatenation Vm:Vn (Vn lowest). */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            /* Zero byte offset: the result is just Vn. */
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 <= 15);
            /* Shift the low part down by imm4 bytes and OR in the
               bottom bytes of the high part above it. */
            assign(res,
                   binop(Iop_OrV128,
                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         /* 64-bit variant: only byte offsets 0..7 are valid. */
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            /* Glue the low 64 bits of both sources side by side, then
               shift the 16-byte window down by imm4 bytes. */
            assign(res,
                   binop(Iop_ShrV128,
                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
                         mkU8(8 * imm4)));
         }
         /* Only the low 64 bits of the destination are written. */
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
sewardjbbcf1882014-01-12 12:49:10 +00006874
sewardjbbcf1882014-01-12 12:49:10 +00006875
sewardjdf1628c2014-06-10 22:52:05 +00006876static
6877Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
6878{
6879 /* 31 29 23 21 20 15 14 12 11 9 4
6880 0 q 001110 op2 0 m 0 len op 00 n d
6881 Decode fields: op2,len,op
sewardjbbcf1882014-01-12 12:49:10 +00006882 */
sewardjdf1628c2014-06-10 22:52:05 +00006883# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6884 if (INSN(31,31) != 0
6885 || INSN(29,24) != BITS6(0,0,1,1,1,0)
6886 || INSN(21,21) != 0
6887 || INSN(15,15) != 0
6888 || INSN(11,10) != BITS2(0,0)) {
6889 return False;
6890 }
6891 UInt bitQ = INSN(30,30);
6892 UInt op2 = INSN(23,22);
6893 UInt mm = INSN(20,16);
6894 UInt len = INSN(14,13);
6895 UInt bitOP = INSN(12,12);
6896 UInt nn = INSN(9,5);
6897 UInt dd = INSN(4,0);
6898
6899 if (op2 == X00) {
6900 /* -------- 00,xx,0 TBL, xx register table -------- */
6901 /* -------- 00,xx,1 TBX, xx register table -------- */
6902 /* 31 28 20 15 14 12 9 4
6903 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6904 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6905 where Ta = 16b(q=1) or 8b(q=0)
6906 */
sewardjdf1628c2014-06-10 22:52:05 +00006907 Bool isTBX = bitOP == 1;
6908 /* The out-of-range values to use. */
sewardj8e91fd42014-07-11 12:05:47 +00006909 IRTemp oor_values = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006910 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
6911 /* src value */
sewardj8e91fd42014-07-11 12:05:47 +00006912 IRTemp src = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006913 assign(src, getQReg128(mm));
6914 /* The table values */
6915 IRTemp tab[4];
6916 UInt i;
6917 for (i = 0; i <= len; i++) {
6918 vassert(i < 4);
sewardj8e91fd42014-07-11 12:05:47 +00006919 tab[i] = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006920 assign(tab[i], getQReg128((nn + i) % 32));
6921 }
6922 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
sewardjdf9d6d52014-06-27 10:43:22 +00006923 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
6924 const HChar* Ta = bitQ ==1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00006925 const HChar* nm = isTBX ? "tbx" : "tbl";
6926 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
6927 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
6928 return True;
6929 }
6930
6931# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6932 return False;
6933# undef INSN
6934}
6935
6936
static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD permute group: UZP1/UZP2 (unzip),
      TRN1/TRN2 (transpose) and ZIP1/ZIP2 (zip/interleave).
      Returns True iff |insn| was recognised, in which case the IR
      for it has been emitted and the DIP disassembly line printed.

      31 29 23 21 20 15 14 11 9 4
      0 q 001110 size 0 m 0 opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      /* UZP1 concatenates the even-numbered lanes of the two sources,
         UZP2 the odd-numbered ones. */
      IROp op = isUZP1 ? mkVecCATEVENLANES(size)
                       : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         /* 64-bit variant: pack the active (low) 64-bit halves of Vm
            and Vn into one 128-bit value and feed it to both operands
            of the cat-lanes op; the unwanted upper output half is
            zeroed below by math_MAYBE_ZERO_HI64. */
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                            getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      /* Build the transpose out of two primitives: first cat the
         even (TRN1) or odd (TRN2) lanes of each source with itself,
         then interleave the high halves of those two results. */
      IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
                        : mkVecCATODDLANES(size);
      IROp op2 = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
                       : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         /* 64-bit ZIP2 wants the upper 32 bits of each source's low
            half.  Shift each source left by 32 so those bits land
            where the INTERLEAVEHI op will pick them up; presumably
            the surplus interleaved bits end up in the upper 64 bits,
            which math_MAYBE_ZERO_HI64 discards below. */
         assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
         assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}
7036
7037
7038static
7039Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
7040{
7041 /* 31 28 23 21 16 11 9 4
7042 0 q u 01110 size 11000 opcode 10 n d
7043 Decode fields: u,size,opcode
7044 */
7045# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7046 if (INSN(31,31) != 0
7047 || INSN(28,24) != BITS5(0,1,1,1,0)
7048 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
7049 return False;
7050 }
7051 UInt bitQ = INSN(30,30);
7052 UInt bitU = INSN(29,29);
7053 UInt size = INSN(23,22);
7054 UInt opcode = INSN(16,12);
7055 UInt nn = INSN(9,5);
7056 UInt dd = INSN(4,0);
7057
sewardja5a6b752014-06-30 07:33:56 +00007058 if (opcode == BITS5(0,0,0,1,1)) {
7059 /* -------- 0,xx,00011 SADDLV -------- */
7060 /* -------- 1,xx,00011 UADDLV -------- */
7061 /* size is the narrow size */
7062 if (size == X11 || (size == X10 && bitQ == 0)) return False;
7063 Bool isU = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00007064 IRTemp src = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007065 assign(src, getQReg128(nn));
7066 /* The basic plan is to widen the lower half, and if Q = 1,
7067 the upper half too. Add them together (if Q = 1), and in
7068 either case fold with add at twice the lane width.
7069 */
7070 IRExpr* widened
7071 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
7072 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
7073 if (bitQ == 1) {
7074 widened
7075 = binop(mkVecADD(size+1),
7076 widened,
7077 mkexpr(math_WIDEN_LO_OR_HI_LANES(
7078 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
7079 );
7080 }
7081 /* Now fold. */
sewardj8e91fd42014-07-11 12:05:47 +00007082 IRTemp tWi = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007083 assign(tWi, widened);
7084 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
7085 putQReg128(dd, mkexpr(res));
7086 const HChar* arr = nameArr_Q_SZ(bitQ, size);
7087 const HChar ch = "bhsd"[size];
7088 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
7089 nameQReg128(dd), ch, nameQReg128(nn), arr);
7090 return True;
7091 }
7092
sewardjb9aff1e2014-06-15 21:55:33 +00007093 UInt ix = 0;
7094 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
7095 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
7096 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
7097 /**/
7098 if (ix != 0) {
7099 /* -------- 0,xx,01010: SMAXV -------- (1) */
7100 /* -------- 1,xx,01010: UMAXV -------- (2) */
7101 /* -------- 0,xx,11010: SMINV -------- (3) */
7102 /* -------- 1,xx,11010: UMINV -------- (4) */
7103 /* -------- 0,xx,11011: ADDV -------- (5) */
7104 vassert(ix >= 1 && ix <= 5);
sewardjdf1628c2014-06-10 22:52:05 +00007105 if (size == X11) return False; // 1d,2d cases not allowed
7106 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
sewardjdf1628c2014-06-10 22:52:05 +00007107 const IROp opMAXS[3]
7108 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
7109 const IROp opMAXU[3]
7110 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
sewardjb9aff1e2014-06-15 21:55:33 +00007111 const IROp opMINS[3]
7112 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
7113 const IROp opMINU[3]
7114 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
7115 const IROp opADD[3]
7116 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
sewardjdf1628c2014-06-10 22:52:05 +00007117 vassert(size < 3);
sewardjb9aff1e2014-06-15 21:55:33 +00007118 IROp op = Iop_INVALID;
7119 const HChar* nm = NULL;
7120 switch (ix) {
7121 case 1: op = opMAXS[size]; nm = "smaxv"; break;
7122 case 2: op = opMAXU[size]; nm = "umaxv"; break;
7123 case 3: op = opMINS[size]; nm = "sminv"; break;
7124 case 4: op = opMINU[size]; nm = "uminv"; break;
7125 case 5: op = opADD[size]; nm = "addv"; break;
7126 default: vassert(0);
7127 }
7128 vassert(op != Iop_INVALID && nm != NULL);
sewardj8e91fd42014-07-11 12:05:47 +00007129 IRTemp tN1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00007130 assign(tN1, getQReg128(nn));
7131 /* If Q == 0, we're just folding lanes in the lower half of
7132 the value. In which case, copy the lower half of the
7133 source into the upper half, so we can then treat it the
sewardjb9aff1e2014-06-15 21:55:33 +00007134 same as the full width case. Except for the addition case,
7135 in which we have to zero out the upper half. */
sewardj8e91fd42014-07-11 12:05:47 +00007136 IRTemp tN2 = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00007137 assign(tN2, bitQ == 0
7138 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
7139 : mk_CatEvenLanes64x2(tN1,tN1))
7140 : mkexpr(tN1));
sewardjdf9d6d52014-06-27 10:43:22 +00007141 IRTemp res = math_FOLDV(tN2, op);
sewardjdf1628c2014-06-10 22:52:05 +00007142 if (res == IRTemp_INVALID)
7143 return False; /* means math_MINMAXV
7144 doesn't handle this case yet */
7145 putQReg128(dd, mkexpr(res));
sewardjdf1628c2014-06-10 22:52:05 +00007146 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
7147 IRType laneTy = tys[size];
7148 const HChar* arr = nameArr_Q_SZ(bitQ, size);
7149 DIP("%s %s, %s.%s\n", nm,
7150 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
7151 return True;
7152 }
7153
7154# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7155 return False;
7156# undef INSN
7157}
7158
7159
static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD "copy" group: DUP (element and general),
      INS (general and element), and SMOV/UMOV.  Returns True iff
      |insn| was recognised, in which case the IR for it has been
      emitted and the DIP disassembly line printed.

      31 28 20 15 14 10 9 4
      0 q op 01110000 imm5 0 imm4 1 n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* handle_DUP_VEC_ELEM decodes imm5 into the lane number, lane
         size (log2) and lane name char, and builds the duplicated
         value; IRTemp_INVALID signals an invalid imm5 encoding. */
      UInt  laneNo    = 0;
      UInt  laneSzLg2 = 0;
      HChar laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
            xxx10 4H(q=0) or 8H(q=1), R=W
            xx100 2S(q=0) or 4S(q=1), R=W
            x1000 Invalid(q=0) or 2D(q=1), R=X
            x0000 Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      /* The lowest set bit of imm5 selects the lane size; narrow the
         Rn value to that size, zero-extended back to 64 bits. */
      if (imm5 & 1) {
         arT = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         /* Replicate the lane across 64 bits, then across 128 if Q=1;
            Q=0 zeroes the upper half. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31 28 20 15 9 4
      010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar ts = '?';
      UInt laneNo = 16;     /* 16 == invalid; real lane numbers are 0..15 */
      IRExpr* src = NULL;   /* non-NULL marks a valid decode */
      if (imm5 & 1) {
         src = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts = 'b';
      }
      else if (imm5 & 2) {
         src = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts = 'h';
      }
      else if (imm5 & 4) {
         src = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts = 's';
      }
      else if (imm5 & 8) {
         src = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         /* Only the selected lane of Vd is written; others are kept. */
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31 28 20 15 9 4
      0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx, 16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx, 32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x, copy64
                          other -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx, 16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx, 32Sto64
                          1:x1000 -> invalid
                          other -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31 28 20 14 9 4
      011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx, imm4[3:1]
                        xx100 -> S, xx, imm4[3:2]
                        x1000 -> D, x, imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar ts = '?';
      IRType ity = Ity_INVALID;   /* Ity_INVALID marks a bad imm5 */
      UInt ix1 = 16;
      UInt ix2 = 16;
      if (imm5 & 1) {
         ts = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         /* Copy lane ix2 of Vn to lane ix1 of Vd; other Vd lanes kept. */
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}
7446
7447
static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD "modified immediate" group:
      MOVI/MVNI/ORR/BIC (vector, immediate) and FMOV (vector,
      immediate).  The (op,cmode) pair selects the instruction class
      and immediate expansion; AdvSIMDExpandImm does the expansion
      proper.  Returns True iff |insn| was recognised, in which case
      the IR for it has been emitted.

      31 28 18 15 11 9 4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   /* abcdefgh is the raw 8-bit immediate, assembled from the abc and
      defgh fields. */
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   /* Exactly one of the following five flags gets set by a matching
      case below. */
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* FMOV (vector, immediate, single precision) */
      /* NOTE(review): op_cmode 0:1111 (FMOV .4s/.2s) is not handled
         here; only the 1:1111 double-precision form below is. */

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
        break;
   }
   if (ok) {
      /* Exactly one instruction class must have been selected. */
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      /* Expand the 8-bit immediate to the 64-bit lane value; may
         itself reject the encoding. */
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         /* BIC is implemented as AND with the inverted immediate;
            |inv| flips the immediate's bits for the BIC case only. */
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         /* MVN is MOV of the bitwise-complemented immediate.  FMOV's
            expanded immediate needs no further transformation. */
         if (isMVN) imm64lo = ~imm64lo;
         ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
7595
7596
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD scalar copy group; the only member
      handled is DUP (element, scalar).  Returns True iff |insn| was
      recognised, in which case the IR for it has been emitted.

      31 28 20 15 14 10 9 4
      01 op 11110000 imm5 0 imm4 1 n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 gives the lane size; the bits
         above it give the lane number.  The selected lane is
         zero-extended into the 64-bit temp w0. */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Scalar destination: the lane goes to the low 64 bits of Qd
            and the upper 64 bits are zeroed. */
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}
7663
sewardjfc83d2c2014-06-12 10:15:46 +00007664
sewardjdf1628c2014-06-10 22:52:05 +00007665static
7666Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
7667{
sewardjb9aff1e2014-06-15 21:55:33 +00007668 /* 31 28 23 21 16 11 9 4
7669 01 u 11110 sz 11000 opcode 10 n d
7670 Decode fields: u,sz,opcode
7671 */
sewardjdf1628c2014-06-10 22:52:05 +00007672# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjb9aff1e2014-06-15 21:55:33 +00007673 if (INSN(31,30) != BITS2(0,1)
7674 || INSN(28,24) != BITS5(1,1,1,1,0)
7675 || INSN(21,17) != BITS5(1,1,0,0,0)
7676 || INSN(11,10) != BITS2(1,0)) {
7677 return False;
7678 }
7679 UInt bitU = INSN(29,29);
7680 UInt sz = INSN(23,22);
7681 UInt opcode = INSN(16,12);
7682 UInt nn = INSN(9,5);
7683 UInt dd = INSN(4,0);
7684
7685 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
7686 /* -------- 0,11,11011 ADDP d_2d -------- */
sewardj8e91fd42014-07-11 12:05:47 +00007687 IRTemp xy = newTempV128();
7688 IRTemp xx = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00007689 assign(xy, getQReg128(nn));
7690 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
7691 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
7692 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
7693 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
7694 return True;
7695 }
7696
sewardjdf1628c2014-06-10 22:52:05 +00007697 return False;
7698# undef INSN
7699}
7700
sewardjfc83d2c2014-06-12 10:15:46 +00007701
sewardjdf1628c2014-06-10 22:52:05 +00007702static
7703Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
7704{
7705 /* 31 28 22 18 15 10 9 4
7706 01 u 111110 immh immb opcode 1 n d
7707 Decode fields: u,immh,opcode
7708 */
7709# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7710 if (INSN(31,30) != BITS2(0,1)
7711 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
7712 return False;
7713 }
7714 UInt bitU = INSN(29,29);
7715 UInt immh = INSN(22,19);
7716 UInt immb = INSN(18,16);
7717 UInt opcode = INSN(15,11);
7718 UInt nn = INSN(9,5);
7719 UInt dd = INSN(4,0);
7720 UInt immhb = (immh << 3) | immb;
7721
sewardja6b61f02014-08-17 18:32:14 +00007722 if ((immh & 8) == 8
7723 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
7724 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
7725 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
7726 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
7727 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
7728 Bool isU = bitU == 1;
7729 Bool isAcc = opcode == BITS5(0,0,0,1,0);
7730 UInt sh = 128 - immhb;
sewardjfc83d2c2014-06-12 10:15:46 +00007731 vassert(sh >= 1 && sh <= 64);
sewardja6b61f02014-08-17 18:32:14 +00007732 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
7733 IRExpr* src = getQReg128(nn);
7734 IRTemp shf = newTempV128();
7735 IRTemp res = newTempV128();
7736 if (sh == 64 && isU) {
7737 assign(shf, mkV128(0x0000));
7738 } else {
7739 UInt nudge = 0;
7740 if (sh == 64) {
7741 vassert(!isU);
7742 nudge = 1;
7743 }
7744 assign(shf, binop(op, src, mkU8(sh - nudge)));
7745 }
7746 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
7747 : mkexpr(shf));
7748 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7749 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
7750 : (isU ? "ushr" : "sshr");
7751 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
7752 return True;
7753 }
7754
7755 if ((immh & 8) == 8
7756 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
7757 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
7758 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
7759 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
7760 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
7761 Bool isU = bitU == 1;
7762 Bool isAcc = opcode == BITS5(0,0,1,1,0);
7763 UInt sh = 128 - immhb;
7764 vassert(sh >= 1 && sh <= 64);
7765 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
7766 vassert(sh >= 1 && sh <= 64);
7767 IRExpr* src = getQReg128(nn);
7768 IRTemp imm8 = newTemp(Ity_I8);
7769 assign(imm8, mkU8((UChar)(-sh)));
7770 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
7771 IRTemp shf = newTempV128();
7772 IRTemp res = newTempV128();
7773 assign(shf, binop(op, src, amt));
7774 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
7775 : mkexpr(shf));
7776 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7777 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
7778 : (isU ? "urshr" : "srshr");
7779 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
sewardjfc83d2c2014-06-12 10:15:46 +00007780 return True;
7781 }
7782
sewardj8e91fd42014-07-11 12:05:47 +00007783 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
7784 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
7785 UInt sh = 128 - immhb;
7786 vassert(sh >= 1 && sh <= 64);
7787 if (sh == 64) {
7788 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
7789 } else {
7790 /* sh is in range 1 .. 63 */
7791 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
7792 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7793 IRTemp res = newTempV128();
7794 assign(res, binop(Iop_OrV128,
7795 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7796 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
7797 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7798 }
7799 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
7800 return True;
7801 }
7802
sewardjacc29642014-08-15 05:35:35 +00007803 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7804 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
7805 UInt sh = immhb - 64;
7806 vassert(sh >= 0 && sh < 64);
7807 putQReg128(dd,
7808 unop(Iop_ZeroHI64ofV128,
7809 sh == 0 ? getQReg128(nn)
7810 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7811 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
7812 return True;
7813 }
7814
sewardj8e91fd42014-07-11 12:05:47 +00007815 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7816 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
7817 UInt sh = immhb - 64;
7818 vassert(sh >= 0 && sh < 64);
7819 if (sh == 0) {
7820 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
7821 } else {
7822 /* sh is in range 1 .. 63 */
7823 ULong nmask = (1ULL << sh) - 1;
7824 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7825 IRTemp res = newTempV128();
7826 assign(res, binop(Iop_OrV128,
7827 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7828 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7829 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7830 }
7831 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
7832 return True;
7833 }
7834
sewardjacc29642014-08-15 05:35:35 +00007835 if (opcode == BITS5(0,1,1,1,0)
7836 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
7837 /* -------- 0,01110 SQSHL #imm -------- */
7838 /* -------- 1,01110 UQSHL #imm -------- */
7839 /* -------- 1,01100 SQSHLU #imm -------- */
7840 UInt size = 0;
7841 UInt shift = 0;
7842 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
7843 if (!ok) return False;
7844 vassert(size >= 0 && size <= 3);
7845 /* The shift encoding has opposite sign for the leftwards case.
7846 Adjust shift to compensate. */
7847 UInt lanebits = 8 << size;
7848 shift = lanebits - shift;
7849 vassert(shift >= 0 && shift < lanebits);
7850 const HChar* nm = NULL;
7851 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
7852 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
7853 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
7854 else vassert(0);
7855 IRTemp qDiff1 = IRTemp_INVALID;
7856 IRTemp qDiff2 = IRTemp_INVALID;
7857 IRTemp res = IRTemp_INVALID;
7858 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
7859 /* This relies on the fact that the zeroed out lanes generate zeroed
7860 result lanes and don't saturate, so there's no point in trimming
7861 the resulting res, qDiff1 or qDiff2 values. */
7862 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
7863 putQReg128(dd, mkexpr(res));
7864 updateQCFLAGwithDifference(qDiff1, qDiff2);
7865 const HChar arr = "bhsd"[size];
7866 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
7867 return True;
7868 }
7869
sewardje741d162014-08-13 13:10:47 +00007870 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
7871 || (bitU == 1
7872 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
7873 /* -------- 0,10010 SQSHRN #imm -------- */
7874 /* -------- 1,10010 UQSHRN #imm -------- */
7875 /* -------- 0,10011 SQRSHRN #imm -------- */
7876 /* -------- 1,10011 UQRSHRN #imm -------- */
7877 /* -------- 1,10000 SQSHRUN #imm -------- */
7878 /* -------- 1,10001 SQRSHRUN #imm -------- */
7879 UInt size = 0;
7880 UInt shift = 0;
7881 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
7882 if (!ok || size == X11) return False;
7883 vassert(size >= X00 && size <= X10);
7884 vassert(shift >= 1 && shift <= (8 << size));
7885 const HChar* nm = "??";
7886 IROp op = Iop_INVALID;
7887 /* Decide on the name and the operation. */
7888 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
7889 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
7890 }
7891 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
7892 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
7893 }
7894 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
7895 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
7896 }
7897 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
7898 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
7899 }
7900 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
7901 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
7902 }
7903 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
7904 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
7905 }
7906 else vassert(0);
7907 /* Compute the result (Q, shifted value) pair. */
7908 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
7909 IRTemp pair = newTempV128();
7910 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
7911 /* Update the result reg */
7912 IRTemp res64in128 = newTempV128();
7913 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
7914 putQReg128(dd, mkexpr(res64in128));
7915 /* Update the Q flag. */
7916 IRTemp q64q64 = newTempV128();
7917 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
7918 IRTemp z128 = newTempV128();
7919 assign(z128, mkV128(0x0000));
7920 updateQCFLAGwithDifference(q64q64, z128);
7921 /* */
7922 const HChar arrNarrow = "bhsd"[size];
7923 const HChar arrWide = "bhsd"[size+1];
7924 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
7925 return True;
7926 }
7927
sewardjdf1628c2014-06-10 22:52:05 +00007928# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7929 return False;
7930# undef INSN
7931}
7932
sewardjfc83d2c2014-06-12 10:15:46 +00007933
sewardjdf1628c2014-06-10 22:52:05 +00007934static
7935Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
7936{
sewardj54ffa1d2014-07-22 09:27:49 +00007937 /* 31 29 28 23 21 20 15 11 9 4
7938 01 U 11110 size 1 m opcode 00 n d
7939 Decode fields: u,opcode
7940 */
sewardjdf1628c2014-06-10 22:52:05 +00007941# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00007942 if (INSN(31,30) != BITS2(0,1)
7943 || INSN(28,24) != BITS5(1,1,1,1,0)
7944 || INSN(21,21) != 1
7945 || INSN(11,10) != BITS2(0,0)) {
7946 return False;
7947 }
7948 UInt bitU = INSN(29,29);
7949 UInt size = INSN(23,22);
7950 UInt mm = INSN(20,16);
7951 UInt opcode = INSN(15,12);
7952 UInt nn = INSN(9,5);
7953 UInt dd = INSN(4,0);
7954 vassert(size < 4);
7955
7956 if (bitU == 0
7957 && (opcode == BITS4(1,1,0,1)
7958 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
7959 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
7960 /* -------- 0,1001 SQDMLAL -------- */ // 1
7961 /* -------- 0,1011 SQDMLSL -------- */ // 2
7962 /* Widens, and size refers to the narrowed lanes. */
7963 UInt ks = 3;
7964 switch (opcode) {
7965 case BITS4(1,1,0,1): ks = 0; break;
7966 case BITS4(1,0,0,1): ks = 1; break;
7967 case BITS4(1,0,1,1): ks = 2; break;
7968 default: vassert(0);
7969 }
7970 vassert(ks >= 0 && ks <= 2);
7971 if (size == X00 || size == X11) return False;
7972 vassert(size <= 2);
7973 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
7974 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
7975 newTempsV128_3(&vecN, &vecM, &vecD);
7976 assign(vecN, getQReg128(nn));
7977 assign(vecM, getQReg128(mm));
7978 assign(vecD, getQReg128(dd));
7979 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
7980 False/*!is2*/, size, "mas"[ks],
7981 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
7982 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
7983 putQReg128(dd, unop(opZHI, mkexpr(res)));
7984 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
7985 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
7986 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
7987 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
7988 }
7989 const HChar* nm = ks == 0 ? "sqdmull"
7990 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
7991 const HChar arrNarrow = "bhsd"[size];
7992 const HChar arrWide = "bhsd"[size+1];
7993 DIP("%s %c%d, %c%d, %c%d\n",
7994 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
7995 return True;
7996 }
7997
sewardjdf1628c2014-06-10 22:52:05 +00007998 return False;
7999# undef INSN
8000}
8001
8002
/* Decode and translate insns from the "AdvSIMD scalar three same" group:
   3-operand scalar ops where source and destination lanes have the same
   width (SQADD/UQADD, SQSUB/UQSUB, CMGT/CMHI, CMGE/CMHS, SSHL/USHL/
   SRSHL/URSHL, SQSHL/UQSHL/SQRSHL/UQRSHL, ADD/SUB, CMTST/CMEQ,
   SQDMULH/SQRDMULH, FABD).  Only the lowest lane of the destination is
   significant; all higher lanes are written as zero.  Returns True iff
   `insn` was decoded and IR emitted. */
static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      /* Compute both the saturating (qres) and plain (nres) results;
         QC is set iff they differ.  Non-lowest lanes are zeroed in
         both, so they cannot influence the QC comparison. */
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      /* a >= b is synthesised as NOT(b > a). */
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      /* Only the d (64-bit) form exists in scalar shape, hence the
         size == X11 guard. */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      /* The QAND* ops return a V256: V128_0 holds the shifted value,
         V128_1 holds the per-lane saturation indication. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      /* Scalar form: explicitly zero the upper 64 bits of Dd. */
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      /* CMTST ((a & b) != 0) is synthesised as NOT((a & b) == 0). */
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      /* sat1q/sat1n are the saturating/non-saturating result pair;
         their difference (in the lowest lane only) drives QC. */
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      /* abs(n - m), using the guest's current FP rounding mode for the
         subtraction. */
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}
8225
8226
/* Decode and translate insns from the "AdvSIMD scalar two-reg misc"
   group: 2-operand scalar ops (SUQADD/USQADD, SQABS/SQNEG, compares
   against #0, ABS, NEG, SQXTN/UQXTN/SQXTUN).  Only the lowest lane of
   the destination is significant; higher lanes are written as zero.
   Returns True iff `insn` was decoded and IR emitted. */
static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      /* Note: these accumulate into Dd, so Dd is both source and
         destination here. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      /* qresFW/nresFW: full-width saturating and non-saturating results;
         trimmed to the lowest lane below before the QC comparison. */
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      /* n >= 0 is synthesised as NOT(0 > n). */
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      /* n <= 0 is synthesised as NOT(n > 0). */
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      /* n < 0 is expressed as 0 > n. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      /* NEG is expressed as 0 - n. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      /* Narrowing: `size` refers to the narrowed (destination) lanes. */
      if (size == X11) return False;
      vassert(size < 3);
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      /* QC detection: re-widen the narrowed result and compare with the
         original; any mismatch means the narrowing saturated. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}
8384
sewardjfc83d2c2014-06-12 10:15:46 +00008385
sewardjdf1628c2014-06-10 22:52:05 +00008386static
8387Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
8388{
sewardj54ffa1d2014-07-22 09:27:49 +00008389 /* 31 28 23 21 20 19 15 11 9 4
8390 01 U 11111 size L M m opcode H 0 n d
8391 Decode fields are: u,size,opcode
8392 M is really part of the mm register number. Individual
8393 cases need to inspect L and H though.
8394 */
sewardjdf1628c2014-06-10 22:52:05 +00008395# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00008396 if (INSN(31,30) != BITS2(0,1)
8397 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
8398 return False;
8399 }
8400 UInt bitU = INSN(29,29);
8401 UInt size = INSN(23,22);
8402 UInt bitL = INSN(21,21);
8403 UInt bitM = INSN(20,20);
8404 UInt mmLO4 = INSN(19,16);
8405 UInt opcode = INSN(15,12);
8406 UInt bitH = INSN(11,11);
8407 UInt nn = INSN(9,5);
8408 UInt dd = INSN(4,0);
8409 vassert(size < 4);
8410 vassert(bitH < 2 && bitM < 2 && bitL < 2);
8411
8412 if (bitU == 0
8413 && (opcode == BITS4(1,0,1,1)
8414 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
8415 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
8416 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
8417 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
8418 /* Widens, and size refers to the narrowed lanes. */
8419 UInt ks = 3;
8420 switch (opcode) {
8421 case BITS4(1,0,1,1): ks = 0; break;
8422 case BITS4(0,0,1,1): ks = 1; break;
8423 case BITS4(0,1,1,1): ks = 2; break;
8424 default: vassert(0);
8425 }
8426 vassert(ks >= 0 && ks <= 2);
8427 UInt mm = 32; // invalid
8428 UInt ix = 16; // invalid
8429 switch (size) {
8430 case X00:
8431 return False; // h_b_b[] case is not allowed
8432 case X01:
8433 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
8434 case X10:
8435 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
8436 case X11:
8437 return False; // q_d_d[] case is not allowed
8438 default:
8439 vassert(0);
8440 }
8441 vassert(mm < 32 && ix < 16);
8442 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
8443 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
8444 newTempsV128_2(&vecN, &vecD);
8445 assign(vecN, getQReg128(nn));
8446 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
8447 assign(vecD, getQReg128(dd));
8448 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
8449 False/*!is2*/, size, "mas"[ks],
8450 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
8451 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
8452 putQReg128(dd, unop(opZHI, mkexpr(res)));
8453 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
8454 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
8455 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
8456 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
8457 }
8458 const HChar* nm = ks == 0 ? "sqmull"
8459 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
8460 const HChar arrNarrow = "bhsd"[size];
8461 const HChar arrWide = "bhsd"[size+1];
8462 DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
8463 nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix);
8464 return True;
8465 }
8466
sewardj257e99f2014-08-03 12:45:19 +00008467 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
8468 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
8469 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
8470 UInt mm = 32; // invalid
8471 UInt ix = 16; // invalid
8472 switch (size) {
8473 case X00:
8474 return False; // b case is not allowed
8475 case X01:
8476 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
8477 case X10:
8478 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
8479 case X11:
8480 return False; // q case is not allowed
8481 default:
8482 vassert(0);
8483 }
8484 vassert(mm < 32 && ix < 16);
8485 Bool isR = opcode == BITS4(1,1,0,1);
8486 IRTemp res, sat1q, sat1n, vN, vM;
8487 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
8488 vN = newTempV128();
8489 assign(vN, getQReg128(nn));
8490 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
8491 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
8492 IROp opZHI = mkVecZEROHIxxOFV128(size);
8493 putQReg128(dd, unop(opZHI, mkexpr(res)));
8494 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
8495 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
8496 HChar ch = size == X01 ? 'h' : 's';
8497 DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, dd, ix);
8498 return True;
8499 }
8500
sewardjdf1628c2014-06-10 22:52:05 +00008501 return False;
8502# undef INSN
8503}
8504
sewardjfc83d2c2014-06-12 10:15:46 +00008505
sewardjdf1628c2014-06-10 22:52:05 +00008506static
8507Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
8508{
8509 /* 31 28 22 18 15 10 9 4
8510 0 q u 011110 immh immb opcode 1 n d
8511 Decode fields: u,opcode
8512 */
8513# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8514 if (INSN(31,31) != 0
8515 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
8516 return False;
8517 }
8518 UInt bitQ = INSN(30,30);
8519 UInt bitU = INSN(29,29);
8520 UInt immh = INSN(22,19);
8521 UInt immb = INSN(18,16);
8522 UInt opcode = INSN(15,11);
8523 UInt nn = INSN(9,5);
8524 UInt dd = INSN(4,0);
8525
sewardja6b61f02014-08-17 18:32:14 +00008526 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +00008527 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
8528 /* -------- 1,00000 USHR std7_std7_#imm -------- */
sewardja6b61f02014-08-17 18:32:14 +00008529 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
8530 /* -------- 1,00010 USRA std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +00008531 /* laneTy, shift = case immh:immb of
8532 0001:xxx -> B, SHR:8-xxx
8533 001x:xxx -> H, SHR:16-xxxx
8534 01xx:xxx -> S, SHR:32-xxxxx
8535 1xxx:xxx -> D, SHR:64-xxxxxx
8536 other -> invalid
8537 */
sewardjdf1628c2014-06-10 22:52:05 +00008538 UInt size = 0;
8539 UInt shift = 0;
8540 Bool isQ = bitQ == 1;
8541 Bool isU = bitU == 1;
sewardja6b61f02014-08-17 18:32:14 +00008542 Bool isAcc = opcode == BITS5(0,0,0,1,0);
sewardjdf1628c2014-06-10 22:52:05 +00008543 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +00008544 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +00008545 vassert(size >= 0 && size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00008546 UInt lanebits = 8 << size;
8547 vassert(shift >= 1 && shift <= lanebits);
8548 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
8549 IRExpr* src = getQReg128(nn);
sewardja6b61f02014-08-17 18:32:14 +00008550 IRTemp shf = newTempV128();
sewardj8e91fd42014-07-11 12:05:47 +00008551 IRTemp res = newTempV128();
8552 if (shift == lanebits && isU) {
sewardja6b61f02014-08-17 18:32:14 +00008553 assign(shf, mkV128(0x0000));
sewardj8e91fd42014-07-11 12:05:47 +00008554 } else {
8555 UInt nudge = 0;
8556 if (shift == lanebits) {
8557 vassert(!isU);
8558 nudge = 1;
8559 }
sewardja6b61f02014-08-17 18:32:14 +00008560 assign(shf, binop(op, src, mkU8(shift - nudge)));
sewardjdf1628c2014-06-10 22:52:05 +00008561 }
sewardja6b61f02014-08-17 18:32:14 +00008562 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
8563 : mkexpr(shf));
sewardj8e91fd42014-07-11 12:05:47 +00008564 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8565 HChar laneCh = "bhsd"[size];
8566 UInt nLanes = (isQ ? 128 : 64) / lanebits;
sewardja6b61f02014-08-17 18:32:14 +00008567 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
8568 : (isU ? "ushr" : "sshr");
8569 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8570 nameQReg128(dd), nLanes, laneCh,
8571 nameQReg128(nn), nLanes, laneCh, shift);
8572 return True;
8573 }
8574
8575 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
8576 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
8577 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
8578 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
8579 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
8580 /* laneTy, shift = case immh:immb of
8581 0001:xxx -> B, SHR:8-xxx
8582 001x:xxx -> H, SHR:16-xxxx
8583 01xx:xxx -> S, SHR:32-xxxxx
8584 1xxx:xxx -> D, SHR:64-xxxxxx
8585 other -> invalid
8586 */
8587 UInt size = 0;
8588 UInt shift = 0;
8589 Bool isQ = bitQ == 1;
8590 Bool isU = bitU == 1;
8591 Bool isAcc = opcode == BITS5(0,0,1,1,0);
8592 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8593 if (!ok || (bitQ == 0 && size == X11)) return False;
8594 vassert(size >= 0 && size <= 3);
8595 UInt lanebits = 8 << size;
8596 vassert(shift >= 1 && shift <= lanebits);
8597 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
8598 IRExpr* src = getQReg128(nn);
8599 IRTemp imm8 = newTemp(Ity_I8);
8600 assign(imm8, mkU8((UChar)(-shift)));
8601 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
8602 IRTemp shf = newTempV128();
8603 IRTemp res = newTempV128();
8604 assign(shf, binop(op, src, amt));
8605 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
8606 : mkexpr(shf));
8607 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8608 HChar laneCh = "bhsd"[size];
8609 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8610 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
8611 : (isU ? "urshr" : "srshr");
sewardj8e91fd42014-07-11 12:05:47 +00008612 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8613 nameQReg128(dd), nLanes, laneCh,
8614 nameQReg128(nn), nLanes, laneCh, shift);
8615 return True;
sewardjdf1628c2014-06-10 22:52:05 +00008616 }
8617
sewardj8e91fd42014-07-11 12:05:47 +00008618 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
8619 /* -------- 1,01000 SRI std7_std7_#imm -------- */
8620 /* laneTy, shift = case immh:immb of
8621 0001:xxx -> B, SHR:8-xxx
8622 001x:xxx -> H, SHR:16-xxxx
8623 01xx:xxx -> S, SHR:32-xxxxx
8624 1xxx:xxx -> D, SHR:64-xxxxxx
8625 other -> invalid
8626 */
8627 UInt size = 0;
8628 UInt shift = 0;
8629 Bool isQ = bitQ == 1;
8630 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8631 if (!ok || (bitQ == 0 && size == X11)) return False;
8632 vassert(size >= 0 && size <= 3);
8633 UInt lanebits = 8 << size;
8634 vassert(shift >= 1 && shift <= lanebits);
8635 IRExpr* src = getQReg128(nn);
8636 IRTemp res = newTempV128();
8637 if (shift == lanebits) {
8638 assign(res, getQReg128(dd));
8639 } else {
8640 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
8641 IRExpr* nmask = binop(mkVecSHLN(size),
8642 mkV128(0xFFFF), mkU8(lanebits - shift));
8643 IRTemp tmp = newTempV128();
8644 assign(tmp, binop(Iop_OrV128,
8645 mkexpr(res),
8646 binop(Iop_AndV128, getQReg128(dd), nmask)));
8647 res = tmp;
8648 }
8649 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8650 HChar laneCh = "bhsd"[size];
8651 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8652 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
8653 nameQReg128(dd), nLanes, laneCh,
8654 nameQReg128(nn), nLanes, laneCh, shift);
8655 return True;
8656 }
8657
8658 if (opcode == BITS5(0,1,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +00008659 /* -------- 0,01010 SHL std7_std7_#imm -------- */
sewardj8e91fd42014-07-11 12:05:47 +00008660 /* -------- 1,01010 SLI std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +00008661 /* laneTy, shift = case immh:immb of
8662 0001:xxx -> B, xxx
8663 001x:xxx -> H, xxxx
8664 01xx:xxx -> S, xxxxx
8665 1xxx:xxx -> D, xxxxxx
8666 other -> invalid
8667 */
sewardjdf1628c2014-06-10 22:52:05 +00008668 UInt size = 0;
8669 UInt shift = 0;
sewardj8e91fd42014-07-11 12:05:47 +00008670 Bool isSLI = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00008671 Bool isQ = bitQ == 1;
8672 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +00008673 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +00008674 vassert(size >= 0 && size <= 3);
8675 /* The shift encoding has opposite sign for the leftwards case.
8676 Adjust shift to compensate. */
sewardj8e91fd42014-07-11 12:05:47 +00008677 UInt lanebits = 8 << size;
8678 shift = lanebits - shift;
8679 vassert(shift >= 0 && shift < lanebits);
8680 IROp op = mkVecSHLN(size);
8681 IRExpr* src = getQReg128(nn);
8682 IRTemp res = newTempV128();
8683 if (shift == 0) {
8684 assign(res, src);
8685 } else {
sewardjdf9d6d52014-06-27 10:43:22 +00008686 assign(res, binop(op, src, mkU8(shift)));
sewardj8e91fd42014-07-11 12:05:47 +00008687 if (isSLI) {
8688 IRExpr* nmask = binop(mkVecSHRN(size),
8689 mkV128(0xFFFF), mkU8(lanebits - shift));
8690 IRTemp tmp = newTempV128();
8691 assign(tmp, binop(Iop_OrV128,
8692 mkexpr(res),
8693 binop(Iop_AndV128, getQReg128(dd), nmask)));
8694 res = tmp;
8695 }
sewardjdf1628c2014-06-10 22:52:05 +00008696 }
sewardj8e91fd42014-07-11 12:05:47 +00008697 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8698 HChar laneCh = "bhsd"[size];
8699 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8700 const HChar* nm = isSLI ? "sli" : "shl";
8701 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8702 nameQReg128(dd), nLanes, laneCh,
8703 nameQReg128(nn), nLanes, laneCh, shift);
8704 return True;
sewardjdf1628c2014-06-10 22:52:05 +00008705 }
8706
sewardja97dddf2014-08-14 22:26:52 +00008707 if (opcode == BITS5(0,1,1,1,0)
8708 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
8709 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
8710 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
8711 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
8712 UInt size = 0;
8713 UInt shift = 0;
8714 Bool isQ = bitQ == 1;
8715 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8716 if (!ok || (bitQ == 0 && size == X11)) return False;
8717 vassert(size >= 0 && size <= 3);
8718 /* The shift encoding has opposite sign for the leftwards case.
8719 Adjust shift to compensate. */
8720 UInt lanebits = 8 << size;
8721 shift = lanebits - shift;
8722 vassert(shift >= 0 && shift < lanebits);
8723 const HChar* nm = NULL;
8724 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
8725 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
8726 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
8727 else vassert(0);
8728 IRTemp qDiff1 = IRTemp_INVALID;
8729 IRTemp qDiff2 = IRTemp_INVALID;
8730 IRTemp res = IRTemp_INVALID;
8731 IRTemp src = newTempV128();
8732 assign(src, getQReg128(nn));
8733 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
8734 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8735 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
sewardjacc29642014-08-15 05:35:35 +00008736 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
sewardja97dddf2014-08-14 22:26:52 +00008737 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8738 DIP("%s %s.%s, %s.%s, #%u\n", nm,
8739 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
8740 return True;
8741 }
8742
sewardj487559e2014-07-10 14:22:45 +00008743 if (bitU == 0
8744 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
8745 /* -------- 0,10000 SHRN{,2} #imm -------- */
8746 /* -------- 0,10001 RSHRN{,2} #imm -------- */
8747 /* Narrows, and size is the narrow size. */
8748 UInt size = 0;
8749 UInt shift = 0;
8750 Bool is2 = bitQ == 1;
8751 Bool isR = opcode == BITS5(1,0,0,0,1);
8752 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8753 if (!ok || size == X11) return False;
8754 vassert(shift >= 1);
sewardj8e91fd42014-07-11 12:05:47 +00008755 IRTemp t1 = newTempV128();
8756 IRTemp t2 = newTempV128();
8757 IRTemp t3 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008758 assign(t1, getQReg128(nn));
8759 assign(t2, isR ? binop(mkVecADD(size+1),
8760 mkexpr(t1),
8761 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
8762 : mkexpr(t1));
8763 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
8764 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
8765 putLO64andZUorPutHI64(is2, dd, t4);
8766 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8767 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8768 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
8769 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
8770 return True;
8771 }
8772
sewardjecedd982014-08-11 14:02:47 +00008773 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
8774 || (bitU == 1
8775 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
8776 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
8777 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
8778 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
8779 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
8780 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
8781 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
8782 UInt size = 0;
8783 UInt shift = 0;
8784 Bool is2 = bitQ == 1;
8785 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8786 if (!ok || size == X11) return False;
8787 vassert(shift >= 1 && shift <= (8 << size));
8788 const HChar* nm = "??";
8789 IROp op = Iop_INVALID;
8790 /* Decide on the name and the operation. */
8791 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
8792 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
8793 }
8794 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
8795 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
8796 }
8797 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
8798 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
8799 }
8800 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
8801 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
8802 }
8803 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
8804 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
8805 }
8806 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
8807 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
8808 }
8809 else vassert(0);
8810 /* Compute the result (Q, shifted value) pair. */
8811 IRTemp src128 = newTempV128();
8812 assign(src128, getQReg128(nn));
8813 IRTemp pair = newTempV128();
8814 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
8815 /* Update the result reg */
8816 IRTemp res64in128 = newTempV128();
8817 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
8818 putLO64andZUorPutHI64(is2, dd, res64in128);
8819 /* Update the Q flag. */
8820 IRTemp q64q64 = newTempV128();
8821 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
8822 IRTemp z128 = newTempV128();
8823 assign(z128, mkV128(0x0000));
8824 updateQCFLAGwithDifference(q64q64, z128);
8825 /* */
8826 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8827 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8828 DIP("%s %s.%s, %s.%s, #%u\n", nm,
8829 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
8830 return True;
8831 }
8832
sewardjdf1628c2014-06-10 22:52:05 +00008833 if (opcode == BITS5(1,0,1,0,0)) {
8834 /* -------- 0,10100 SSHLL{,2} #imm -------- */
8835 /* -------- 1,10100 USHLL{,2} #imm -------- */
8836 /* 31 28 22 18 15 9 4
8837 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
8838 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
8839 where Ta,Tb,sh
8840 = case immh of 1xxx -> invalid
8841 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
8842 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
8843 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
8844 0000 -> AdvSIMD modified immediate (???)
8845 */
8846 Bool isQ = bitQ == 1;
8847 Bool isU = bitU == 1;
8848 UInt immhb = (immh << 3) | immb;
sewardj8e91fd42014-07-11 12:05:47 +00008849 IRTemp src = newTempV128();
8850 IRTemp zero = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008851 IRExpr* res = NULL;
8852 UInt sh = 0;
8853 const HChar* ta = "??";
8854 const HChar* tb = "??";
8855 assign(src, getQReg128(nn));
8856 assign(zero, mkV128(0x0000));
8857 if (immh & 8) {
8858 /* invalid; don't assign to res */
8859 }
8860 else if (immh & 4) {
8861 sh = immhb - 32;
8862 vassert(sh < 32); /* so 32-sh is 1..32 */
8863 ta = "2d";
8864 tb = isQ ? "4s" : "2s";
8865 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
8866 : mk_InterleaveLO32x4(src, zero);
8867 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
8868 }
8869 else if (immh & 2) {
8870 sh = immhb - 16;
8871 vassert(sh < 16); /* so 16-sh is 1..16 */
8872 ta = "4s";
8873 tb = isQ ? "8h" : "4h";
8874 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
8875 : mk_InterleaveLO16x8(src, zero);
8876 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
8877 }
8878 else if (immh & 1) {
8879 sh = immhb - 8;
8880 vassert(sh < 8); /* so 8-sh is 1..8 */
8881 ta = "8h";
8882 tb = isQ ? "16b" : "8b";
8883 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
8884 : mk_InterleaveLO8x16(src, zero);
8885 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
8886 } else {
8887 vassert(immh == 0);
8888 /* invalid; don't assign to res */
8889 }
8890 /* */
8891 if (res) {
8892 putQReg128(dd, res);
8893 DIP("%cshll%s %s.%s, %s.%s, #%d\n",
8894 isU ? 'u' : 's', isQ ? "2" : "",
8895 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
8896 return True;
8897 }
8898 return False;
8899 }
8900
8901# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8902 return False;
8903# undef INSN
8904}
8905
sewardjfc83d2c2014-06-12 10:15:46 +00008906
sewardjdf1628c2014-06-10 22:52:05 +00008907static
8908Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
8909{
sewardj25523c42014-06-15 19:36:29 +00008910 /* 31 30 29 28 23 21 20 15 11 9 4
8911 0 Q U 01110 size 1 m opcode 00 n d
8912 Decode fields: u,opcode
8913 */
sewardjdf1628c2014-06-10 22:52:05 +00008914# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj25523c42014-06-15 19:36:29 +00008915 if (INSN(31,31) != 0
8916 || INSN(28,24) != BITS5(0,1,1,1,0)
8917 || INSN(21,21) != 1
8918 || INSN(11,10) != BITS2(0,0)) {
8919 return False;
8920 }
8921 UInt bitQ = INSN(30,30);
8922 UInt bitU = INSN(29,29);
8923 UInt size = INSN(23,22);
8924 UInt mm = INSN(20,16);
8925 UInt opcode = INSN(15,12);
8926 UInt nn = INSN(9,5);
8927 UInt dd = INSN(4,0);
8928 vassert(size < 4);
8929 Bool is2 = bitQ == 1;
8930
sewardj6f312d02014-06-28 12:21:37 +00008931 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
8932 /* -------- 0,0000 SADDL{2} -------- */
8933 /* -------- 1,0000 UADDL{2} -------- */
8934 /* -------- 0,0010 SSUBL{2} -------- */
8935 /* -------- 1,0010 USUBL{2} -------- */
8936 /* Widens, and size refers to the narrowed lanes. */
sewardj6f312d02014-06-28 12:21:37 +00008937 if (size == X11) return False;
8938 vassert(size <= 2);
8939 Bool isU = bitU == 1;
8940 Bool isADD = opcode == BITS4(0,0,0,0);
sewardja5a6b752014-06-30 07:33:56 +00008941 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
8942 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008943 IRTemp res = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00008944 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj6f312d02014-06-28 12:21:37 +00008945 mkexpr(argL), mkexpr(argR)));
8946 putQReg128(dd, mkexpr(res));
8947 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8948 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8949 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
8950 : (isU ? "usubl" : "ssubl");
8951 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8952 nameQReg128(dd), arrWide,
8953 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
8954 return True;
8955 }
8956
sewardja5a6b752014-06-30 07:33:56 +00008957 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
8958 /* -------- 0,0001 SADDW{2} -------- */
8959 /* -------- 1,0001 UADDW{2} -------- */
8960 /* -------- 0,0011 SSUBW{2} -------- */
8961 /* -------- 1,0011 USUBW{2} -------- */
8962 /* Widens, and size refers to the narrowed lanes. */
8963 if (size == X11) return False;
8964 vassert(size <= 2);
8965 Bool isU = bitU == 1;
8966 Bool isADD = opcode == BITS4(0,0,0,1);
8967 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008968 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008969 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
8970 getQReg128(nn), mkexpr(argR)));
8971 putQReg128(dd, mkexpr(res));
8972 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8973 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8974 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
8975 : (isU ? "usubw" : "ssubw");
8976 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8977 nameQReg128(dd), arrWide,
8978 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
8979 return True;
8980 }
8981
sewardj25523c42014-06-15 19:36:29 +00008982 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
8983 /* -------- 0,0100 ADDHN{2} -------- */
8984 /* -------- 1,0100 RADDHN{2} -------- */
8985 /* -------- 0,0110 SUBHN{2} -------- */
8986 /* -------- 1,0110 RSUBHN{2} -------- */
8987 /* Narrows, and size refers to the narrowed lanes. */
8988 if (size == X11) return False;
8989 vassert(size <= 2);
sewardj487559e2014-07-10 14:22:45 +00008990 const UInt shift[3] = { 8, 16, 32 };
sewardj25523c42014-06-15 19:36:29 +00008991 Bool isADD = opcode == BITS4(0,1,0,0);
8992 Bool isR = bitU == 1;
8993 /* Combined elements in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00008994 IRTemp wide = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008995 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj25523c42014-06-15 19:36:29 +00008996 getQReg128(nn), getQReg128(mm));
8997 if (isR) {
sewardj487559e2014-07-10 14:22:45 +00008998 wideE = binop(mkVecADD(size+1),
8999 wideE,
9000 mkexpr(math_VEC_DUP_IMM(size+1,
9001 1ULL << (shift[size]-1))));
sewardj25523c42014-06-15 19:36:29 +00009002 }
9003 assign(wide, wideE);
9004 /* Top halves of elements, still in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00009005 IRTemp shrd = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00009006 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
sewardj25523c42014-06-15 19:36:29 +00009007 /* Elements now compacted into lower 64 bits */
sewardj8e91fd42014-07-11 12:05:47 +00009008 IRTemp new64 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00009009 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
sewardj25523c42014-06-15 19:36:29 +00009010 putLO64andZUorPutHI64(is2, dd, new64);
9011 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9012 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9013 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
9014 : (isR ? "rsubhn" : "subhn");
9015 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9016 nameQReg128(dd), arrNarrow,
9017 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
9018 return True;
9019 }
9020
sewardj6f312d02014-06-28 12:21:37 +00009021 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
9022 /* -------- 0,0101 SABAL{2} -------- */
9023 /* -------- 1,0101 UABAL{2} -------- */
9024 /* -------- 0,0111 SABDL{2} -------- */
9025 /* -------- 1,0111 UABDL{2} -------- */
9026 /* Widens, and size refers to the narrowed lanes. */
sewardj6f312d02014-06-28 12:21:37 +00009027 if (size == X11) return False;
9028 vassert(size <= 2);
9029 Bool isU = bitU == 1;
9030 Bool isACC = opcode == BITS4(0,1,0,1);
sewardja5a6b752014-06-30 07:33:56 +00009031 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
9032 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj6f312d02014-06-28 12:21:37 +00009033 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
sewardj8e91fd42014-07-11 12:05:47 +00009034 IRTemp res = newTempV128();
9035 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
sewardj6f312d02014-06-28 12:21:37 +00009036 : mkexpr(abd));
9037 putQReg128(dd, mkexpr(res));
9038 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9039 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9040 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
9041 : (isU ? "uabdl" : "sabdl");
9042 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9043 nameQReg128(dd), arrWide,
9044 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9045 return True;
9046 }
9047
9048 if (opcode == BITS4(1,1,0,0)
9049 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
sewardj487559e2014-07-10 14:22:45 +00009050 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
sewardj6f312d02014-06-28 12:21:37 +00009051 /* -------- 1,1100 UMULL{2} -------- */ // 0
9052 /* -------- 0,1000 SMLAL{2} -------- */ // 1
9053 /* -------- 1,1000 UMLAL{2} -------- */ // 1
9054 /* -------- 0,1010 SMLSL{2} -------- */ // 2
9055 /* -------- 1,1010 UMLSL{2} -------- */ // 2
9056 /* Widens, and size refers to the narrowed lanes. */
sewardj487559e2014-07-10 14:22:45 +00009057 UInt ks = 3;
sewardj6f312d02014-06-28 12:21:37 +00009058 switch (opcode) {
sewardj487559e2014-07-10 14:22:45 +00009059 case BITS4(1,1,0,0): ks = 0; break;
9060 case BITS4(1,0,0,0): ks = 1; break;
9061 case BITS4(1,0,1,0): ks = 2; break;
sewardj6f312d02014-06-28 12:21:37 +00009062 default: vassert(0);
9063 }
sewardj487559e2014-07-10 14:22:45 +00009064 vassert(ks >= 0 && ks <= 2);
sewardj6f312d02014-06-28 12:21:37 +00009065 if (size == X11) return False;
9066 vassert(size <= 2);
sewardj51d012a2014-07-21 09:19:50 +00009067 Bool isU = bitU == 1;
9068 IRTemp vecN = newTempV128();
9069 IRTemp vecM = newTempV128();
9070 IRTemp vecD = newTempV128();
9071 assign(vecN, getQReg128(nn));
9072 assign(vecM, getQReg128(mm));
9073 assign(vecD, getQReg128(dd));
9074 IRTemp res = IRTemp_INVALID;
9075 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
9076 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj6f312d02014-06-28 12:21:37 +00009077 putQReg128(dd, mkexpr(res));
9078 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9079 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
sewardj487559e2014-07-10 14:22:45 +00009080 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
sewardj6f312d02014-06-28 12:21:37 +00009081 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
9082 nameQReg128(dd), arrWide,
9083 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9084 return True;
9085 }
9086
sewardj54ffa1d2014-07-22 09:27:49 +00009087 if (bitU == 0
9088 && (opcode == BITS4(1,1,0,1)
9089 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9090 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
9091 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
9092 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
9093 /* Widens, and size refers to the narrowed lanes. */
9094 UInt ks = 3;
9095 switch (opcode) {
9096 case BITS4(1,1,0,1): ks = 0; break;
9097 case BITS4(1,0,0,1): ks = 1; break;
9098 case BITS4(1,0,1,1): ks = 2; break;
9099 default: vassert(0);
9100 }
9101 vassert(ks >= 0 && ks <= 2);
9102 if (size == X00 || size == X11) return False;
9103 vassert(size <= 2);
9104 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9105 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9106 newTempsV128_3(&vecN, &vecM, &vecD);
9107 assign(vecN, getQReg128(nn));
9108 assign(vecM, getQReg128(mm));
9109 assign(vecD, getQReg128(dd));
9110 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9111 is2, size, "mas"[ks],
9112 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9113 putQReg128(dd, mkexpr(res));
9114 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9115 updateQCFLAGwithDifference(sat1q, sat1n);
9116 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9117 updateQCFLAGwithDifference(sat2q, sat2n);
9118 }
9119 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9120 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9121 const HChar* nm = ks == 0 ? "sqdmull"
9122 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9123 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9124 nameQReg128(dd), arrWide,
9125 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9126 return True;
9127 }
9128
sewardj31b5a952014-06-26 07:41:14 +00009129 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
9130 /* -------- 0,1110 PMULL{2} -------- */
sewardj6f312d02014-06-28 12:21:37 +00009131 /* Widens, and size refers to the narrowed lanes. */
sewardj31b5a952014-06-26 07:41:14 +00009132 if (size != X00) return False;
9133 IRTemp res
9134 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
9135 getQReg128(nn), getQReg128(mm));
9136 putQReg128(dd, mkexpr(res));
9137 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9138 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9139 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
9140 nameQReg128(dd), arrNarrow,
9141 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
9142 return True;
9143 }
9144
sewardjdf1628c2014-06-10 22:52:05 +00009145 return False;
9146# undef INSN
9147}
9148
9149
9150static
9151Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9152{
9153 /* 31 30 29 28 23 21 20 15 10 9 4
9154 0 Q U 01110 size 1 m opcode 1 n d
9155 Decode fields: u,size,opcode
9156 */
9157# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9158 if (INSN(31,31) != 0
9159 || INSN(28,24) != BITS5(0,1,1,1,0)
9160 || INSN(21,21) != 1
9161 || INSN(10,10) != 1) {
9162 return False;
9163 }
9164 UInt bitQ = INSN(30,30);
9165 UInt bitU = INSN(29,29);
9166 UInt size = INSN(23,22);
9167 UInt mm = INSN(20,16);
9168 UInt opcode = INSN(15,11);
9169 UInt nn = INSN(9,5);
9170 UInt dd = INSN(4,0);
9171 vassert(size < 4);
9172
sewardja5a6b752014-06-30 07:33:56 +00009173 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
9174 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
9175 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
9176 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
9177 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
9178 if (size == X11) return False;
9179 Bool isADD = opcode == BITS5(0,0,0,0,0);
9180 Bool isU = bitU == 1;
9181 /* Widen both args out, do the math, narrow to final result. */
sewardj8e91fd42014-07-11 12:05:47 +00009182 IRTemp argL = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009183 IRTemp argLhi = IRTemp_INVALID;
9184 IRTemp argLlo = IRTemp_INVALID;
sewardj8e91fd42014-07-11 12:05:47 +00009185 IRTemp argR = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009186 IRTemp argRhi = IRTemp_INVALID;
9187 IRTemp argRlo = IRTemp_INVALID;
sewardj8e91fd42014-07-11 12:05:47 +00009188 IRTemp resHi = newTempV128();
9189 IRTemp resLo = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009190 IRTemp res = IRTemp_INVALID;
9191 assign(argL, getQReg128(nn));
9192 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
9193 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
9194 assign(argR, getQReg128(mm));
9195 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
9196 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
9197 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
9198 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
9199 assign(resHi, binop(opSxR,
9200 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
9201 mkU8(1)));
9202 assign(resLo, binop(opSxR,
9203 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
9204 mkU8(1)));
9205 res = math_NARROW_LANES ( resHi, resLo, size );
9206 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9207 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
9208 : (isU ? "uhsub" : "shsub");
9209 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9210 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9211 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9212 return True;
9213 }
9214
sewardj62ece662014-08-17 19:59:09 +00009215 if (opcode == BITS5(0,0,0,1,0)) {
9216 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
9217 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
9218 if (bitQ == 0 && size == X11) return False; // implied 1d case
9219 Bool isU = bitU == 1;
9220 IRTemp argL = newTempV128();
9221 IRTemp argR = newTempV128();
9222 assign(argL, getQReg128(nn));
9223 assign(argR, getQReg128(mm));
9224 IRTemp res = math_RHADD(size, isU, argL, argR);
9225 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9226 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9227 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
9228 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9229 return True;
9230 }
9231
sewardja5a6b752014-06-30 07:33:56 +00009232 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9233 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
9234 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
9235 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
9236 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
9237 if (bitQ == 0 && size == X11) return False; // implied 1d case
9238 Bool isADD = opcode == BITS5(0,0,0,0,1);
9239 Bool isU = bitU == 1;
9240 IROp qop = Iop_INVALID;
9241 IROp nop = Iop_INVALID;
9242 if (isADD) {
9243 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9244 nop = mkVecADD(size);
9245 } else {
9246 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9247 nop = mkVecSUB(size);
9248 }
sewardj8e91fd42014-07-11 12:05:47 +00009249 IRTemp argL = newTempV128();
9250 IRTemp argR = newTempV128();
9251 IRTemp qres = newTempV128();
9252 IRTemp nres = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009253 assign(argL, getQReg128(nn));
9254 assign(argR, getQReg128(mm));
9255 assign(qres, math_MAYBE_ZERO_HI64_fromE(
9256 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
9257 assign(nres, math_MAYBE_ZERO_HI64_fromE(
9258 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
9259 putQReg128(dd, mkexpr(qres));
sewardj8e91fd42014-07-11 12:05:47 +00009260 updateQCFLAGwithDifference(qres, nres);
sewardja5a6b752014-06-30 07:33:56 +00009261 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9262 : (isU ? "uqsub" : "sqsub");
9263 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9264 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9265 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9266 return True;
9267 }
9268
sewardjdf1628c2014-06-10 22:52:05 +00009269 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
9270 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
9271 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
9272 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
9273 /* -------- 0,10,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
sewardjdf9d6d52014-06-27 10:43:22 +00009274 Bool isORx = (size & 2) == 2;
sewardjdf1628c2014-06-10 22:52:05 +00009275 Bool invert = (size & 1) == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009276 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009277 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
sewardjdf1628c2014-06-10 22:52:05 +00009278 getQReg128(nn),
9279 invert ? unop(Iop_NotV128, getQReg128(mm))
9280 : getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009281 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009282 const HChar* names[4] = { "and", "bic", "orr", "orn" };
sewardjdf9d6d52014-06-27 10:43:22 +00009283 const HChar* ar = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00009284 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
9285 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
9286 return True;
9287 }
9288
9289 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
9290 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
9291 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
9292 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
9293 /* -------- 1,10,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009294 IRTemp argD = newTempV128();
9295 IRTemp argN = newTempV128();
9296 IRTemp argM = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009297 assign(argD, getQReg128(dd));
9298 assign(argN, getQReg128(nn));
9299 assign(argM, getQReg128(mm));
9300 const IROp opXOR = Iop_XorV128;
9301 const IROp opAND = Iop_AndV128;
9302 const IROp opNOT = Iop_NotV128;
sewardj8e91fd42014-07-11 12:05:47 +00009303 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009304 switch (size) {
9305 case BITS2(0,0): /* EOR */
sewardjdf9d6d52014-06-27 10:43:22 +00009306 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
sewardjdf1628c2014-06-10 22:52:05 +00009307 break;
9308 case BITS2(0,1): /* BSL */
sewardjdf9d6d52014-06-27 10:43:22 +00009309 assign(res, binop(opXOR, mkexpr(argM),
9310 binop(opAND,
9311 binop(opXOR, mkexpr(argM), mkexpr(argN)),
9312 mkexpr(argD))));
sewardjdf1628c2014-06-10 22:52:05 +00009313 break;
9314 case BITS2(1,0): /* BIT */
sewardjdf9d6d52014-06-27 10:43:22 +00009315 assign(res, binop(opXOR, mkexpr(argD),
9316 binop(opAND,
9317 binop(opXOR, mkexpr(argD), mkexpr(argN)),
9318 mkexpr(argM))));
sewardjdf1628c2014-06-10 22:52:05 +00009319 break;
9320 case BITS2(1,1): /* BIF */
sewardjdf9d6d52014-06-27 10:43:22 +00009321 assign(res, binop(opXOR, mkexpr(argD),
9322 binop(opAND,
9323 binop(opXOR, mkexpr(argD), mkexpr(argN)),
9324 unop(opNOT, mkexpr(argM)))));
sewardjdf1628c2014-06-10 22:52:05 +00009325 break;
9326 default:
9327 vassert(0);
9328 }
sewardjdf9d6d52014-06-27 10:43:22 +00009329 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009330 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
sewardjdf9d6d52014-06-27 10:43:22 +00009331 const HChar* arr = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00009332 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
9333 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9334 return True;
9335 }
9336
9337 if (opcode == BITS5(0,0,1,1,0)) {
9338 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
9339 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
9340 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009341 Bool isGT = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00009342 IRExpr* argL = getQReg128(nn);
9343 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009344 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009345 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009346 isGT ? binop(mkVecCMPGTS(size), argL, argR)
9347 : binop(mkVecCMPGTU(size), argL, argR));
sewardjdf9d6d52014-06-27 10:43:22 +00009348 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009349 const HChar* nm = isGT ? "cmgt" : "cmhi";
9350 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9351 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9352 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9353 return True;
9354 }
9355
9356 if (opcode == BITS5(0,0,1,1,1)) {
9357 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
9358 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
9359 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009360 Bool isGE = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00009361 IRExpr* argL = getQReg128(nn);
9362 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009363 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009364 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009365 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
9366 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
sewardjdf9d6d52014-06-27 10:43:22 +00009367 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009368 const HChar* nm = isGE ? "cmge" : "cmhs";
9369 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9370 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9371 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9372 return True;
9373 }
9374
sewardja6b61f02014-08-17 18:32:14 +00009375 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
9376 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
9377 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
9378 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
9379 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
9380 if (bitQ == 0 && size == X11) return False; // implied 1d case
9381 Bool isU = bitU == 1;
9382 Bool isR = opcode == BITS5(0,1,0,1,0);
9383 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9384 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9385 IRTemp res = newTempV128();
9386 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9387 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9388 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9389 : (isU ? "ushl" : "sshl");
9390 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9391 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9392 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9393 return True;
9394 }
9395
sewardj12972182014-08-04 08:09:47 +00009396 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9397 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
9398 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
9399 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
9400 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
9401 if (bitQ == 0 && size == X11) return False; // implied 1d case
9402 Bool isU = bitU == 1;
9403 Bool isR = opcode == BITS5(0,1,0,1,1);
9404 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9405 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9406 /* This is a bit tricky. If we're only interested in the lowest 64 bits
9407 of the result (viz, bitQ == 0), then we must adjust the operands to
9408 ensure that the upper part of the result, that we don't care about,
9409 doesn't pollute the returned Q value. To do this, zero out the upper
9410 operand halves beforehand. This works because it means, for the
9411 lanes we don't care about, we are shifting zero by zero, which can
9412 never saturate. */
9413 IRTemp res256 = newTemp(Ity_V256);
9414 IRTemp resSH = newTempV128();
9415 IRTemp resQ = newTempV128();
9416 IRTemp zero = newTempV128();
9417 assign(res256, binop(op,
9418 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
9419 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
9420 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9421 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
9422 assign(zero, mkV128(0x0000));
9423 putQReg128(dd, mkexpr(resSH));
9424 updateQCFLAGwithDifference(resQ, zero);
9425 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
9426 : (isU ? "uqshl" : "sqshl");
9427 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9428 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9429 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9430 return True;
9431 }
9432
sewardjdf1628c2014-06-10 22:52:05 +00009433 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
9434 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
9435 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
9436 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
9437 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
9438 if (bitQ == 0 && size == X11) return False; // implied 1d case
9439 Bool isU = bitU == 1;
9440 Bool isMAX = (opcode & 1) == 0;
sewardj8e91fd42014-07-11 12:05:47 +00009441 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
9442 : (isU ? mkVecMINU(size) : mkVecMINS(size));
9443 IRTemp t = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009444 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009445 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
sewardjdf1628c2014-06-10 22:52:05 +00009446 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
9447 : (isU ? "umin" : "smin");
9448 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9449 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9450 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9451 return True;
9452 }
9453
sewardjdf9d6d52014-06-27 10:43:22 +00009454 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
9455 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
9456 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
9457 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
9458 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
9459 if (size == X11) return False; // 1d/2d cases not allowed
9460 Bool isU = bitU == 1;
9461 Bool isACC = opcode == BITS5(0,1,1,1,1);
sewardjdf9d6d52014-06-27 10:43:22 +00009462 vassert(size <= 2);
9463 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009464 IRTemp t2 = newTempV128();
9465 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
sewardjdf9d6d52014-06-27 10:43:22 +00009466 : mkexpr(t1));
9467 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
9468 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
9469 : (isU ? "uabd" : "sabd");
9470 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9471 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9472 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9473 return True;
9474 }
9475
sewardjdf1628c2014-06-10 22:52:05 +00009476 if (opcode == BITS5(1,0,0,0,0)) {
9477 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
9478 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
9479 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009480 Bool isSUB = bitU == 1;
9481 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
9482 IRTemp t = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009483 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009484 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
sewardjdf1628c2014-06-10 22:52:05 +00009485 const HChar* nm = isSUB ? "sub" : "add";
9486 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9487 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9488 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9489 return True;
9490 }
9491
9492 if (opcode == BITS5(1,0,0,0,1)) {
9493 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
9494 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
9495 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009496 Bool isEQ = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009497 IRExpr* argL = getQReg128(nn);
9498 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009499 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009500 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009501 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
9502 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
sewardjdf1628c2014-06-10 22:52:05 +00009503 binop(Iop_AndV128, argL, argR),
9504 mkV128(0x0000))));
sewardjdf9d6d52014-06-27 10:43:22 +00009505 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009506 const HChar* nm = isEQ ? "cmeq" : "cmtst";
9507 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9508 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9509 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9510 return True;
9511 }
9512
9513 if (opcode == BITS5(1,0,0,1,0)) {
9514 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
9515 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
9516 if (bitQ == 0 && size == X11) return False; // implied 1d case
9517 Bool isMLS = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009518 IROp opMUL = mkVecMUL(size);
9519 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
9520 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009521 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
9522 assign(res, binop(opADDSUB,
9523 getQReg128(dd),
9524 binop(opMUL, getQReg128(nn), getQReg128(mm))));
sewardjdf9d6d52014-06-27 10:43:22 +00009525 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009526 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9527 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
9528 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9529 return True;
9530 }
9531 return False;
9532 }
9533
9534 if (opcode == BITS5(1,0,0,1,1)) {
9535 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
9536 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
9537 if (bitQ == 0 && size == X11) return False; // implied 1d case
9538 Bool isPMUL = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009539 const IROp opsPMUL[4]
9540 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
sewardj8e91fd42014-07-11 12:05:47 +00009541 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
9542 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009543 if (opMUL != Iop_INVALID) {
9544 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009545 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009546 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9547 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
9548 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9549 return True;
9550 }
9551 return False;
9552 }
9553
sewardja5a6b752014-06-30 07:33:56 +00009554 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
9555 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
9556 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
9557 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
9558 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
9559 if (size == X11) return False;
9560 Bool isU = bitU == 1;
9561 Bool isMAX = opcode == BITS5(1,0,1,0,0);
sewardj8e91fd42014-07-11 12:05:47 +00009562 IRTemp vN = newTempV128();
9563 IRTemp vM = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009564 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
9565 : (isU ? mkVecMINU(size) : mkVecMINS(size));
9566 assign(vN, getQReg128(nn));
9567 assign(vM, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009568 IRTemp res128 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009569 assign(res128,
9570 binop(op,
9571 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
9572 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
9573 /* In the half-width case, use CatEL32x4 to extract the half-width
9574 result from the full-width result. */
9575 IRExpr* res
9576 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
9577 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
9578 mkexpr(res128)))
9579 : mkexpr(res128);
9580 putQReg128(dd, res);
9581 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9582 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
9583 : (isU ? "uminp" : "sminp");
9584 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9585 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9586 return True;
9587 }
9588
sewardj54ffa1d2014-07-22 09:27:49 +00009589 if (opcode == BITS5(1,0,1,1,0)) {
9590 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9591 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9592 if (size == X00 || size == X11) return False;
9593 Bool isR = bitU == 1;
9594 IRTemp res, sat1q, sat1n, vN, vM;
9595 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9596 newTempsV128_2(&vN, &vM);
9597 assign(vN, getQReg128(nn));
9598 assign(vM, getQReg128(mm));
9599 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9600 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9601 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
9602 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9603 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9604 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
9605 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9606 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9607 return True;
9608 }
9609
sewardja5a6b752014-06-30 07:33:56 +00009610 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
9611 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
9612 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009613 IRTemp vN = newTempV128();
9614 IRTemp vM = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009615 assign(vN, getQReg128(nn));
9616 assign(vM, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009617 IRTemp res128 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009618 assign(res128,
9619 binop(mkVecADD(size),
9620 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
9621 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
9622 /* In the half-width case, use CatEL32x4 to extract the half-width
9623 result from the full-width result. */
9624 IRExpr* res
9625 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
9626 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
9627 mkexpr(res128)))
9628 : mkexpr(res128);
9629 putQReg128(dd, res);
9630 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9631 DIP("addp %s.%s, %s.%s, %s.%s\n",
9632 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9633 return True;
9634 }
9635
sewardjdf1628c2014-06-10 22:52:05 +00009636 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
9637 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9638 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9639 Bool isD = (size & 1) == 1;
9640 Bool isSUB = (size & 2) == 2;
9641 if (bitQ == 0 && isD) return False; // implied 1d case
9642 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
9643 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
9644 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
9645 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009646 IRTemp t1 = newTempV128();
9647 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009648 // FIXME: double rounding; use FMA primops instead
9649 assign(t1, triop(opMUL,
9650 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
9651 assign(t2, triop(isSUB ? opSUB : opADD,
9652 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
sewardjdf9d6d52014-06-27 10:43:22 +00009653 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
sewardjdf1628c2014-06-10 22:52:05 +00009654 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9655 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
9656 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9657 return True;
9658 }
9659
9660 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
9661 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9662 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9663 Bool isD = (size & 1) == 1;
9664 Bool isSUB = (size & 2) == 2;
9665 if (bitQ == 0 && isD) return False; // implied 1d case
9666 const IROp ops[4]
9667 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
9668 IROp op = ops[size];
9669 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009670 IRTemp t1 = newTempV128();
9671 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009672 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009673 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009674 putQReg128(dd, mkexpr(t2));
9675 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9676 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
9677 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9678 return True;
9679 }
9680
9681 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9682 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9683 Bool isD = (size & 1) == 1;
9684 if (bitQ == 0 && isD) return False; // implied 1d case
9685 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
9686 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9687 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009688 IRTemp t1 = newTempV128();
9689 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009690 // FIXME: use Abd primop instead?
sewardjdf9d6d52014-06-27 10:43:22 +00009691 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf1628c2014-06-10 22:52:05 +00009692 assign(t2, unop(opABS, mkexpr(t1)));
sewardjdf9d6d52014-06-27 10:43:22 +00009693 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
sewardjdf1628c2014-06-10 22:52:05 +00009694 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9695 DIP("fabd %s.%s, %s.%s, %s.%s\n",
9696 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9697 return True;
9698 }
9699
9700 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9701 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9702 Bool isD = (size & 1) == 1;
9703 if (bitQ == 0 && isD) return False; // implied 1d case
9704 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009705 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009706 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
9707 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009708 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009709 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9710 DIP("fmul %s.%s, %s.%s, %s.%s\n",
9711 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9712 return True;
9713 }
9714
9715 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9716 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9717 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9718 Bool isD = (size & 1) == 1;
9719 if (bitQ == 0 && isD) return False; // implied 1d case
9720 Bool isGE = bitU == 1;
9721 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9722 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
sewardj8e91fd42014-07-11 12:05:47 +00009723 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009724 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9725 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009726 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009727 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9728 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
9729 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9730 return True;
9731 }
9732
9733 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9734 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9735 Bool isD = (size & 1) == 1;
9736 if (bitQ == 0 && isD) return False; // implied 1d case
9737 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
sewardj8e91fd42014-07-11 12:05:47 +00009738 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009739 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
sewardjdf9d6d52014-06-27 10:43:22 +00009740 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009741 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9742 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
9743 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9744 return True;
9745 }
9746
9747 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9748 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9749 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9750 Bool isD = (size & 1) == 1;
9751 Bool isGT = (size & 2) == 2;
9752 if (bitQ == 0 && isD) return False; // implied 1d case
9753 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9754 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9755 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
sewardj8e91fd42014-07-11 12:05:47 +00009756 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009757 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
9758 unop(opABS, getQReg128(nn)))); // swapd
sewardjdf9d6d52014-06-27 10:43:22 +00009759 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009760 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9761 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
9762 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9763 return True;
9764 }
9765
9766 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
9767 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9768 Bool isD = (size & 1) == 1;
9769 if (bitQ == 0 && isD) return False; // implied 1d case
9770 vassert(size <= 1);
9771 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
9772 IROp op = ops[size];
9773 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009774 IRTemp t1 = newTempV128();
9775 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009776 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009777 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009778 putQReg128(dd, mkexpr(t2));
9779 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9780 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
9781 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9782 return True;
9783 }
9784
9785 return False;
9786# undef INSN
9787}
9788
9789
9790static
9791Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9792{
9793 /* 31 30 29 28 23 21 16 11 9 4
9794 0 Q U 01110 size 10000 opcode 10 n d
9795 Decode fields: U,size,opcode
9796 */
9797# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9798 if (INSN(31,31) != 0
9799 || INSN(28,24) != BITS5(0,1,1,1,0)
9800 || INSN(21,17) != BITS5(1,0,0,0,0)
9801 || INSN(11,10) != BITS2(1,0)) {
9802 return False;
9803 }
9804 UInt bitQ = INSN(30,30);
9805 UInt bitU = INSN(29,29);
9806 UInt size = INSN(23,22);
9807 UInt opcode = INSN(16,12);
9808 UInt nn = INSN(9,5);
9809 UInt dd = INSN(4,0);
9810 vassert(size < 4);
9811
sewardjdf9d6d52014-06-27 10:43:22 +00009812 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
9813 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
9814 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
9815 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
9816 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
9817 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
9818 vassert(size <= 2);
sewardj8e91fd42014-07-11 12:05:47 +00009819 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009820 assign(res, unop(iops[size], getQReg128(nn)));
9821 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9822 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9823 DIP("%s %s.%s, %s.%s\n", "rev64",
9824 nameQReg128(dd), arr, nameQReg128(nn), arr);
9825 return True;
9826 }
9827
9828 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
9829 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
9830 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
9831 Bool isH = size == X01;
sewardj8e91fd42014-07-11 12:05:47 +00009832 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009833 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
9834 assign(res, unop(iop, getQReg128(nn)));
9835 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9836 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9837 DIP("%s %s.%s, %s.%s\n", "rev32",
9838 nameQReg128(dd), arr, nameQReg128(nn), arr);
9839 return True;
9840 }
9841
sewardj715d1622014-06-26 12:39:05 +00009842 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
9843 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009844 IRTemp res = newTempV128();
sewardj715d1622014-06-26 12:39:05 +00009845 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
9846 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf9d6d52014-06-27 10:43:22 +00009847 const HChar* arr = nameArr_Q_SZ(bitQ, size);
sewardj715d1622014-06-26 12:39:05 +00009848 DIP("%s %s.%s, %s.%s\n", "rev16",
9849 nameQReg128(dd), arr, nameQReg128(nn), arr);
9850 return True;
9851 }
9852
sewardja5a6b752014-06-30 07:33:56 +00009853 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
9854 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
9855 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
9856 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
9857 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
9858 /* Widens, and size refers to the narrow size. */
9859 if (size == X11) return False; // no 1d or 2d cases
9860 Bool isU = bitU == 1;
9861 Bool isACC = opcode == BITS5(0,0,1,1,0);
sewardj8e91fd42014-07-11 12:05:47 +00009862 IRTemp src = newTempV128();
9863 IRTemp sum = newTempV128();
9864 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009865 assign(src, getQReg128(nn));
9866 assign(sum,
9867 binop(mkVecADD(size+1),
9868 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9869 isU, True/*fromOdd*/, size, mkexpr(src))),
9870 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9871 isU, False/*!fromOdd*/, size, mkexpr(src)))));
9872 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
9873 : mkexpr(sum));
9874 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9875 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9876 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
9877 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
9878 : (isU ? "uaddlp" : "saddlp"),
9879 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
9880 return True;
9881 }
9882
sewardjf7003bc2014-08-18 12:28:02 +00009883 if (opcode == BITS5(0,0,0,1,1)) {
9884 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
9885 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
9886 if (bitQ == 0 && size == X11) return False; // implied 1d case
9887 Bool isUSQADD = bitU == 1;
9888 /* This is switched (in the US vs SU sense) deliberately.
9889 SUQADD corresponds to the ExtUSsatSS variants and
9890 USQADD corresponds to the ExtSUsatUU variants.
9891 See libvex_ir for more details. */
9892 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9893 : mkVecQADDEXTUSSATSS(size);
9894 IROp nop = mkVecADD(size);
9895 IRTemp argL = newTempV128();
9896 IRTemp argR = newTempV128();
9897 IRTemp qres = newTempV128();
9898 IRTemp nres = newTempV128();
9899 /* Because the two arguments to the addition are implicitly
9900 extended differently (one signedly, the other unsignedly) it is
9901 important to present them to the primop in the correct order. */
9902 assign(argL, getQReg128(nn));
9903 assign(argR, getQReg128(dd));
9904 assign(qres, math_MAYBE_ZERO_HI64_fromE(
9905 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
9906 assign(nres, math_MAYBE_ZERO_HI64_fromE(
9907 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
9908 putQReg128(dd, mkexpr(qres));
9909 updateQCFLAGwithDifference(qres, nres);
9910 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9911 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
9912 nameQReg128(dd), arr, nameQReg128(nn), arr);
9913 return True;
9914 }
9915
sewardj2b6fd5e2014-06-19 14:21:37 +00009916 if (opcode == BITS5(0,0,1,0,0)) {
9917 /* -------- 0,xx,00100: CLS std6_std6 -------- */
9918 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
9919 if (size == X11) return False; // no 1d or 2d cases
sewardja8c7b0f2014-06-26 08:18:08 +00009920 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
9921 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
sewardj2b6fd5e2014-06-19 14:21:37 +00009922 Bool isCLZ = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009923 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +00009924 vassert(size <= 2);
9925 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009926 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj2b6fd5e2014-06-19 14:21:37 +00009927 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9928 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
9929 nameQReg128(dd), arr, nameQReg128(nn), arr);
9930 return True;
9931 }
9932
sewardj787a67f2014-06-23 09:09:41 +00009933 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
sewardj2b6fd5e2014-06-19 14:21:37 +00009934 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
sewardj787a67f2014-06-23 09:09:41 +00009935 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009936 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +00009937 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009938 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj715d1622014-06-26 12:39:05 +00009939 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
sewardj787a67f2014-06-23 09:09:41 +00009940 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
sewardj2b6fd5e2014-06-19 14:21:37 +00009941 nameQReg128(dd), arr, nameQReg128(nn), arr);
9942 return True;
9943 }
9944
sewardj715d1622014-06-26 12:39:05 +00009945 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
9946 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009947 IRTemp res = newTempV128();
sewardj715d1622014-06-26 12:39:05 +00009948 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009949 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj715d1622014-06-26 12:39:05 +00009950 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
9951 DIP("%s %s.%s, %s.%s\n", "rbit",
9952 nameQReg128(dd), arr, nameQReg128(nn), arr);
9953 return True;
9954 }
9955
sewardj51d012a2014-07-21 09:19:50 +00009956 if (opcode == BITS5(0,0,1,1,1)) {
sewardj8e91fd42014-07-11 12:05:47 +00009957 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
sewardj51d012a2014-07-21 09:19:50 +00009958 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009959 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj51d012a2014-07-21 09:19:50 +00009960 Bool isNEG = bitU == 1;
9961 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9962 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9963 getQReg128(nn), size );
sewardj8e91fd42014-07-11 12:05:47 +00009964 IRTemp qres = newTempV128(), nres = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00009965 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
9966 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
sewardj8e91fd42014-07-11 12:05:47 +00009967 putQReg128(dd, mkexpr(qres));
9968 updateQCFLAGwithDifference(qres, nres);
9969 const HChar* arr = nameArr_Q_SZ(bitQ, size);
sewardj51d012a2014-07-21 09:19:50 +00009970 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
sewardj8e91fd42014-07-11 12:05:47 +00009971 nameQReg128(dd), arr, nameQReg128(nn), arr);
9972 return True;
9973 }
9974
sewardjdf1628c2014-06-10 22:52:05 +00009975 if (opcode == BITS5(0,1,0,0,0)) {
9976 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
9977 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
9978 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009979 Bool isGT = bitU == 0;
9980 IRExpr* argL = getQReg128(nn);
9981 IRExpr* argR = mkV128(0x0000);
9982 IRTemp res = newTempV128();
9983 IROp opGTS = mkVecCMPGTS(size);
9984 assign(res, isGT ? binop(opGTS, argL, argR)
9985 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
sewardjdf9d6d52014-06-27 10:43:22 +00009986 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009987 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9988 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
9989 nameQReg128(dd), arr, nameQReg128(nn), arr);
9990 return True;
9991 }
9992
9993 if (opcode == BITS5(0,1,0,0,1)) {
9994 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
9995 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
9996 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjdf1628c2014-06-10 22:52:05 +00009997 Bool isEQ = bitU == 0;
9998 IRExpr* argL = getQReg128(nn);
9999 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010000 IRTemp res = newTempV128();
10001 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
sewardjdf1628c2014-06-10 22:52:05 +000010002 : unop(Iop_NotV128,
sewardj8e91fd42014-07-11 12:05:47 +000010003 binop(mkVecCMPGTS(size), argL, argR)));
sewardjdf9d6d52014-06-27 10:43:22 +000010004 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010005 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10006 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
10007 nameQReg128(dd), arr, nameQReg128(nn), arr);
10008 return True;
10009 }
10010
10011 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
10012 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
10013 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjdf1628c2014-06-10 22:52:05 +000010014 IRExpr* argL = getQReg128(nn);
10015 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010016 IRTemp res = newTempV128();
10017 assign(res, binop(mkVecCMPGTS(size), argR, argL));
sewardjdf9d6d52014-06-27 10:43:22 +000010018 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010019 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10020 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
10021 nameQReg128(dd), arr, nameQReg128(nn), arr);
10022 return True;
10023 }
10024
sewardj25523c42014-06-15 19:36:29 +000010025 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
10026 /* -------- 0,xx,01011: ABS std7_std7 -------- */
10027 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000010028 IRTemp res = newTempV128();
10029 assign(res, unop(mkVecABS(size), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010030 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj25523c42014-06-15 19:36:29 +000010031 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10032 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
10033 return True;
10034 }
10035
sewardjdf1628c2014-06-10 22:52:05 +000010036 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
10037 /* -------- 1,xx,01011: NEG std7_std7 -------- */
10038 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000010039 IRTemp res = newTempV128();
10040 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010041 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010042 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10043 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
10044 return True;
10045 }
10046
10047 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
10048 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
10049 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
10050 if (bitQ == 0 && size == X11) return False; // implied 1d case
10051 Bool isFNEG = bitU == 1;
10052 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
10053 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
sewardj8e91fd42014-07-11 12:05:47 +000010054 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +000010055 assign(res, unop(op, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010056 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010057 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
10058 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
10059 nameQReg128(dd), arr, nameQReg128(nn), arr);
10060 return True;
10061 }
10062
10063 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10064 /* -------- 0,xx,10010: XTN{,2} -------- */
sewardjecedd982014-08-11 14:02:47 +000010065 if (size == X11) return False;
10066 vassert(size < 3);
10067 Bool is2 = bitQ == 1;
10068 IROp opN = mkVecNARROWUN(size);
10069 IRTemp resN = newTempV128();
10070 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
10071 putLO64andZUorPutHI64(is2, dd, resN);
10072 const HChar* nm = "xtn";
10073 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10074 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10075 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
10076 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
10077 return True;
10078 }
10079
10080 if (opcode == BITS5(1,0,1,0,0)
10081 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10082 /* -------- 0,xx,10100: SQXTN{,2} -------- */
10083 /* -------- 1,xx,10100: UQXTN{,2} -------- */
10084 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
10085 if (size == X11) return False;
10086 vassert(size < 3);
10087 Bool is2 = bitQ == 1;
10088 IROp opN = Iop_INVALID;
10089 Bool zWiden = True;
10090 const HChar* nm = "??";
10091 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10092 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
sewardjdf1628c2014-06-10 22:52:05 +000010093 }
sewardjecedd982014-08-11 14:02:47 +000010094 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10095 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
sewardjdf1628c2014-06-10 22:52:05 +000010096 }
sewardjecedd982014-08-11 14:02:47 +000010097 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10098 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10099 }
10100 else vassert(0);
10101 IRTemp src = newTempV128();
10102 assign(src, getQReg128(nn));
10103 IRTemp resN = newTempV128();
10104 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10105 putLO64andZUorPutHI64(is2, dd, resN);
10106 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10107 size, mkexpr(resN));
10108 updateQCFLAGwithDifference(src, resW);
10109 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10110 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10111 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
10112 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
10113 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010114 }
10115
sewardj487559e2014-07-10 14:22:45 +000010116 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10117 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
10118 /* Widens, and size is the narrow size. */
10119 if (size == X11) return False;
10120 Bool is2 = bitQ == 1;
10121 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
10122 IROp opSHL = mkVecSHLN(size+1);
sewardj8e91fd42014-07-11 12:05:47 +000010123 IRTemp src = newTempV128();
10124 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010125 assign(src, getQReg128(nn));
10126 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
10127 mkU8(8 << size)));
10128 putQReg128(dd, mkexpr(res));
10129 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10130 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10131 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
10132 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
10133 return True;
10134 }
10135
sewardjdf1628c2014-06-10 22:52:05 +000010136 if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
10137 /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
10138 IRTemp rm = mk_get_IR_rounding_mode();
10139 IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
10140 IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
10141 putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
10142 putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
10143 if (bitQ == 0) {
10144 putQRegLane(dd, 1, mkU64(0));
10145 }
10146 DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
10147 nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
10148 return True;
10149 }
10150
sewardjfc261d92014-08-24 20:36:14 +000010151 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
10152 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
10153 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
10154 Bool isREC = bitU == 0;
10155 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
10156 IRTemp res = newTempV128();
10157 assign(res, unop(op, getQReg128(nn)));
10158 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10159 const HChar* nm = isREC ? "urecpe" : "ursqrte";
10160 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10161 DIP("%s %s.%s, %s.%s\n", nm,
10162 nameQReg128(dd), arr, nameQReg128(nn), arr);
10163 return True;
10164 }
10165
sewardj5747c4a2014-06-11 20:57:23 +000010166 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10167 /* -------- 0,0x,11101: SCVTF -------- */
10168 /* -------- 1,0x,11101: UCVTF -------- */
10169 /* 31 28 22 21 15 9 4
10170 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
10171 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
10172 with laneage:
10173 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
10174 */
10175 Bool isQ = bitQ == 1;
10176 Bool isU = bitU == 1;
10177 Bool isF64 = (size & 1) == 1;
10178 if (isQ || !isF64) {
10179 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
10180 UInt nLanes = 0;
10181 Bool zeroHI = False;
10182 const HChar* arrSpec = NULL;
10183 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
10184 isQ, isF64 );
10185 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
10186 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
10187 IRTemp rm = mk_get_IR_rounding_mode();
10188 UInt i;
10189 vassert(ok); /* the 'if' above should ensure this */
10190 for (i = 0; i < nLanes; i++) {
10191 putQRegLane(dd, i,
10192 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
10193 }
10194 if (zeroHI) {
10195 putQRegLane(dd, 1, mkU64(0));
10196 }
10197 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
10198 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
10199 return True;
10200 }
10201 /* else fall through */
10202 }
10203
sewardjdf1628c2014-06-10 22:52:05 +000010204 return False;
10205# undef INSN
10206}
10207
sewardjfc83d2c2014-06-12 10:15:46 +000010208
sewardjdf1628c2014-06-10 22:52:05 +000010209static
10210Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
10211{
sewardj85fbb022014-06-12 13:16:01 +000010212 /* 31 28 23 21 20 19 15 11 9 4
10213 0 Q U 01111 size L M m opcode H 0 n d
10214 Decode fields are: u,size,opcode
sewardj787a67f2014-06-23 09:09:41 +000010215 M is really part of the mm register number. Individual
10216 cases need to inspect L and H though.
sewardj85fbb022014-06-12 13:16:01 +000010217 */
sewardjdf1628c2014-06-10 22:52:05 +000010218# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj85fbb022014-06-12 13:16:01 +000010219 if (INSN(31,31) != 0
sewardj8e91fd42014-07-11 12:05:47 +000010220 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
sewardj85fbb022014-06-12 13:16:01 +000010221 return False;
10222 }
10223 UInt bitQ = INSN(30,30);
10224 UInt bitU = INSN(29,29);
10225 UInt size = INSN(23,22);
10226 UInt bitL = INSN(21,21);
10227 UInt bitM = INSN(20,20);
10228 UInt mmLO4 = INSN(19,16);
10229 UInt opcode = INSN(15,12);
10230 UInt bitH = INSN(11,11);
10231 UInt nn = INSN(9,5);
10232 UInt dd = INSN(4,0);
sewardj85fbb022014-06-12 13:16:01 +000010233 vassert(size < 4);
sewardj787a67f2014-06-23 09:09:41 +000010234 vassert(bitH < 2 && bitM < 2 && bitL < 2);
sewardj85fbb022014-06-12 13:16:01 +000010235
10236 if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
10237 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
10238 if (bitQ == 0 && size == X11) return False; // implied 1d case
10239 Bool isD = (size & 1) == 1;
10240 UInt index;
10241 if (!isD) index = (bitH << 1) | bitL;
10242 else if (isD && bitL == 0) index = bitH;
10243 else return False; // sz:L == x11 => unallocated encoding
10244 vassert(index < (isD ? 2 : 4));
10245 IRType ity = isD ? Ity_F64 : Ity_F32;
10246 IRTemp elem = newTemp(ity);
sewardj787a67f2014-06-23 09:09:41 +000010247 UInt mm = (bitM << 4) | mmLO4;
sewardj85fbb022014-06-12 13:16:01 +000010248 assign(elem, getQRegLane(mm, index, ity));
10249 IRTemp dupd = math_DUP_TO_V128(elem, ity);
sewardj8e91fd42014-07-11 12:05:47 +000010250 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +000010251 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
10252 mkexpr(mk_get_IR_rounding_mode()),
10253 getQReg128(nn), mkexpr(dupd)));
sewardjdf9d6d52014-06-27 10:43:22 +000010254 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj85fbb022014-06-12 13:16:01 +000010255 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
10256 DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
10257 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
10258 return True;
10259 }
10260
sewardj787a67f2014-06-23 09:09:41 +000010261 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
10262 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
10263 /* -------- 1,xx,0000 MLA s/h variants only -------- */
10264 /* -------- 1,xx,0100 MLS s/h variants only -------- */
10265 /* -------- 0,xx,1000 MUL s/h variants only -------- */
10266 Bool isMLA = opcode == BITS4(0,0,0,0);
10267 Bool isMLS = opcode == BITS4(0,1,0,0);
10268 UInt mm = 32; // invalid
10269 UInt ix = 16; // invalid
10270 switch (size) {
10271 case X00:
10272 return False; // b case is not allowed
10273 case X01:
10274 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10275 case X10:
10276 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10277 case X11:
10278 return False; // d case is not allowed
10279 default:
10280 vassert(0);
10281 }
10282 vassert(mm < 32 && ix < 16);
sewardj487559e2014-07-10 14:22:45 +000010283 IROp opMUL = mkVecMUL(size);
10284 IROp opADD = mkVecADD(size);
10285 IROp opSUB = mkVecSUB(size);
sewardj787a67f2014-06-23 09:09:41 +000010286 HChar ch = size == X01 ? 'h' : 's';
sewardj487559e2014-07-10 14:22:45 +000010287 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000010288 IRTemp vecD = newTempV128();
10289 IRTemp vecN = newTempV128();
10290 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +000010291 assign(vecD, getQReg128(dd));
10292 assign(vecN, getQReg128(nn));
10293 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
10294 if (isMLA || isMLS) {
10295 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
10296 } else {
10297 assign(res, prod);
10298 }
sewardjdf9d6d52014-06-27 10:43:22 +000010299 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj787a67f2014-06-23 09:09:41 +000010300 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10301 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
10302 : (isMLS ? "mls" : "mul"),
10303 nameQReg128(dd), arr,
10304 nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
10305 return True;
10306 }
10307
sewardj487559e2014-07-10 14:22:45 +000010308 if (opcode == BITS4(1,0,1,0)
10309 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
10310 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
10311 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
10312 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
10313 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
10314 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
10315 /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2
10316 /* Widens, and size refers to the narrowed lanes. */
10317 UInt ks = 3;
10318 switch (opcode) {
10319 case BITS4(1,0,1,0): ks = 0; break;
10320 case BITS4(0,0,1,0): ks = 1; break;
10321 case BITS4(0,1,1,0): ks = 2; break;
10322 default: vassert(0);
10323 }
10324 vassert(ks >= 0 && ks <= 2);
10325 Bool isU = bitU == 1;
10326 Bool is2 = bitQ == 1;
10327 UInt mm = 32; // invalid
10328 UInt ix = 16; // invalid
10329 switch (size) {
10330 case X00:
10331 return False; // h_b_b[] case is not allowed
10332 case X01:
10333 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10334 case X10:
10335 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10336 case X11:
10337 return False; // q_d_d[] case is not allowed
10338 default:
10339 vassert(0);
10340 }
10341 vassert(mm < 32 && ix < 16);
sewardj51d012a2014-07-21 09:19:50 +000010342 IRTemp vecN = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010343 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000010344 IRTemp vecD = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010345 assign(vecN, getQReg128(nn));
sewardj51d012a2014-07-21 09:19:50 +000010346 assign(vecD, getQReg128(dd));
10347 IRTemp res = IRTemp_INVALID;
10348 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
10349 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj487559e2014-07-10 14:22:45 +000010350 putQReg128(dd, mkexpr(res));
10351 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
10352 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10353 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10354 HChar ch = size == X01 ? 'h' : 's';
10355 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
10356 isU ? 'u' : 's', nm, is2 ? "2" : "",
10357 nameQReg128(dd), arrWide,
10358 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
10359 return True;
10360 }
10361
sewardj51d012a2014-07-21 09:19:50 +000010362 if (bitU == 0
10363 && (opcode == BITS4(1,0,1,1)
10364 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10365 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10366 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10367 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10368 /* Widens, and size refers to the narrowed lanes. */
10369 UInt ks = 3;
10370 switch (opcode) {
10371 case BITS4(1,0,1,1): ks = 0; break;
10372 case BITS4(0,0,1,1): ks = 1; break;
10373 case BITS4(0,1,1,1): ks = 2; break;
10374 default: vassert(0);
10375 }
10376 vassert(ks >= 0 && ks <= 2);
10377 Bool is2 = bitQ == 1;
10378 UInt mm = 32; // invalid
10379 UInt ix = 16; // invalid
10380 switch (size) {
10381 case X00:
10382 return False; // h_b_b[] case is not allowed
10383 case X01:
10384 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10385 case X10:
10386 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10387 case X11:
10388 return False; // q_d_d[] case is not allowed
10389 default:
10390 vassert(0);
10391 }
10392 vassert(mm < 32 && ix < 16);
10393 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10394 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10395 newTempsV128_2(&vecN, &vecD);
10396 assign(vecN, getQReg128(nn));
10397 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10398 assign(vecD, getQReg128(dd));
10399 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10400 is2, size, "mas"[ks],
10401 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10402 putQReg128(dd, mkexpr(res));
10403 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10404 updateQCFLAGwithDifference(sat1q, sat1n);
10405 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10406 updateQCFLAGwithDifference(sat2q, sat2n);
10407 }
sewardj54ffa1d2014-07-22 09:27:49 +000010408 const HChar* nm = ks == 0 ? "sqdmull"
sewardj51d012a2014-07-21 09:19:50 +000010409 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10410 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10411 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10412 HChar ch = size == X01 ? 'h' : 's';
10413 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
10414 nm, is2 ? "2" : "",
10415 nameQReg128(dd), arrWide,
10416 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
10417 return True;
10418 }
10419
sewardj257e99f2014-08-03 12:45:19 +000010420 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10421 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10422 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10423 UInt mm = 32; // invalid
10424 UInt ix = 16; // invalid
10425 switch (size) {
10426 case X00:
10427 return False; // b case is not allowed
10428 case X01:
10429 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10430 case X10:
10431 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10432 case X11:
10433 return False; // q case is not allowed
10434 default:
10435 vassert(0);
10436 }
10437 vassert(mm < 32 && ix < 16);
10438 Bool isR = opcode == BITS4(1,1,0,1);
10439 IRTemp res, sat1q, sat1n, vN, vM;
10440 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10441 vN = newTempV128();
10442 assign(vN, getQReg128(nn));
10443 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10444 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10445 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10446 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
10447 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10448 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10449 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10450 HChar ch = size == X01 ? 'h' : 's';
10451 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
10452 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
10453 return True;
10454 }
10455
sewardjdf1628c2014-06-10 22:52:05 +000010456 return False;
10457# undef INSN
10458}
10459
sewardjfc83d2c2014-06-12 10:15:46 +000010460
sewardjdf1628c2014-06-10 22:52:05 +000010461static
10462Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
10463{
10464# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10465 return False;
10466# undef INSN
10467}
10468
sewardjfc83d2c2014-06-12 10:15:46 +000010469
sewardjdf1628c2014-06-10 22:52:05 +000010470static
10471Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
10472{
10473# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10474 return False;
10475# undef INSN
10476}
10477
sewardjfc83d2c2014-06-12 10:15:46 +000010478
sewardjdf1628c2014-06-10 22:52:05 +000010479static
10480Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
10481{
10482# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10483 return False;
10484# undef INSN
10485}
10486
sewardj5747c4a2014-06-11 20:57:23 +000010487
sewardjdf1628c2014-06-10 22:52:05 +000010488static
10489Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
10490{
sewardj5747c4a2014-06-11 20:57:23 +000010491 /* 31 28 23 21 20 15 13 9 4
10492 000 11110 ty 1 m op 1000 n opcode2
10493 The first 3 bits are really "M 0 S", but M and S are always zero.
10494 Decode fields are: ty,op,opcode2
10495 */
sewardjdf1628c2014-06-10 22:52:05 +000010496# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000010497 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10498 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
10499 return False;
10500 }
10501 UInt ty = INSN(23,22);
10502 UInt mm = INSN(20,16);
10503 UInt op = INSN(15,14);
10504 UInt nn = INSN(9,5);
10505 UInt opcode2 = INSN(4,0);
10506 vassert(ty < 4);
10507
10508 if (ty <= X01 && op == X00
10509 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
10510 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
10511 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
10512 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
10513 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
10514 /* 31 23 20 15 9 4
10515 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
10516 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
10517 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
10518 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
10519
10520 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
10521 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
10522 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
10523 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
10524
10525 FCMPE generates Invalid Operation exn if either arg is any kind
10526 of NaN. FCMP generates Invalid Operation exn if either arg is a
10527 signalling NaN. We ignore this detail here and produce the same
10528 IR for both.
10529 */
10530 Bool isD = (ty & 1) == 1;
10531 Bool isCMPE = (opcode2 & 16) == 16;
10532 Bool cmpZero = (opcode2 & 8) == 8;
10533 IRType ity = isD ? Ity_F64 : Ity_F32;
10534 Bool valid = True;
10535 if (cmpZero && mm != 0) valid = False;
10536 if (valid) {
10537 IRTemp argL = newTemp(ity);
10538 IRTemp argR = newTemp(ity);
10539 IRTemp irRes = newTemp(Ity_I32);
10540 assign(argL, getQRegLO(nn, ity));
10541 assign(argR,
10542 cmpZero
10543 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
10544 : getQRegLO(mm, ity));
10545 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
10546 mkexpr(argL), mkexpr(argR)));
10547 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
10548 IRTemp nzcv_28x0 = newTemp(Ity_I64);
10549 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
10550 setFlags_COPY(nzcv_28x0);
10551 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
10552 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
10553 return True;
10554 }
10555 return False;
10556 }
10557
sewardjdf1628c2014-06-10 22:52:05 +000010558 return False;
10559# undef INSN
10560}
10561
sewardj5747c4a2014-06-11 20:57:23 +000010562
sewardjdf1628c2014-06-10 22:52:05 +000010563static
10564Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
10565{
10566# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10567 return False;
10568# undef INSN
10569}
10570
sewardjfc83d2c2014-06-12 10:15:46 +000010571
sewardjdf1628c2014-06-10 22:52:05 +000010572static
10573Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
10574{
10575# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10576 return False;
10577# undef INSN
10578}
10579
sewardj5747c4a2014-06-11 20:57:23 +000010580
sewardjdf1628c2014-06-10 22:52:05 +000010581static
10582Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
10583{
10584 /* 31 28 23 21 20 14 9 4
10585 000 11110 ty 1 opcode 10000 n d
10586 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000010587 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000010588 */
10589# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10590 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10591 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
10592 return False;
10593 }
10594 UInt ty = INSN(23,22);
10595 UInt opcode = INSN(20,15);
10596 UInt nn = INSN(9,5);
10597 UInt dd = INSN(4,0);
10598
10599 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
10600 /* -------- 0x,000000: FMOV d_d, s_s -------- */
10601 /* -------- 0x,000001: FABS d_d, s_s -------- */
10602 /* -------- 0x,000010: FNEG d_d, s_s -------- */
10603 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
10604 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
10605 IRTemp src = newTemp(ity);
10606 IRTemp res = newTemp(ity);
10607 const HChar* nm = "??";
10608 assign(src, getQRegLO(nn, ity));
10609 switch (opcode) {
10610 case BITS6(0,0,0,0,0,0):
10611 nm = "fmov"; assign(res, mkexpr(src)); break;
10612 case BITS6(0,0,0,0,0,1):
10613 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
10614 case BITS6(0,0,0,0,1,0):
10615 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
10616 case BITS6(0,0,0,0,1,1):
10617 nm = "fsqrt";
10618 assign(res, binop(mkSQRTF(ity),
10619 mkexpr(mk_get_IR_rounding_mode()),
10620 mkexpr(src))); break;
10621 default:
10622 vassert(0);
10623 }
10624 putQReg128(dd, mkV128(0x0000));
10625 putQRegLO(dd, mkexpr(res));
10626 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10627 return True;
10628 }
10629
sewardj5747c4a2014-06-11 20:57:23 +000010630 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
10631 || opcode == BITS6(0,0,0,1,0,1)))
10632 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
10633 || opcode == BITS6(0,0,0,1,0,1)))
10634 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
10635 || opcode == BITS6(0,0,0,1,0,0)))) {
10636 /* -------- 11,000100: FCVT s_h -------- */
10637 /* -------- 11,000101: FCVT d_h -------- */
10638 /* -------- 00,000111: FCVT h_s -------- */
10639 /* -------- 00,000101: FCVT d_s -------- */
10640 /* -------- 01,000111: FCVT h_d -------- */
10641 /* -------- 01,000100: FCVT s_d -------- */
10642 /* 31 23 21 16 14 9 4
10643 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
10644 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
10645 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
10646 --------- 00 ----- 01 --------- FCVT Dd, Sn
10647 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
10648 --------- 01 ----- 00 --------- FCVT Sd, Dn
10649 Rounding, when dst is smaller than src, is per the FPCR.
10650 */
10651 UInt b2322 = ty;
10652 UInt b1615 = opcode & BITS2(1,1);
10653 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
10654 /* Convert S to D */
10655 IRTemp res = newTemp(Ity_F64);
10656 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
10657 putQReg128(dd, mkV128(0x0000));
10658 putQRegLO(dd, mkexpr(res));
10659 DIP("fcvt %s, %s\n",
10660 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
10661 return True;
10662 }
10663 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
10664 /* Convert D to S */
10665 IRTemp res = newTemp(Ity_F32);
10666 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
10667 getQRegLO(nn, Ity_F64)));
10668 putQReg128(dd, mkV128(0x0000));
10669 putQRegLO(dd, mkexpr(res));
10670 DIP("fcvt %s, %s\n",
10671 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
10672 return True;
10673 }
10674 /* else unhandled */
10675 return False;
10676 }
10677
10678 if (ty <= X01
10679 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
10680 && opcode != BITS6(0,0,1,1,0,1)) {
10681 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
10682 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
10683 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
10684 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
10685 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
10686 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
10687 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
10688 /* 31 23 21 17 14 9 4
10689 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
10690 rm
10691 x==0 => S-registers, x==1 => D-registers
10692 rm (17:15) encodings:
10693 111 per FPCR (FRINTI)
10694 001 +inf (FRINTP)
10695 010 -inf (FRINTM)
10696 011 zero (FRINTZ)
10697 000 tieeven
10698 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
10699 110 per FPCR + "exact = TRUE"
10700 101 unallocated
10701 */
10702 Bool isD = (ty & 1) == 1;
10703 UInt rm = opcode & BITS6(0,0,0,1,1,1);
10704 IRType ity = isD ? Ity_F64 : Ity_F32;
10705 IRExpr* irrmE = NULL;
10706 UChar ch = '?';
10707 switch (rm) {
10708 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
10709 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
10710 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
10711 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
10712 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
10713 default: break;
10714 }
10715 if (irrmE) {
10716 IRTemp src = newTemp(ity);
10717 IRTemp dst = newTemp(ity);
10718 assign(src, getQRegLO(nn, ity));
10719 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
10720 irrmE, mkexpr(src)));
10721 putQReg128(dd, mkV128(0x0000));
10722 putQRegLO(dd, mkexpr(dst));
10723 DIP("frint%c %s, %s\n",
10724 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10725 return True;
10726 }
10727 return False;
10728 }
10729
sewardjdf1628c2014-06-10 22:52:05 +000010730 return False;
10731# undef INSN
10732}
10733
10734
10735static
10736Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
10737{
10738 /* 31 28 23 21 20 15 11 9 4
10739 000 11110 ty 1 m opcode 10 n d
10740 The first 3 bits are really "M 0 S", but M and S are always zero.
10741 */
10742# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10743 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10744 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
10745 return False;
10746 }
10747 UInt ty = INSN(23,22);
10748 UInt mm = INSN(20,16);
10749 UInt opcode = INSN(15,12);
10750 UInt nn = INSN(9,5);
10751 UInt dd = INSN(4,0);
10752
10753 if (ty <= X01 && opcode <= BITS4(0,0,1,1)) {
10754 /* ------- 0x,0000: FMUL d_d, s_s ------- */
10755 /* ------- 0x,0001: FDIV d_d, s_s ------- */
10756 /* ------- 0x,0010: FADD d_d, s_s ------- */
10757 /* ------- 0x,0011: FSUB d_d, s_s ------- */
10758 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
10759 IROp iop = Iop_INVALID;
10760 const HChar* nm = "???";
10761 switch (opcode) {
10762 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
10763 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
10764 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
10765 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
10766 default: vassert(0);
10767 }
10768 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
10769 getQRegLO(nn, ity), getQRegLO(mm, ity));
10770 IRTemp res = newTemp(ity);
10771 assign(res, resE);
10772 putQReg128(dd, mkV128(0));
10773 putQRegLO(dd, mkexpr(res));
10774 DIP("%s %s, %s, %s\n",
10775 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10776 return True;
10777 }
10778
10779 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
10780 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
10781 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
10782 IROp iop = mkMULF(ity);
10783 IROp iopn = mkNEGF(ity);
10784 const HChar* nm = "fnmul";
10785 IRExpr* resE = unop(iopn,
10786 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
10787 getQRegLO(nn, ity), getQRegLO(mm, ity)));
10788 IRTemp res = newTemp(ity);
10789 assign(res, resE);
10790 putQReg128(dd, mkV128(0));
10791 putQRegLO(dd, mkexpr(res));
10792 DIP("%s %s, %s, %s\n",
10793 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10794 return True;
10795 }
10796
sewardjdf1628c2014-06-10 22:52:05 +000010797 return False;
10798# undef INSN
10799}
10800
10801
10802static
10803Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
10804{
sewardj5747c4a2014-06-11 20:57:23 +000010805 /* 31 28 23 21 20 15 14 9 4
10806 000 11111 ty o1 m o0 a n d
10807 The first 3 bits are really "M 0 S", but M and S are always zero.
10808 Decode fields: ty,o1,o0
10809 */
sewardjdf1628c2014-06-10 22:52:05 +000010810# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000010811 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
10812 return False;
10813 }
10814 UInt ty = INSN(23,22);
10815 UInt bitO1 = INSN(21,21);
10816 UInt mm = INSN(20,16);
10817 UInt bitO0 = INSN(15,15);
10818 UInt aa = INSN(14,10);
10819 UInt nn = INSN(9,5);
10820 UInt dd = INSN(4,0);
10821 vassert(ty < 4);
10822
10823 if (ty <= X01) {
10824 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
10825 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
10826 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
10827 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
10828 /* -------------------- F{N}M{ADD,SUB} -------------------- */
10829 /* 31 22 20 15 14 9 4 ix
10830 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
10831 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
10832 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
10833 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
10834 where Fx=Dx when sz=1, Fx=Sx when sz=0
10835
10836 -----SPEC------ ----IMPL----
10837 fmadd a + n * m a + n * m
10838 fmsub a + (-n) * m a - n * m
10839 fnmadd (-a) + (-n) * m -(a + n * m)
10840 fnmsub (-a) + n * m -(a - n * m)
10841 */
10842 Bool isD = (ty & 1) == 1;
10843 UInt ix = (bitO1 << 1) | bitO0;
10844 IRType ity = isD ? Ity_F64 : Ity_F32;
10845 IROp opADD = mkADDF(ity);
10846 IROp opSUB = mkSUBF(ity);
10847 IROp opMUL = mkMULF(ity);
10848 IROp opNEG = mkNEGF(ity);
10849 IRTemp res = newTemp(ity);
10850 IRExpr* eA = getQRegLO(aa, ity);
10851 IRExpr* eN = getQRegLO(nn, ity);
10852 IRExpr* eM = getQRegLO(mm, ity);
10853 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
10854 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
10855 switch (ix) {
10856 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
10857 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
10858 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
10859 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
10860 default: vassert(0);
10861 }
10862 putQReg128(dd, mkV128(0x0000));
10863 putQRegLO(dd, mkexpr(res));
10864 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
10865 DIP("%s %s, %s, %s, %s\n",
10866 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
10867 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
10868 return True;
10869 }
10870
sewardjdf1628c2014-06-10 22:52:05 +000010871 return False;
10872# undef INSN
10873}
10874
10875
10876static
10877Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10878{
10879 /* 31 28 23 21 20 12 9 4
10880 000 11110 ty 1 imm8 100 imm5 d
10881 The first 3 bits are really "M 0 S", but M and S are always zero.
10882 */
10883# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10884 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10885 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
10886 return False;
10887 }
10888 UInt ty = INSN(23,22);
10889 UInt imm8 = INSN(20,13);
10890 UInt imm5 = INSN(9,5);
10891 UInt dd = INSN(4,0);
10892
10893 /* ------- 00,00000: FMOV s_imm ------- */
10894 /* ------- 01,00000: FMOV d_imm ------- */
10895 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
10896 Bool isD = (ty & 1) == 1;
10897 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
10898 if (!isD) {
10899 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
10900 }
10901 putQReg128(dd, mkV128(0));
10902 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
10903 DIP("fmov %s, #0x%llx\n",
10904 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
10905 return True;
10906 }
10907
10908 return False;
10909# undef INSN
10910}
10911
10912
10913static
10914Bool dis_AdvSIMD_fp_to_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
10915{
10916# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10917 return False;
10918# undef INSN
10919}
10920
10921
static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Sub-groups handled below:
         op = 00x : FCVT{N,P,M,Z}{S,U}  FP -> integer, explicit rounding
         op = 01x : {S,U}CVTF           integer -> FP, rounded per FPCR
         op = 11x : FMOV (general)      raw bit moves, no conversion
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use.  8 is a sentinel that
         is not a valid IRRoundingMode; the switch below must replace
         it in every reachable case. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      // Combinations not on this whitelist are valid architecturally but
      // are rejected here until a test case exercises them.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      /* I32 -> F64 is exact, so Iop_I32{S,U}toF64 are unops taking no
         rounding mode; every other combination can round and takes the
         FPCR-derived mode. */
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* Raw bit moves between integer and FP/vector registers; no value
      conversion is performed.  Cases 3 and 6 access the upper 64-bit
      lane of the vector register (Vd.D[1]).
      case sf  S     ty   rm  op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   if (1) {
      UInt ix = 0; // case; 0 means "not an FMOV (general)"
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               /* Int -> vector moves zero the destination first, then
                  write the low lane. */
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               /* Writes only the upper lane; the lower lane of Vd is
                  left unchanged. */
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}
11140
11141
11142static
11143Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
11144{
11145 Bool ok;
11146 ok = dis_AdvSIMD_EXT(dres, insn);
11147 if (UNLIKELY(ok)) return True;
11148 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
11149 if (UNLIKELY(ok)) return True;
11150 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
11151 if (UNLIKELY(ok)) return True;
11152 ok = dis_AdvSIMD_across_lanes(dres, insn);
11153 if (UNLIKELY(ok)) return True;
11154 ok = dis_AdvSIMD_copy(dres, insn);
11155 if (UNLIKELY(ok)) return True;
11156 ok = dis_AdvSIMD_modified_immediate(dres, insn);
11157 if (UNLIKELY(ok)) return True;
11158 ok = dis_AdvSIMD_scalar_copy(dres, insn);
11159 if (UNLIKELY(ok)) return True;
11160 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
11161 if (UNLIKELY(ok)) return True;
11162 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
11163 if (UNLIKELY(ok)) return True;
11164 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
11165 if (UNLIKELY(ok)) return True;
11166 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
11167 if (UNLIKELY(ok)) return True;
11168 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
11169 if (UNLIKELY(ok)) return True;
11170 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
11171 if (UNLIKELY(ok)) return True;
11172 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
11173 if (UNLIKELY(ok)) return True;
11174 ok = dis_AdvSIMD_three_different(dres, insn);
11175 if (UNLIKELY(ok)) return True;
11176 ok = dis_AdvSIMD_three_same(dres, insn);
11177 if (UNLIKELY(ok)) return True;
11178 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
11179 if (UNLIKELY(ok)) return True;
11180 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
11181 if (UNLIKELY(ok)) return True;
11182 ok = dis_AdvSIMD_crypto_aes(dres, insn);
11183 if (UNLIKELY(ok)) return True;
11184 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
11185 if (UNLIKELY(ok)) return True;
11186 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
11187 if (UNLIKELY(ok)) return True;
11188 ok = dis_AdvSIMD_fp_compare(dres, insn);
11189 if (UNLIKELY(ok)) return True;
11190 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
11191 if (UNLIKELY(ok)) return True;
11192 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
11193 if (UNLIKELY(ok)) return True;
11194 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
11195 if (UNLIKELY(ok)) return True;
11196 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
11197 if (UNLIKELY(ok)) return True;
11198 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
11199 if (UNLIKELY(ok)) return True;
11200 ok = dis_AdvSIMD_fp_immediate(dres, insn);
11201 if (UNLIKELY(ok)) return True;
11202 ok = dis_AdvSIMD_fp_to_fixedp_conv(dres, insn);
11203 if (UNLIKELY(ok)) return True;
sewardj5747c4a2014-06-11 20:57:23 +000011204 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000011205 if (UNLIKELY(ok)) return True;
11206 return False;
11207}
11208
sewardjbbcf1882014-01-12 12:49:10 +000011209
11210/*------------------------------------------------------------*/
11211/*--- Disassemble a single ARM64 instruction ---*/
11212/*------------------------------------------------------------*/
11213
11214/* Disassemble a single ARM64 instruction into IR. The instruction
11215 has is located at |guest_instr| and has guest IP of
11216 |guest_PC_curr_instr|, which will have been set before the call
11217 here. Returns True iff the instruction was decoded, in which case
11218 *dres will be set accordingly, or False, in which case *dres should
11219 be ignored by the caller. */
11220
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   /* NOTE(review): resteerOkFn, resteerCisOk, callback_opaque and
      abiinfo are not referenced in this function body; presumably they
      are reserved for sub-decoders / future use -- confirm. */
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults.  On decode failure these must still hold;
      the asserts at the end of this function check exactly that. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file).
      A special sequence is the 16-byte preamble below followed by a
      4-byte marker ORR, 20 bytes in total -- hence the "+ 20" and
      "code+16" offsets in this section. */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The marker insn at code+16 is a no-op ORR whose register
            choice encodes the request. */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
11393
11394
11395/*------------------------------------------------------------*/
11396/*--- Top-level fn ---*/
11397/*------------------------------------------------------------*/
11398
11399/* Disassemble a single instruction into IR. The instruction
11400 is located in host memory at &guest_code[delta]. */
11401
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here.  len is 4 for a normal insn
         and 20 for one of the "Special" preamble sequences. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         /* Render the insn as a 32-char binary string, with a space
            every 8 bits and a ' every 4, for the diagnostic below. */
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}
11484
sewardjecde6972014-02-05 11:01:19 +000011485
sewardjbbcf1882014-01-12 12:49:10 +000011486/*--------------------------------------------------------------------*/
11487/*--- end guest_arm64_toIR.c ---*/
11488/*--------------------------------------------------------------------*/