/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//ZZ /* XXXX thumb to check:
//ZZ    that all cases where putIRegT writes r15, we generate a jump.
//ZZ
//ZZ    All uses of newTemp assign to an IRTemp and not a UInt
//ZZ
//ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
//ZZ    backed out before the memory op, and restored afterwards.  This
//ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
//ZZ    happen for VFP loads/stores right now).
//ZZ
//ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
//ZZ    should.
//ZZ
//ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
//ZZ    taking into account the number of insns guarded by an IT.
//ZZ
//ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
//ZZ    0xE0) as the first arg to armg_calculate_condition, and instead
//ZZ    use Slice44 as specified in comments in the spechelper.
//ZZ
//ZZ    add specialisations for armg_calculate_flag_c and _v, as they
//ZZ    are moderately often needed in Thumb code.
//ZZ
//ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
//ZZ
//ZZ    Correctness (obscure): in m_transtab, when invalidating code
//ZZ    address ranges, invalidate up to 18 bytes after the end of the
//ZZ    range.  This is because the ITSTATE optimisation at the top of
//ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
//ZZ    given instruction, and so might depend on the invalidated area.
//ZZ */
//ZZ
//ZZ /* Limitations, etc
//ZZ
//ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
//ZZ      These instructions are non-restartable in the case where the
//ZZ      transfer(s) fault.
//ZZ
//ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
//ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
//ZZ      guest_x86_toIR.c.
//ZZ */

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
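
/* Illustrative sketch only (not part of the decoder): a
   host-independent check for the 16-byte preamble above could be
   written like this, using the getUIntLittleEndianly helper defined
   further down this file.

      static Bool isSpecialPreamble ( UChar* code )
      {
         return getUIntLittleEndianly(code +  0) == 0x93CC0D8C
             && getUIntLittleEndianly(code +  4) == 0x93CC358C
             && getUIntLittleEndianly(code +  8) == 0x93CCCD8C
             && getUIntLittleEndianly(code + 12) == 0x93CCF58C;
      }

   The real decoder performs an equivalent comparison before checking
   which of the four ORR variants follows the preamble. */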

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}
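
/* Worked example (illustrative only, not used by the code): for
   the byte sequence p[0..3] = { 0xEF, 0xBE, 0xAD, 0xDE },

      vassert(getUIntLittleEndianly(p) == 0xDEADBEEF);

   i.e. the little-endian reading of the four bytes, whatever the
   host's own endianness. */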

/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
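
/* Worked example (illustrative only): with n == 6, the value 0x2A
   (0b101010, so bit 5 is set) denotes -22 as a 6-bit signed number:

      vassert(sx_to_64(0x2A, 6) == 0xFFFFFFFFFFFFFFEAULL);
      vassert(sx_to_64(0x0A, 6) == 0x0AULL);

   Note this relies on signed right shift behaving arithmetically,
   which the rest of this file assumes too. */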

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)   \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)
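
/* Example (illustrative only): the BITSn macros just assemble
   individual bits, most significant first, into a small constant, so

      vassert(BITS8(1,0,1,1,0,0,0,1) == 0xB1);
      vassert(BITS4(1,1,1,0)         == 0xE);

   They are used below to write instruction-encoding patterns in a
   form that visually matches the ARM ARM's bit diagrams. */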

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
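
/* Example (illustrative only): SLICE_UInt picks out an inclusive
   bitfield, so

      vassert(SLICE_UInt(0xDEADBEEF, 15, 8) == 0xBE);

   i.e. bits 15..8 of 0xDEADBEEF. */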


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

//static
//void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
//{
//   vassert(t1 && *t1 == IRTemp_INVALID);
//   vassert(t2 && *t2 == IRTemp_INVALID);
//   vassert(t3 && *t3 == IRTemp_INVALID);
//   vassert(t4 && *t4 == IRTemp_INVALID);
//   *t1 = newTempV128();
//   *t2 = newTempV128();
//   *t3 = newTempV128();
//   *t4 = newTempV128();
//}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATU2U ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATS2S ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATS2U ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2 };
   vassert(size < 4);
   return ops[size];
}


/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
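
/* For instance (a sketch of the generated IR, not code that runs
   here): mathROR(Ity_I32, t, 8) binds the result temp to

      Or32(Shl32(t, 24), Shr32(t, 8))

   which is t rotated right by 8 bit positions. */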

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
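
/* For instance (again just a sketch of the emitted IR):
   mathREPLICATE(Ity_I32, t, 5) produces

      Sar32(Shl32(t, 26), 31)

   The left shift parks bit 5 of t in the sign position; the
   arithmetic right shift then smears it across all 32 bits. */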

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                laneSzB = 1;  break;
      case Ity_I16:               laneSzB = 2;  break;
      case Ity_F32: case Ity_I32: laneSzB = 4;  break;
      case Ity_F64: case Ity_I64: laneSzB = 8;  break;
      case Ity_V128:              laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
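
/* Worked example (illustrative only): on this little-endian-only
   layout, lane 3 of a 32-bit-laned Qreg starts 12 bytes into the
   register, so

      offsetQRegLane(7, Ity_I32, 3) == offsetQReg128(7) + 12

   and similarly lane 1 of a 64-bit-laned Qreg is 8 bytes in. */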

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPSCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
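
/* Equivalent pure-integer picture (illustrative only): if 'fpcr'
   held the FPCR value, the computation above amounts to

      UInt armEncd = (fpcr >> 22) & 3;                 // FPCR[23:22]
      UInt irEncd  = ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);

   so ARM's 01 (to +inf) becomes IR's 10, 10 (to -inf) becomes 01,
   and 00 and 11 are unchanged. */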


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns      ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


1661/* Build IR to calculate some particular condition from stored
1662 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1663 Ity_I64, suitable for narrowing. Although the return type is
1664 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1665 :: Ity_I64 and must denote the condition to compute in
1666 bits 7:4, and be zero everywhere else.
1667*/
1668static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1669{
1670 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1671 /* And 'cond' had better produce a value in which only bits 7:4 are
1672 nonzero. However, obviously we can't assert for that. */
1673
1674 /* So what we're constructing for the first argument is
1675 "(cond << 4) | stored-operation".
1676 However, as per comments above, 'cond' must be supplied
1677 pre-shifted to this function.
1678
1679 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1680 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1681 8 bits of the first argument. */
1682 IRExpr** args
1683 = mkIRExprVec_4(
1684 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1685 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1686 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1687 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1688 );
1689 IRExpr* call
1690 = mkIRExprCCall(
1691 Ity_I64,
1692 0/*regparm*/,
1693 "arm64g_calculate_condition", &arm64g_calculate_condition,
1694 args
1695 );
1696
1697 /* Exclude the requested condition, OP and NDEP from definedness
1698 checking. We're only interested in DEP1 and DEP2. */
1699 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1700 return call;
1701}
1702
1703
1704/* Build IR to calculate some particular condition from stored
1705 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1706 Ity_I64, suitable for narrowing. Although the return type is
1707 Ity_I64, the returned value is either 0 or 1.
1708*/
1709static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1710{
1711 /* First arg is "(cond << 4) | stored-operation". This requires that the
1712 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1713 (COND, OP) pair in the lowest 8 bits of the first argument. */
1714 vassert(cond >= 0 && cond <= 15);
1715 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1716}
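/* So, for example, computing condition NE (encoding 0001) against a
   thunk whose op is ARM64G_CC_OP_SUB64 passes
   (1 << 4) | ARM64G_CC_OP_SUB64 in the low 8 bits of the first
   argument to arm64g_calculate_condition. */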
1717
1718
1719/* Build IR to calculate just the carry flag from stored
1720 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1721 Ity_I64. */
1722static IRExpr* mk_arm64g_calculate_flag_c ( void )
1723{
1724 IRExpr** args
1725 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1726 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1727 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1728 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1729 IRExpr* call
1730 = mkIRExprCCall(
1731 Ity_I64,
1732 0/*regparm*/,
1733 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1734 args
1735 );
1736 /* Exclude OP and NDEP from definedness checking. We're only
1737 interested in DEP1 and DEP2. */
1738 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1739 return call;
1740}
1741
1742
1743//ZZ /* Build IR to calculate just the overflow flag from stored
1744//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1745//ZZ Ity_I32. */
1746//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1747//ZZ {
1748//ZZ IRExpr** args
1749//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1750//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1751//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1752//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1753//ZZ IRExpr* call
1754//ZZ = mkIRExprCCall(
1755//ZZ Ity_I32,
1756//ZZ 0/*regparm*/,
1757//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1758//ZZ args
1759//ZZ );
1760//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1761//ZZ interested in DEP1 and DEP2. */
1762//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1763//ZZ return call;
1764//ZZ }
1765
1766
1767/* Build IR to calculate N Z C V in bits 31:28 of the
1768 returned word. */
1769static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1770{
1771 IRExpr** args
1772 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1773 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1774 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1775 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1776 IRExpr* call
1777 = mkIRExprCCall(
1778 Ity_I64,
1779 0/*regparm*/,
1780 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1781 args
1782 );
1783 /* Exclude OP and NDEP from definedness checking. We're only
1784 interested in DEP1 and DEP2. */
1785 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1786 return call;
1787}
1788
1789
1790/* Build IR to set the flags thunk, in the most general case. */
1791static
1792void setFlags_D1_D2_ND ( UInt cc_op,
1793 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1794{
1795 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1796 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1797 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1798 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1799 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1800 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1801 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1802 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1803}
1804
1805/* Build IR to set the flags thunk after ADD or SUB. */
1806static
1807void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1808{
1809 IRTemp argL64 = IRTemp_INVALID;
1810 IRTemp argR64 = IRTemp_INVALID;
1811 IRTemp z64 = newTemp(Ity_I64);
1812 if (is64) {
1813 argL64 = argL;
1814 argR64 = argR;
1815 } else {
1816 argL64 = newTemp(Ity_I64);
1817 argR64 = newTemp(Ity_I64);
1818 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1819 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1820 }
1821 assign(z64, mkU64(0));
1822 UInt cc_op = ARM64G_CC_OP_NUMBER;
1823 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1824 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1825 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1826 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1827 else { vassert(0); }
1828 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1829}
1830
1831/* Build IR to set the flags thunk after ADC or SBC. */
1832static
1833void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1834 IRTemp argL, IRTemp argR, IRTemp oldC )
1835{
1836 IRTemp argL64 = IRTemp_INVALID;
1837 IRTemp argR64 = IRTemp_INVALID;
1838 IRTemp oldC64 = IRTemp_INVALID;
1839 if (is64) {
1840 argL64 = argL;
1841 argR64 = argR;
1842 oldC64 = oldC;
1843 } else {
1844 argL64 = newTemp(Ity_I64);
1845 argR64 = newTemp(Ity_I64);
1846 oldC64 = newTemp(Ity_I64);
1847 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1848 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1849 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1850 }
1851 UInt cc_op = ARM64G_CC_OP_NUMBER;
1852 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1853 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1854 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1855 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1856 else { vassert(0); }
1857 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1858}
1859
1860/* Build IR to set the flags thunk after ADD or SUB, if the given
1861 condition evaluates to True at run time. If not, the flags are set
1862 to the specified NZCV value. */
1863static
1864void setFlags_ADD_SUB_conditionally (
1865 Bool is64, Bool isSUB,
1866 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1867 )
1868{
1869 /* Generate IR as follows:
1870 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1871 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1872 CC_DEP2 = ITE(cond, argR64, 0)
1873 CC_NDEP = 0
1874 */
1875
1876 IRTemp z64 = newTemp(Ity_I64);
1877 assign(z64, mkU64(0));
1878
1879 /* Establish the operation and operands for the True case. */
1880 IRTemp t_dep1 = IRTemp_INVALID;
1881 IRTemp t_dep2 = IRTemp_INVALID;
1882 UInt t_op = ARM64G_CC_OP_NUMBER;
1883 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1884 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1885 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1886 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1887 else { vassert(0); }
1888 /* */
1889 if (is64) {
1890 t_dep1 = argL;
1891 t_dep2 = argR;
1892 } else {
1893 t_dep1 = newTemp(Ity_I64);
1894 t_dep2 = newTemp(Ity_I64);
1895 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1896 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1897 }
1898
1899 /* Establish the operation and operands for the False case. */
1900 IRTemp f_dep1 = newTemp(Ity_I64);
1901 IRTemp f_dep2 = z64;
1902 UInt f_op = ARM64G_CC_OP_COPY;
1903 assign(f_dep1, mkU64(nzcv << 28));
1904
1905 /* Final thunk values */
1906 IRTemp dep1 = newTemp(Ity_I64);
1907 IRTemp dep2 = newTemp(Ity_I64);
1908 IRTemp op = newTemp(Ity_I64);
1909
1910 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1911 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1912 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1913
1914 /* finally .. */
1915 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1916 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1917 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1918 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1919}
1920
1921/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1922static
1923void setFlags_LOGIC ( Bool is64, IRTemp res )
1924{
1925 IRTemp res64 = IRTemp_INVALID;
1926 IRTemp z64 = newTemp(Ity_I64);
1927 UInt cc_op = ARM64G_CC_OP_NUMBER;
1928 if (is64) {
1929 res64 = res;
1930 cc_op = ARM64G_CC_OP_LOGIC64;
1931 } else {
1932 res64 = newTemp(Ity_I64);
1933 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1934 cc_op = ARM64G_CC_OP_LOGIC32;
1935 }
1936 assign(z64, mkU64(0));
1937 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1938}
1939
1940/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1941 located in bits 31:28 of the supplied value. */
1942static
1943void setFlags_COPY ( IRTemp nzcv_28x0 )
1944{
1945 IRTemp z64 = newTemp(Ity_I64);
1946 assign(z64, mkU64(0));
1947 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1948}
1949
1950
1951//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1952//ZZ sets it at all) */
1953//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1954//ZZ IRTemp t_dep2,
1955//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1956//ZZ {
1957//ZZ IRTemp z32 = newTemp(Ity_I32);
1958//ZZ assign( z32, mkU32(0) );
1959//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1960//ZZ }
1961//ZZ
1962//ZZ
1963//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1964//ZZ sets it at all) */
1965//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1966//ZZ IRTemp t_ndep,
1967//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1968//ZZ {
1969//ZZ IRTemp z32 = newTemp(Ity_I32);
1970//ZZ assign( z32, mkU32(0) );
1971//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1972//ZZ }
1973//ZZ
1974//ZZ
1975//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1976//ZZ sets them at all) */
1977//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1978//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1979//ZZ {
1980//ZZ IRTemp z32 = newTemp(Ity_I32);
1981//ZZ assign( z32, mkU32(0) );
1982//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1983//ZZ }
1984
1985
1986/*------------------------------------------------------------*/
1987/*--- Misc math helpers ---*/
1988/*------------------------------------------------------------*/
1989
1990/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1991static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
1992{
1993 IRTemp maskT = newTemp(Ity_I64);
1994 IRTemp res = newTemp(Ity_I64);
1995 vassert(sh >= 1 && sh <= 63);
1996 assign(maskT, mkU64(mask));
1997 assign( res,
1998 binop(Iop_Or64,
1999 binop(Iop_Shr64,
2000 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2001 mkU8(sh)),
2002 binop(Iop_And64,
2003 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2004 mkexpr(maskT))
2005 )
2006 );
2007 return res;
2008}
2009
2010/* Generates byte swaps within 32-bit lanes. */
2011static IRTemp math_UINTSWAP64 ( IRTemp src )
2012{
2013 IRTemp res;
2014 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2015 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2016 return res;
2017}
2018
2019/* Generates byte swaps within 16-bit lanes. */
2020static IRTemp math_USHORTSWAP64 ( IRTemp src )
2021{
2022 IRTemp res;
2023 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2024 return res;
2025}
2026
2027/* Generates a 64-bit byte swap. */
2028static IRTemp math_BYTESWAP64 ( IRTemp src )
2029{
2030 IRTemp res;
2031 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2032 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2033 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2034 return res;
2035}
2036
2037/* Generates a 64-bit bit swap. */
2038static IRTemp math_BITSWAP64 ( IRTemp src )
2039{
2040 IRTemp res;
2041 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2042 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2043 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2044 return math_BYTESWAP64(res);
2045}
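/* A hand-checked example of the above: with mask 0xFF00FF00FF00FF00
   and sh 8, SWAPHELPER exchanges each pair of adjacent bytes.  The
   1/2/4-bit steps of math_BITSWAP64 reverse the bits within each
   byte, so src = 0x0000000000000001 becomes 0x0000000000000080, and
   the final byte swap then produces 0x8000000000000000 -- a complete
   end-to-end bit reversal. */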
2046
2047/* Duplicates the bits at the bottom of the given word to fill the
2048 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2049 except for the bottom bits. */
2050static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2051{
2052 if (srcTy == Ity_I8) {
2053 IRTemp t16 = newTemp(Ity_I64);
2054 assign(t16, binop(Iop_Or64, mkexpr(src),
2055 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2056 IRTemp t32 = newTemp(Ity_I64);
2057 assign(t32, binop(Iop_Or64, mkexpr(t16),
2058 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2059 IRTemp t64 = newTemp(Ity_I64);
2060 assign(t64, binop(Iop_Or64, mkexpr(t32),
2061 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2062 return t64;
2063 }
2064 if (srcTy == Ity_I16) {
2065 IRTemp t32 = newTemp(Ity_I64);
2066 assign(t32, binop(Iop_Or64, mkexpr(src),
2067 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2068 IRTemp t64 = newTemp(Ity_I64);
2069 assign(t64, binop(Iop_Or64, mkexpr(t32),
2070 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2071 return t64;
2072 }
2073 if (srcTy == Ity_I32) {
2074 IRTemp t64 = newTemp(Ity_I64);
2075 assign(t64, binop(Iop_Or64, mkexpr(src),
2076 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2077 return t64;
2078 }
2079 if (srcTy == Ity_I64) {
2080 return src;
2081 }
2082 vassert(0);
2083}
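/* For example, an Ity_I8 source holding 0x5A is widened in three
   doubling steps: 0x5A5A, then 0x5A5A5A5A, then 0x5A5A5A5A5A5A5A5A. */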
2084
2085
2086/* Duplicates the src element exactly so as to fill a V128 value. */
2087static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2088{
2089 IRTemp res = newTempV128();
2090 if (srcTy == Ity_F64) {
2091 IRTemp i64 = newTemp(Ity_I64);
2092 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2093 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2094 return res;
2095 }
2096 if (srcTy == Ity_F32) {
2097 IRTemp i64a = newTemp(Ity_I64);
2098 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2099 IRTemp i64b = newTemp(Ity_I64);
2100 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2101 mkexpr(i64a)));
2102 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2103 return res;
2104 }
2105 if (srcTy == Ity_I64) {
2106 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2107 return res;
2108 }
2109 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2110 IRTemp t1 = newTemp(Ity_I64);
2111 assign(t1, widenUto64(srcTy, mkexpr(src)));
2112 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2113 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2114 return res;
2115 }
2116 vassert(0);
2117}
2118
2119
2120/* |fullWidth| is a full V128 width result. Depending on bitQ,
2121 zero out the upper half. */
2122static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2123{
2124 if (bitQ == 1) return mkexpr(fullWidth);
2125 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2126 vassert(0);
2127}
2128
2129/* The same, but from an expression instead. */
2130static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2131{
2132 IRTemp fullWidthT = newTempV128();
2133 assign(fullWidthT, fullWidth);
2134 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2135}
2136
2137
2138/*------------------------------------------------------------*/
2139/*--- FP comparison helpers ---*/
2140/*------------------------------------------------------------*/
2141
2142/* irRes :: Ity_I32 holds a floating point comparison result encoded
2143 as an IRCmpF64Result. Generate code to convert it to an
2144 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2145 Assign a new temp to hold that value, and return the temp. */
2146static
2147IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2148{
2149 IRTemp ix = newTemp(Ity_I64);
2150 IRTemp termL = newTemp(Ity_I64);
2151 IRTemp termR = newTemp(Ity_I64);
2152 IRTemp nzcv = newTemp(Ity_I64);
2153 IRTemp irRes = newTemp(Ity_I64);
2154
2155 /* This is where the fun starts. We have to convert 'irRes' from
2156 an IR-convention return result (IRCmpF64Result) to an
2157 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2158 4 bits of 'nzcv'. */
2159 /* Map compare result from IR to ARM(nzcv) */
2160 /*
2161 FP cmp result | IR | ARM(nzcv)
2162 --------------------------------
2163 UN 0x45 0011
2164 LT 0x01 1000
2165 GT 0x00 0010
2166 EQ 0x40 0110
2167 */
2168 /* Now since you're probably wondering WTF ..
2169
2170 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2171 places them side by side, giving a number which is 0, 1, 2 or 3.
2172
2173 termL is a sequence cooked up by GNU superopt. It converts ix
2174 into an almost correct NZCV value (incredibly), except
2175 for the case of UN, where it produces 0100 instead of the
2176 required 0011.
2177
2178 termR is therefore a correction term, also computed from ix. It
2179 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2180 the final correct value, we subtract termR from termL.
2181
2182 Don't take my word for it. There's a test program at the bottom
2183 of guest_arm_toIR.c, to try this out with.
2184 */
2185 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2186
2187 assign(
2188 ix,
2189 binop(Iop_Or64,
2190 binop(Iop_And64,
2191 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2192 mkU64(3)),
2193 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2194
2195 assign(
2196 termL,
2197 binop(Iop_Add64,
2198 binop(Iop_Shr64,
2199 binop(Iop_Sub64,
2200 binop(Iop_Shl64,
2201 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2202 mkU8(62)),
2203 mkU64(1)),
2204 mkU8(61)),
2205 mkU64(1)));
2206
2207 assign(
2208 termR,
2209 binop(Iop_And64,
2210 binop(Iop_And64,
2211 mkexpr(ix),
2212 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2213 mkU64(1)));
2214
2215 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2216 return nzcv;
2217}
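/* Checking the four cases by hand (all arithmetic is 64-bit, and
   Iop_Shr64 is an unsigned shift):
      GT: irRes = 0x00 -> ix = 0, termL = 2, termR = 0, nzcv = 0010
      LT: irRes = 0x01 -> ix = 1, termL = 8, termR = 0, nzcv = 1000
      EQ: irRes = 0x40 -> ix = 2, termL = 6, termR = 0, nzcv = 0110
      UN: irRes = 0x45 -> ix = 3, termL = 4, termR = 1, nzcv = 0011
   which agrees with the table above. */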
2218
2219
2220/*------------------------------------------------------------*/
2221/*--- Data processing (immediate) ---*/
2222/*------------------------------------------------------------*/
2223
2224/* Helper functions for supporting "DecodeBitMasks" */
2225
2226static ULong dbm_ROR ( Int width, ULong x, Int rot )
2227{
2228 vassert(width > 0 && width <= 64);
2229 vassert(rot >= 0 && rot < width);
2230 if (rot == 0) return x;
2231 ULong res = x >> rot;
2232 res |= (x << (width - rot));
2233 if (width < 64)
2234 res &= ((1ULL << width) - 1);
2235 return res;
2236}
2237
2238static ULong dbm_RepTo64( Int esize, ULong x )
2239{
2240 switch (esize) {
2241 case 64:
2242 return x;
2243 case 32:
2244 x &= 0xFFFFFFFF; x |= (x << 32);
2245 return x;
2246 case 16:
2247 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2248 return x;
2249 case 8:
2250 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2251 return x;
2252 case 4:
2253 x &= 0xF; x |= (x << 4); x |= (x << 8);
2254 x |= (x << 16); x |= (x << 32);
2255 return x;
2256 case 2:
2257 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2258 x |= (x << 16); x |= (x << 32);
2259 return x;
2260 default:
2261 break;
2262 }
2263 vpanic("dbm_RepTo64");
2264 /*NOTREACHED*/
2265 return 0;
2266}
2267
2268static Int dbm_highestSetBit ( ULong x )
2269{
2270 Int i;
2271 for (i = 63; i >= 0; i--) {
2272 if (x & (1ULL << i))
2273 return i;
2274 }
2275 vassert(x == 0);
2276 return -1;
2277}
2278
2279static
2280Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2281 ULong immN, ULong imms, ULong immr, Bool immediate,
2282 UInt M /*32 or 64*/)
2283{
2284 vassert(immN < (1ULL << 1));
2285 vassert(imms < (1ULL << 6));
2286 vassert(immr < (1ULL << 6));
2287 vassert(immediate == False || immediate == True);
2288 vassert(M == 32 || M == 64);
2289
2290 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2291 if (len < 1) { /* printf("fail1\n"); */ return False; }
2292 vassert(len <= 6);
2293 vassert(M >= (1 << len));
2294
2295 vassert(len >= 1 && len <= 6);
2296 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2297 (1 << len) - 1;
2298 vassert(levels >= 1 && levels <= 63);
2299
2300 if (immediate && ((imms & levels) == levels)) {
2301 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2302 return False;
2303 }
2304
2305 ULong S = imms & levels;
2306 ULong R = immr & levels;
2307 Int diff = S - R;
2308 diff &= 63;
2309 Int esize = 1 << len;
2310 vassert(2 <= esize && esize <= 64);
2311
2312 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2313 same below with d. S can be 63 in which case we have an out of
2314 range and hence undefined shift. */
2315 vassert(S >= 0 && S <= 63);
2316 vassert(esize >= (S+1));
2317 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2318 //(1ULL << (S+1)) - 1;
2319 ((1ULL << S) - 1) + (1ULL << S);
2320
2321 Int d = // diff<len-1:0>
2322 diff & ((1 << len)-1);
2323 vassert(esize >= (d+1));
2324 vassert(d >= 0 && d <= 63);
2325
2326 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2327 //(1ULL << (d+1)) - 1;
2328 ((1ULL << d) - 1) + (1ULL << d);
2329
2330 if (esize != 64) vassert(elem_s < (1ULL << esize));
2331 if (esize != 64) vassert(elem_d < (1ULL << esize));
2332
2333 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2334 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2335
2336 return True;
2337}
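/* A worked example, hand-checked: for the logical-immediate fields
   N=0, immr=000000, imms=111100, we get
   len = highestSetBit(0b000011) = 1, hence esize = 2 and levels = 1.
   Then S = 0, R = 0 and elem_s = 0b01, and replicating that 2-bit
   element across 64 bits gives wmask = 0x5555555555555555 -- the
   mask used by, for instance, AND Xd, Xn, #0x5555555555555555. */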
2338
2339
2340static
2341Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2342 UInt insn)
2343{
2344# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2345
2346 /* insn[28:23]
2347 10000x PC-rel addressing
2348 10001x Add/subtract (immediate)
2349 100100 Logical (immediate)
2350 100101 Move Wide (immediate)
2351 100110 Bitfield
2352 100111 Extract
2353 */
2354
2355 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2356 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2357 Bool is64 = INSN(31,31) == 1;
2358 Bool isSub = INSN(30,30) == 1;
2359 Bool setCC = INSN(29,29) == 1;
2360 UInt sh = INSN(23,22);
2361 UInt uimm12 = INSN(21,10);
2362 UInt nn = INSN(9,5);
2363 UInt dd = INSN(4,0);
2364 const HChar* nm = isSub ? "sub" : "add";
2365 if (sh >= 2) {
2366 /* Invalid; fall through */
2367 } else {
2368 vassert(sh <= 1);
2369 uimm12 <<= (12 * sh);
2370 if (is64) {
2371 IRTemp argL = newTemp(Ity_I64);
2372 IRTemp argR = newTemp(Ity_I64);
2373 IRTemp res = newTemp(Ity_I64);
2374 assign(argL, getIReg64orSP(nn));
2375 assign(argR, mkU64(uimm12));
2376 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2377 mkexpr(argL), mkexpr(argR)));
2378 if (setCC) {
2379 putIReg64orZR(dd, mkexpr(res));
2380 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2381 DIP("%ss %s, %s, 0x%x\n",
2382 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2383 } else {
2384 putIReg64orSP(dd, mkexpr(res));
2385 DIP("%s %s, %s, 0x%x\n",
2386 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2387 }
2388 } else {
2389 IRTemp argL = newTemp(Ity_I32);
2390 IRTemp argR = newTemp(Ity_I32);
2391 IRTemp res = newTemp(Ity_I32);
2392 assign(argL, getIReg32orSP(nn));
2393 assign(argR, mkU32(uimm12));
2394 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2395 mkexpr(argL), mkexpr(argR)));
2396 if (setCC) {
2397 putIReg32orZR(dd, mkexpr(res));
2398 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2399 DIP("%ss %s, %s, 0x%x\n",
2400 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2401 } else {
2402 putIReg32orSP(dd, mkexpr(res));
2403 DIP("%s %s, %s, 0x%x\n",
2404 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2405 }
2406 }
2407 return True;
2408 }
2409 }
2410
2411 /* -------------------- ADR/ADRP -------------------- */
2412 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2413 UInt bP = INSN(31,31);
2414 UInt immLo = INSN(30,29);
2415 UInt immHi = INSN(23,5);
2416 UInt rD = INSN(4,0);
2417 ULong uimm = (immHi << 2) | immLo;
2418 ULong simm = sx_to_64(uimm, 21);
2419 ULong val;
2420 if (bP) {
2421 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2422 } else {
2423 val = guest_PC_curr_instr + simm;
2424 }
2425 putIReg64orZR(rD, mkU64(val));
2426 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2427 return True;
2428 }
2429
2430 /* -------------------- LOGIC(imm) -------------------- */
2431 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2432 /* 31 30 28 22 21 15 9 4
2433 sf op 100100 N immr imms Rn Rd
2434 op=00: AND Rd|SP, Rn, #imm
2435 op=01: ORR Rd|SP, Rn, #imm
2436 op=10: EOR Rd|SP, Rn, #imm
2437 op=11: ANDS Rd|ZR, Rn, #imm
2438 */
2439 Bool is64 = INSN(31,31) == 1;
2440 UInt op = INSN(30,29);
2441 UInt N = INSN(22,22);
2442 UInt immR = INSN(21,16);
2443 UInt immS = INSN(15,10);
2444 UInt nn = INSN(9,5);
2445 UInt dd = INSN(4,0);
2446 ULong imm = 0;
2447 Bool ok;
2448 if (N == 1 && !is64)
2449 goto after_logic_imm; /* not allowed; fall through */
2450 ok = dbm_DecodeBitMasks(&imm, NULL,
2451 N, immS, immR, True, is64 ? 64 : 32);
2452 if (!ok)
2453 goto after_logic_imm;
2454
2455 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2456 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2457 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2458
2459 vassert(op < 4);
2460 if (is64) {
2461 IRExpr* argL = getIReg64orZR(nn);
2462 IRExpr* argR = mkU64(imm);
2463 IRTemp res = newTemp(Ity_I64);
2464 assign(res, binop(ops64[op], argL, argR));
2465 if (op < 3) {
2466 putIReg64orSP(dd, mkexpr(res));
2467 DIP("%s %s, %s, 0x%llx\n", names[op],
2468 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2469 } else {
2470 putIReg64orZR(dd, mkexpr(res));
2471 setFlags_LOGIC(True/*is64*/, res);
2472 DIP("%s %s, %s, 0x%llx\n", names[op],
2473 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2474 }
2475 } else {
2476 IRExpr* argL = getIReg32orZR(nn);
2477 IRExpr* argR = mkU32((UInt)imm);
2478 IRTemp res = newTemp(Ity_I32);
2479 assign(res, binop(ops32[op], argL, argR));
2480 if (op < 3) {
2481 putIReg32orSP(dd, mkexpr(res));
2482 DIP("%s %s, %s, 0x%x\n", names[op],
2483 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2484 } else {
2485 putIReg32orZR(dd, mkexpr(res));
2486 setFlags_LOGIC(False/*!is64*/, res);
2487 DIP("%s %s, %s, 0x%x\n", names[op],
2488 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2489 }
2490 }
2491 return True;
2492 }
2493 after_logic_imm:
2494
2495 /* -------------------- MOV{Z,N,K} -------------------- */
2496 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2497 /* 31 30 28 22 20 4
2498 | | | | | |
2499 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2500 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2501 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2502 */
2503 Bool is64 = INSN(31,31) == 1;
2504 UInt subopc = INSN(30,29);
2505 UInt hw = INSN(22,21);
2506 UInt imm16 = INSN(20,5);
2507 UInt dd = INSN(4,0);
2508 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2509 /* invalid; fall through */
2510 } else {
2511 ULong imm64 = ((ULong)imm16) << (16 * hw);
2512 if (!is64)
2513 vassert(imm64 < 0x100000000ULL);
2514 switch (subopc) {
2515 case BITS2(1,0): // MOVZ
2516 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2517 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2518 break;
2519 case BITS2(0,0): // MOVN
2520 imm64 = ~imm64;
2521 if (!is64)
2522 imm64 &= 0xFFFFFFFFULL;
2523 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2524 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2525 break;
2526 case BITS2(1,1): // MOVK
2527 /* This is more complex. We are inserting a slice into
2528 the destination register, so we need to have the old
2529 value of it. */
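 /* For example, MOVK X0, #0xBEEF, LSL #16 computes
    X0 = (X0 & ~0xFFFF0000ULL) | 0xBEEF0000ULL. */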
2530 if (is64) {
2531 IRTemp old = newTemp(Ity_I64);
2532 assign(old, getIReg64orZR(dd));
2533 ULong mask = 0xFFFFULL << (16 * hw);
2534 IRExpr* res
2535 = binop(Iop_Or64,
2536 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2537 mkU64(imm64));
2538 putIReg64orZR(dd, res);
2539 DIP("movk %s, 0x%x, lsl %u\n",
2540 nameIReg64orZR(dd), imm16, 16*hw);
2541 } else {
2542 IRTemp old = newTemp(Ity_I32);
2543 assign(old, getIReg32orZR(dd));
2544 vassert(hw <= 1);
2545 UInt mask = 0xFFFF << (16 * hw);
2546 IRExpr* res
2547 = binop(Iop_Or32,
2548 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2549 mkU32((UInt)imm64));
2550 putIReg32orZR(dd, res);
2551 DIP("movk %s, 0x%x, lsl %u\n",
2552 nameIReg32orZR(dd), imm16, 16*hw);
2553 }
2554 break;
2555 default:
2556 vassert(0);
2557 }
2558 return True;
2559 }
2560 }
2561
2562 /* -------------------- {U,S,}BFM -------------------- */
2563 /* 30 28 22 21 15 9 4
2564
2565 sf 10 100110 N immr imms nn dd
2566 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2567 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2568
2569 sf 00 100110 N immr imms nn dd
2570 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2571 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2572
2573 sf 01 100110 N immr imms nn dd
2574 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2575 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2576 */
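 /* (The usual bitfield aliases reduce to these: for instance
    UBFX Rd, Rn, #lsb, #width is UBFM with immr = lsb and
    imms = lsb+width-1, and LSL Xd, Xn, #sh is UBFM with
    immr = (64-sh) & 63 and imms = 63-sh.) */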
2577 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2578 UInt sf = INSN(31,31);
2579 UInt opc = INSN(30,29);
2580 UInt N = INSN(22,22);
2581 UInt immR = INSN(21,16);
2582 UInt immS = INSN(15,10);
2583 UInt nn = INSN(9,5);
2584 UInt dd = INSN(4,0);
2585 Bool inZero = False;
2586 Bool extend = False;
2587 const HChar* nm = "???";
2588 /* skip invalid combinations */
2589 switch (opc) {
2590 case BITS2(0,0):
2591 inZero = True; extend = True; nm = "sbfm"; break;
2592 case BITS2(0,1):
2593 inZero = False; extend = False; nm = "bfm"; break;
2594 case BITS2(1,0):
2595 inZero = True; extend = False; nm = "ubfm"; break;
2596 case BITS2(1,1):
2597 goto after_bfm; /* invalid */
2598 default:
2599 vassert(0);
2600 }
2601 if (sf == 1 && N != 1) goto after_bfm;
2602 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2603 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2604 ULong wmask = 0, tmask = 0;
2605 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2606 N, immS, immR, False, sf == 1 ? 64 : 32);
2607 if (!ok) goto after_bfm; /* hmmm */
2608
2609 Bool is64 = sf == 1;
2610 IRType ty = is64 ? Ity_I64 : Ity_I32;
2611
2612 IRTemp dst = newTemp(ty);
2613 IRTemp src = newTemp(ty);
2614 IRTemp bot = newTemp(ty);
2615 IRTemp top = newTemp(ty);
2616 IRTemp res = newTemp(ty);
2617 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2618 assign(src, getIRegOrZR(is64, nn));
2619 /* perform bitfield move on low bits */
2620 assign(bot, binop(mkOR(ty),
2621 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2622 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2623 mkU(ty, wmask))));
2624 /* determine extension bits (sign, zero or dest register) */
2625 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2626 /* combine extension bits and result bits */
2627 assign(res, binop(mkOR(ty),
2628 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2629 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2630 putIRegOrZR(is64, dd, mkexpr(res));
2631 DIP("%s %s, %s, immR=%u, immS=%u\n",
2632 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2633 return True;
2634 }
2635 after_bfm:
2636
2637 /* ---------------------- EXTR ---------------------- */
2638 /* 30 28 22 20 15 9 4
2639 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2640 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2641 */
2642 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2643 Bool is64 = INSN(31,31) == 1;
2644 UInt mm = INSN(20,16);
2645 UInt imm6 = INSN(15,10);
2646 UInt nn = INSN(9,5);
2647 UInt dd = INSN(4,0);
2648 Bool valid = True;
2649 if (INSN(31,31) != INSN(22,22))
2650 valid = False;
2651 if (!is64 && imm6 >= 32)
2652 valid = False;
2653 if (!valid) goto after_extr;
2654 IRType ty = is64 ? Ity_I64 : Ity_I32;
2655 IRTemp srcHi = newTemp(ty);
2656 IRTemp srcLo = newTemp(ty);
2657 IRTemp res = newTemp(ty);
2658 assign(srcHi, getIRegOrZR(is64, nn));
2659 assign(srcLo, getIRegOrZR(is64, mm));
2660 if (imm6 == 0) {
2661 assign(res, mkexpr(srcLo));
2662 } else {
2663 UInt szBits = 8 * sizeofIRType(ty);
2664 vassert(imm6 > 0 && imm6 < szBits);
2665 assign(res, binop(mkOR(ty),
2666 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2667 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2668 }
2669 putIRegOrZR(is64, dd, mkexpr(res));
2670 DIP("extr %s, %s, %s, #%u\n",
2671 nameIRegOrZR(is64,dd),
2672 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2673 return True;
2674 }
2675 after_extr:
2676
2677 vex_printf("ARM64 front end: data_processing_immediate\n");
2678 return False;
2679# undef INSN
2680}
2681
2682
2683/*------------------------------------------------------------*/
2684/*--- Data processing (register) instructions ---*/
2685/*------------------------------------------------------------*/
2686
2687static const HChar* nameSH ( UInt sh ) {
2688 switch (sh) {
2689 case 0: return "lsl";
2690 case 1: return "lsr";
2691 case 2: return "asr";
2692 case 3: return "ror";
2693 default: vassert(0);
2694 }
2695}
2696
2697/* Generate IR to get a register value, possibly shifted by an
2698 immediate. Returns either a 32- or 64-bit temporary holding the
2699 result. After the shift, the value can optionally be NOT-ed
2700 too.
2701
2702 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2703 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2704 isn't allowed, but it's the job of the caller to check that.
2705*/
2706static IRTemp getShiftedIRegOrZR ( Bool is64,
2707 UInt sh_how, UInt sh_amt, UInt regNo,
2708 Bool invert )
2709{
2710 vassert(sh_how < 4);
2711 vassert(sh_amt < (is64 ? 64 : 32));
2712 IRType ty = is64 ? Ity_I64 : Ity_I32;
2713 IRTemp t0 = newTemp(ty);
2714 assign(t0, getIRegOrZR(is64, regNo));
2715 IRTemp t1 = newTemp(ty);
2716 switch (sh_how) {
2717 case BITS2(0,0):
2718 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2719 break;
2720 case BITS2(0,1):
2721 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2722 break;
2723 case BITS2(1,0):
2724 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2725 break;
2726 case BITS2(1,1):
2727 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2728 break;
2729 default:
2730 vassert(0);
2731 }
2732 if (invert) {
2733 IRTemp t2 = newTemp(ty);
2734 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2735 return t2;
2736 } else {
2737 return t1;
2738 }
2739}
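/* So, for example, getShiftedIRegOrZR(True, BITS2(1,0), 3, m, True)
   produces NOT(Xm >>s 3), which is what EON with an ASR #3 shift
   needs for its second operand. */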
2740
2741
2742static
2743Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2744 UInt insn)
2745{
2746# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2747
2748 /* ------------------- ADD/SUB(reg) ------------------- */
2749 /* x==0 => 32 bit op x==1 => 64 bit op
2750 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2751
2752 31 30 29 28 23 21 20 15 9 4
2753 | | | | | | | | | |
2754 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2755 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2756 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2757 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2758 */
2759 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2760 UInt bX = INSN(31,31);
2761 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2762 UInt bS = INSN(29, 29); /* set flags? */
2763 UInt sh = INSN(23,22);
2764 UInt rM = INSN(20,16);
2765 UInt imm6 = INSN(15,10);
2766 UInt rN = INSN(9,5);
2767 UInt rD = INSN(4,0);
2768 Bool isSUB = bOP == 1;
2769 Bool is64 = bX == 1;
2770 IRType ty = is64 ? Ity_I64 : Ity_I32;
2771 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2772 /* invalid; fall through */
2773 } else {
2774 IRTemp argL = newTemp(ty);
2775 assign(argL, getIRegOrZR(is64, rN));
2776 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2777 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2778 IRTemp res = newTemp(ty);
2779 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2780 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2781 if (bS) {
2782 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2783 }
2784 DIP("%s%s %s, %s, %s, %s #%u\n",
2785 bOP ? "sub" : "add", bS ? "s" : "",
2786 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2787 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2788 return True;
2789 }
2790 }
2791
2792 /* ------------------- ADC/SBC(reg) ------------------- */
2793 /* x==0 => 32 bit op x==1 => 64 bit op
2794
2795 31 30 29 28 23 21 20 15 9 4
2796 | | | | | | | | | |
2797 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2798 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2799 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2800 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2801 */
2802
2803 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2804 UInt bX = INSN(31,31);
2805 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2806 UInt bS = INSN(29,29); /* set flags */
2807 UInt rM = INSN(20,16);
2808 UInt rN = INSN(9,5);
2809 UInt rD = INSN(4,0);
2810
2811 Bool isSUB = bOP == 1;
2812 Bool is64 = bX == 1;
2813 IRType ty = is64 ? Ity_I64 : Ity_I32;
2814
2815 IRTemp oldC = newTemp(ty);
2816 assign(oldC,
2817 is64 ? mk_arm64g_calculate_flag_c()
2818 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2819
2820 IRTemp argL = newTemp(ty);
2821 assign(argL, getIRegOrZR(is64, rN));
2822 IRTemp argR = newTemp(ty);
2823 assign(argR, getIRegOrZR(is64, rM));
2824
2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2826 IRTemp res = newTemp(ty);
2827 if (isSUB) {
2828 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2829 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2830 assign(res,
2831 binop(op,
2832 binop(op, mkexpr(argL), mkexpr(argR)),
2833 binop(xorOp, mkexpr(oldC), one)));
2834 } else {
2835 assign(res,
2836 binop(op,
2837 binop(op, mkexpr(argL), mkexpr(argR)),
2838 mkexpr(oldC)));
2839 }
2840
2841 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2842
2843 if (bS) {
2844 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2845 }
2846
2847 DIP("%s%s %s, %s, %s\n",
2848 bOP ? "sbc" : "adc", bS ? "s" : "",
2849 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2850 nameIRegOrZR(is64, rM));
2851 return True;
2852 }
2853
2854 /* -------------------- LOGIC(reg) -------------------- */
2855 /* x==0 => 32 bit op x==1 => 64 bit op
2856 N==0 => inv? is no-op (no inversion)
2857 N==1 => inv? is NOT
2858 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2859
2860 31 30 28 23 21 20 15 9 4
2861 | | | | | | | | |
2862 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2863 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2864 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2865 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2866 With N=1, the names are: BIC ORN EON BICS
2867 */
2868 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2869 UInt bX = INSN(31,31);
2870 UInt sh = INSN(23,22);
2871 UInt bN = INSN(21,21);
2872 UInt rM = INSN(20,16);
2873 UInt imm6 = INSN(15,10);
2874 UInt rN = INSN(9,5);
2875 UInt rD = INSN(4,0);
2876 Bool is64 = bX == 1;
2877 IRType ty = is64 ? Ity_I64 : Ity_I32;
2878 if (!is64 && imm6 > 31) {
2879 /* invalid; fall through */
2880 } else {
2881 IRTemp argL = newTemp(ty);
2882 assign(argL, getIRegOrZR(is64, rN));
2883 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2884 IROp op = Iop_INVALID;
2885 switch (INSN(30,29)) {
2886 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2887 case BITS2(0,1): op = mkOR(ty); break;
2888 case BITS2(1,0): op = mkXOR(ty); break;
2889 default: vassert(0);
2890 }
2891 IRTemp res = newTemp(ty);
2892 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2893 if (INSN(30,29) == BITS2(1,1)) {
2894 setFlags_LOGIC(is64, res);
2895 }
2896 putIRegOrZR(is64, rD, mkexpr(res));
2897
2898 static const HChar* names_op[8]
2899 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2900 vassert(((bN << 2) | INSN(30,29)) < 8);
2901 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2902 /* Special-case the printing of "MOV" */
2903 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2904 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2905 nameIRegOrZR(is64, rM));
2906 } else {
2907 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2908 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2909 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2910 }
2911 return True;
2912 }
2913 }
2914
2915 /* -------------------- {U,S}MULH -------------------- */
2916 /* 31 23 22 20 15 9 4
2917 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2918 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2919 */
2920 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2921 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2922 Bool isU = INSN(23,23) == 1;
2923 UInt mm = INSN(20,16);
2924 UInt nn = INSN(9,5);
2925 UInt dd = INSN(4,0);
2926 putIReg64orZR(dd, unop(Iop_128HIto64,
2927 binop(isU ? Iop_MullU64 : Iop_MullS64,
2928 getIReg64orZR(nn), getIReg64orZR(mm))));
2929 DIP("%cmulh %s, %s, %s\n",
2930 isU ? 'u' : 's',
2931 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2932 return True;
2933 }
2934
2935 /* -------------------- M{ADD,SUB} -------------------- */
2936 /* 31 30 20 15 14 9 4
2937 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2938 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2939 */
2940 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2941 Bool is64 = INSN(31,31) == 1;
2942 UInt mm = INSN(20,16);
2943 Bool isAdd = INSN(15,15) == 0;
2944 UInt aa = INSN(14,10);
2945 UInt nn = INSN(9,5);
2946 UInt dd = INSN(4,0);
2947 if (is64) {
2948 putIReg64orZR(
2949 dd,
2950 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2951 getIReg64orZR(aa),
2952 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2953 } else {
2954 putIReg32orZR(
2955 dd,
2956 binop(isAdd ? Iop_Add32 : Iop_Sub32,
2957 getIReg32orZR(aa),
2958 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2959 }
2960 DIP("%s %s, %s, %s, %s\n",
2961 isAdd ? "madd" : "msub",
2962 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2963 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
2964 return True;
2965 }
2966
2967 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
2968 /* 31 30 28 20 15 11 9 4
2969 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
2970 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
2971 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
2972 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
2973 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
2974 */
2975 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
2976 Bool is64 = INSN(31,31) == 1;
2977 UInt b30 = INSN(30,30);
2978 UInt mm = INSN(20,16);
2979 UInt cond = INSN(15,12);
2980 UInt b10 = INSN(10,10);
2981 UInt nn = INSN(9,5);
2982 UInt dd = INSN(4,0);
2983 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
2984 IRType ty = is64 ? Ity_I64 : Ity_I32;
2985 IRExpr* argL = getIRegOrZR(is64, nn);
2986 IRExpr* argR = getIRegOrZR(is64, mm);
2987 switch (op) {
2988 case BITS2(0,0):
2989 break;
2990 case BITS2(0,1):
2991 argR = binop(mkADD(ty), argR, mkU(ty,1));
2992 break;
2993 case BITS2(1,0):
2994 argR = unop(mkNOT(ty), argR);
2995 break;
2996 case BITS2(1,1):
2997 argR = binop(mkSUB(ty), mkU(ty,0), argR);
2998 break;
2999 default:
3000 vassert(0);
3001 }
3002 putIRegOrZR(
3003 is64, dd,
3004 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3005 argL, argR)
3006 );
3007 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3008 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3009 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3010 nameIRegOrZR(is64, mm), nameCC(cond));
3011 return True;
3012 }
3013
3014 /* -------------- ADD/SUB(extended reg) -------------- */
3015 /* 28 20 15 12 9 4
3016 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3017 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3018
3019 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3020 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3021
3022 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3023 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3024
3025 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3026 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3027
3028 The 'm' operand is extended per opt, thusly:
3029
3030 000 Xm & 0xFF UXTB
3031 001 Xm & 0xFFFF UXTH
3032 010 Xm & (2^32)-1 UXTW
3033 011 Xm UXTX
3034
3035 100 Xm sx from bit 7 SXTB
3036 101 Xm sx from bit 15 SXTH
3037 110 Xm sx from bit 31 SXTW
3038 111 Xm SXTX
3039
3040 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3041 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3042 are the identity operation on Wm.
3043
3044 After extension, the value is shifted left by imm3 bits, which
3045 may only be in the range 0 .. 4 inclusive.
3046 */
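 /* For example, ADD X0, SP, W1, SXTW #2 computes
    X0 = SP + (sign extension of W1 to 64 bits, shifted left by 2). */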
3047 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3048 Bool is64 = INSN(31,31) == 1;
3049 Bool isSub = INSN(30,30) == 1;
3050 Bool setCC = INSN(29,29) == 1;
3051 UInt mm = INSN(20,16);
3052 UInt opt = INSN(15,13);
3053 UInt imm3 = INSN(12,10);
3054 UInt nn = INSN(9,5);
3055 UInt dd = INSN(4,0);
3056 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3057 "sxtb", "sxth", "sxtw", "sxtx" };
3058 /* Do almost the same thing in the 32- and 64-bit cases. */
3059 IRTemp xN = newTemp(Ity_I64);
3060 IRTemp xM = newTemp(Ity_I64);
3061 assign(xN, getIReg64orSP(nn));
3062 assign(xM, getIReg64orZR(mm));
3063 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3064 Int shSX = 0;
3065 /* widen Xm .. */
3066 switch (opt) {
3067 case BITS3(0,0,0): // UXTB
3068 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3069 case BITS3(0,0,1): // UXTH
3070 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3071 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3072 if (is64) {
3073 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3074 }
3075 break;
3076 case BITS3(0,1,1): // UXTX -- always a noop
3077 break;
3078 case BITS3(1,0,0): // SXTB
3079 shSX = 56; goto sxTo64;
3080 case BITS3(1,0,1): // SXTH
3081 shSX = 48; goto sxTo64;
3082 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3083 if (is64) {
3084 shSX = 32; goto sxTo64;
3085 }
3086 break;
3087 case BITS3(1,1,1): // SXTX -- always a noop
3088 break;
3089 sxTo64:
3090 vassert(shSX >= 32);
3091 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3092 mkU8(shSX));
3093 break;
3094 default:
3095 vassert(0);
3096 }
3097 /* and now shift */
3098 IRTemp argL = xN;
3099 IRTemp argR = newTemp(Ity_I64);
3100 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3101 IRTemp res = newTemp(Ity_I64);
3102 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3103 mkexpr(argL), mkexpr(argR)));
3104 if (is64) {
3105 if (setCC) {
3106 putIReg64orZR(dd, mkexpr(res));
3107 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3108 } else {
3109 putIReg64orSP(dd, mkexpr(res));
3110 }
3111 } else {
3112 if (setCC) {
3113 IRTemp argL32 = newTemp(Ity_I32);
3114 IRTemp argR32 = newTemp(Ity_I32);
3115 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3116 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3117 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3118 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3119 } else {
3120 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3121 }
3122 }
3123 DIP("%s%s %s, %s, %s %s lsl %u\n",
3124 isSub ? "sub" : "add", setCC ? "s" : "",
3125 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3126 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3127 nameExt[opt], imm3);
3128 return True;
3129 }
3130
3131 /* ---------------- CCMP/CCMN(imm) ---------------- */
3132 /* Bizarrely, these appear in the "data processing register"
3133 category, even though they are operations against an
3134 immediate. */
3135 /* 31 29 20 15 11 9 3
3136 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3137 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3138
3139 Operation is:
3140 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3141 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3142 */
3143 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3144 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3145 Bool is64 = INSN(31,31) == 1;
3146 Bool isSUB = INSN(30,30) == 1;
3147 UInt imm5 = INSN(20,16);
3148 UInt cond = INSN(15,12);
3149 UInt nn = INSN(9,5);
3150 UInt nzcv = INSN(3,0);
3151
3152 IRTemp condT = newTemp(Ity_I1);
3153 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3154
3155 IRType ty = is64 ? Ity_I64 : Ity_I32;
3156 IRTemp argL = newTemp(ty);
3157 IRTemp argR = newTemp(ty);
3158
3159 if (is64) {
3160 assign(argL, getIReg64orZR(nn));
3161 assign(argR, mkU64(imm5));
3162 } else {
3163 assign(argL, getIReg32orZR(nn));
3164 assign(argR, mkU32(imm5));
3165 }
3166 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3167
3168 DIP("ccm%c %s, #%u, #%u, %s\n",
3169 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3170 imm5, nzcv, nameCC(cond));
3171 return True;
3172 }
3173
3174 /* ---------------- CCMP/CCMN(reg) ---------------- */
3175 /* 31 29 20 15 11 9 3
3176 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3177 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3178 Operation is:
3179 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3180 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3181 */
3182 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3183 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3184 Bool is64 = INSN(31,31) == 1;
3185 Bool isSUB = INSN(30,30) == 1;
3186 UInt mm = INSN(20,16);
3187 UInt cond = INSN(15,12);
3188 UInt nn = INSN(9,5);
3189 UInt nzcv = INSN(3,0);
3190
3191 IRTemp condT = newTemp(Ity_I1);
3192 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3193
3194 IRType ty = is64 ? Ity_I64 : Ity_I32;
3195 IRTemp argL = newTemp(ty);
3196 IRTemp argR = newTemp(ty);
3197
3198 if (is64) {
3199 assign(argL, getIReg64orZR(nn));
3200 assign(argR, getIReg64orZR(mm));
3201 } else {
3202 assign(argL, getIReg32orZR(nn));
3203 assign(argR, getIReg32orZR(mm));
3204 }
3205 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3206
3207 DIP("ccm%c %s, %s, #%u, %s\n",
3208 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3209 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3210 return True;
3211 }
3212
3213
3214 /* -------------- REV/REV16/REV32/RBIT -------------- */
3215 /* 31 30 28 20 15 11 9 4
3216
3217 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3218 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3219
3220 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3221 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3222
3223 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3224 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3225
3226 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3227 */
3228 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3229 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3230 UInt b31 = INSN(31,31);
3231 UInt opc = INSN(11,10);
3232
3233 UInt ix = 0;
3234 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3235 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3236 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3237 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3238 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3239 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3240 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3241 if (ix >= 1 && ix <= 7) {
3242 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3243 UInt nn = INSN(9,5);
3244 UInt dd = INSN(4,0);
3245 IRTemp src = newTemp(Ity_I64);
3246 IRTemp dst = IRTemp_INVALID;
3247 IRTemp (*math)(IRTemp) = NULL;
3248 switch (ix) {
3249 case 1: case 2: math = math_BYTESWAP64; break;
3250 case 3: case 4: math = math_BITSWAP64; break;
3251 case 5: case 6: math = math_USHORTSWAP64; break;
3252 case 7: math = math_UINTSWAP64; break;
3253 default: vassert(0);
3254 }
3255 const HChar* names[7]
3256 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3257 const HChar* nm = names[ix-1];
3258 vassert(math);
3259 if (ix == 6) {
3260 /* This has to be special cased, since the logic below doesn't
3261 handle it correctly. */
3262 assign(src, getIReg64orZR(nn));
3263 dst = math(src);
3264 putIReg64orZR(dd,
3265 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3266 } else if (is64) {
3267 assign(src, getIReg64orZR(nn));
3268 dst = math(src);
3269 putIReg64orZR(dd, mkexpr(dst));
3270 } else {
3271 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3272 dst = math(src);
3273 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3274 }
3275 DIP("%s %s, %s\n", nm,
3276 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3277 return True;
3278 }
3279 /* else fall through */
3280 }
3281
3282 /* -------------------- CLZ/CLS -------------------- */
3283 /* 30 28 24 20 15 9 4
3284 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3285 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3286 */
3287 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3288 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3289 Bool is64 = INSN(31,31) == 1;
3290 Bool isCLS = INSN(10,10) == 1;
3291 UInt nn = INSN(9,5);
3292 UInt dd = INSN(4,0);
3293 IRTemp src = newTemp(Ity_I64);
3294 IRTemp dst = newTemp(Ity_I64);
3295 if (!isCLS) { // CLS not yet supported
3296 if (is64) {
3297 assign(src, getIReg64orZR(nn));
3298 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3299 mkU64(64),
3300 unop(Iop_Clz64, mkexpr(src))));
3301 putIReg64orZR(dd, mkexpr(dst));
3302 } else {
3303 assign(src, binop(Iop_Shl64,
3304 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3305 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3306 mkU64(32),
3307 unop(Iop_Clz64, mkexpr(src))));
3308 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3309 }
3310 DIP("cl%c %s, %s\n",
3311 isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3312 return True;
3313 }
3314 }
3315
3316 /* -------------------- LSLV/LSRV/ASRV -------------------- */
3317 /* 30 28 20 15 11 9 4
3318 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3319 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3320 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3321 */
3322 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3323 && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
3324 Bool is64 = INSN(31,31) == 1;
3325 UInt mm = INSN(20,16);
3326 UInt op = INSN(11,10);
3327 UInt nn = INSN(9,5);
3328 UInt dd = INSN(4,0);
3329 IRType ty = is64 ? Ity_I64 : Ity_I32;
3330 IRTemp srcL = newTemp(ty);
3331 IRTemp srcR = newTemp(Ity_I8);
3332 IRTemp res = newTemp(ty);
3333 IROp iop = Iop_INVALID;
3334 assign(srcL, getIRegOrZR(is64, nn));
3335 assign(srcR,
3336 unop(Iop_64to8,
3337 binop(Iop_And64,
3338 getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
3339 switch (op) {
3340 case BITS2(0,0): iop = mkSHL(ty); break;
3341 case BITS2(0,1): iop = mkSHR(ty); break;
3342 case BITS2(1,0): iop = mkSAR(ty); break;
3343 default: vassert(0);
3344 }
3345 assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
3346 putIRegOrZR(is64, dd, mkexpr(res));
3347 vassert(op < 3);
3348 const HChar* names[3] = { "lslv", "lsrv", "asrv" };
3349 DIP("%s %s, %s, %s\n",
3350 names[op], nameIRegOrZR(is64,dd),
3351 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3352 return True;
3353 }
3354
3355 /* -------------------- SDIV/UDIV -------------------- */
3356 /* 30 28 20 15 10 9 4
3357 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3358 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3359 */
3360 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3361 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3362 Bool is64 = INSN(31,31) == 1;
3363 UInt mm = INSN(20,16);
3364 Bool isS = INSN(10,10) == 1;
3365 UInt nn = INSN(9,5);
3366 UInt dd = INSN(4,0);
3367 if (isS) {
3368 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3369 getIRegOrZR(is64, nn),
3370 getIRegOrZR(is64, mm)));
3371 } else {
3372 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3373 getIRegOrZR(is64, nn),
3374 getIRegOrZR(is64, mm)));
3375 }
3376 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3377 nameIRegOrZR(is64, dd),
3378 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3379 return True;
3380 }
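   /* A worked decode sketch (the encoding constant is our own, hedged,
      example): insn 0x9AC20820 should land in the case above with
      INSN(30,21) == 0011010110 and INSN(15,11) == 00001, giving
      is64 = True, mm = 2, isS = False (bit 10 == 0), nn = 1, dd = 0,
      hence "udiv x0, x1, x2". */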
3381
3382 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3383 /* 31 23 20 15 14 9 4
3384 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3385 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3386 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3387 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3388 with operation
3389 Xd = Xa +/- (Wn *u/s Wm)
3390 */
3391 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3392 Bool isU = INSN(23,23) == 1;
3393 UInt mm = INSN(20,16);
3394 Bool isAdd = INSN(15,15) == 0;
3395 UInt aa = INSN(14,10);
3396 UInt nn = INSN(9,5);
3397 UInt dd = INSN(4,0);
3398 IRTemp wN = newTemp(Ity_I32);
3399 IRTemp wM = newTemp(Ity_I32);
3400 IRTemp xA = newTemp(Ity_I64);
3401 IRTemp muld = newTemp(Ity_I64);
3402 IRTemp res = newTemp(Ity_I64);
3403 assign(wN, getIReg32orZR(nn));
3404 assign(wM, getIReg32orZR(mm));
3405 assign(xA, getIReg64orZR(aa));
3406 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3407 mkexpr(wN), mkexpr(wM)));
3408 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3409 mkexpr(xA), mkexpr(muld)));
3410 putIReg64orZR(dd, mkexpr(res));
3411 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3412 nameIReg64orZR(dd), nameIReg32orZR(nn),
3413 nameIReg32orZR(mm), nameIReg64orZR(aa));
3414 return True;
3415 }
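   /* Illustrative semantics, not from the original source: with
      Wn = 0xFFFFFFFF and Wm = 2, Iop_MullU32 produces
      0x00000001FFFFFFFE (4294967295 * 2), while Iop_MullS32 treats Wn
      as -1 and produces 0xFFFFFFFFFFFFFFFE (-2).  That widening
      difference is the entire U-versus-S distinction here. */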
3416 vex_printf("ARM64 front end: data_processing_register\n");
3417 return False;
3418# undef INSN
3419}
3420
3421
3422/*------------------------------------------------------------*/
3423/*--- Load and Store instructions ---*/
3424/*------------------------------------------------------------*/
3425
3426/* Generate the EA for a "reg + reg" style amode. This is done from
3427   parts of the insn, but for sanity checking's sake it takes the whole
3428 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
3429 and S=insn[12]:
3430
3431 The possible forms, along with their opt:S values, are:
3432 011:0 Xn|SP + Xm
3433 111:0 Xn|SP + Xm
3434 011:1 Xn|SP + Xm * transfer_szB
3435 111:1 Xn|SP + Xm * transfer_szB
3436 010:0 Xn|SP + 32Uto64(Wm)
3437 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
3438 110:0 Xn|SP + 32Sto64(Wm)
3439 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
3440
3441 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
3442 the transfer size is insn[23,31,30]. For integer loads/stores,
3443 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
3444
3445 If the decoding fails, it returns IRTemp_INVALID.
3446
3447   isInt is True iff this decoding is for transfers to/from integer
3448 registers. If False it is for transfers to/from vector registers.
3449*/
3450static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
3451{
3452 UInt optS = SLICE_UInt(insn, 15, 12);
3453 UInt mm = SLICE_UInt(insn, 20, 16);
3454 UInt nn = SLICE_UInt(insn, 9, 5);
3455 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3456 | SLICE_UInt(insn, 31, 30); // Log2 of the size
3457
3458 buf[0] = 0;
3459
3460 /* Sanity checks, that this really is a load/store insn. */
3461 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3462 goto fail;
3463
3464 if (isInt
3465 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3466 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3467 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3468 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3469 goto fail;
3470
3471 if (!isInt
3472 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3473 goto fail;
3474
3475 /* Throw out non-verified but possibly valid cases. */
3476 switch (szLg2) {
3477 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
3478 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
3479 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
3480 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
3481 case BITS3(1,0,0): // can only ever be valid for the vector case
3482            goto fail; // never reachable for int; vec case not yet handled (ATC)
3483 case BITS3(1,0,1): // these sizes are never valid
3484 case BITS3(1,1,0):
3485 case BITS3(1,1,1): goto fail;
3486
3487 default: vassert(0);
3488 }
3489
3490 IRExpr* rhs = NULL;
3491 switch (optS) {
3492 case BITS4(1,1,1,0): goto fail; //ATC
3493 case BITS4(0,1,1,0):
3494 rhs = getIReg64orZR(mm);
3495 vex_sprintf(buf, "[%s, %s]",
3496 nameIReg64orZR(nn), nameIReg64orZR(mm));
3497 break;
3498 case BITS4(1,1,1,1): goto fail; //ATC
3499 case BITS4(0,1,1,1):
3500 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
3501 vex_sprintf(buf, "[%s, %s lsl %u]",
3502 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
3503 break;
3504 case BITS4(0,1,0,0):
3505 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
3506         vex_sprintf(buf, "[%s, %s uxtw]",
3507 nameIReg64orZR(nn), nameIReg32orZR(mm));
3508 break;
3509 case BITS4(0,1,0,1):
3510 rhs = binop(Iop_Shl64,
3511 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
3512         vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
3513 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3514 break;
3515 case BITS4(1,1,0,0):
3516 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
3517         vex_sprintf(buf, "[%s, %s sxtw]",
3518 nameIReg64orZR(nn), nameIReg32orZR(mm));
3519 break;
3520 case BITS4(1,1,0,1):
3521 rhs = binop(Iop_Shl64,
3522 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
3523         vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
3524 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3525 break;
3526 default:
3527 /* The rest appear to be genuinely invalid */
3528 goto fail;
3529 }
3530
3531 vassert(rhs);
3532 IRTemp res = newTemp(Ity_I64);
3533 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
3534 return res;
3535
3536 fail:
3537 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
3538 return IRTemp_INVALID;
3539}
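
/* What follows is a self-contained illustrative sketch, not used by the
   decoder: it restates gen_indexed_EA's opt:S index arithmetic in plain
   C.  The function name is an invention for exposition only. */
static inline ULong example_reg_reg_EA ( UInt optS, ULong xn, ULong xm,
                                         UInt szLg2 )
{
   ULong idx;
   switch (optS) {
      case 6:  idx = xm;                              break; // 011:0 Xm
      case 7:  idx = xm << szLg2;                     break; // 011:1 Xm * szB
      case 4:  idx = (ULong)(UInt)xm;                 break; // 010:0 uxtw(Wm)
      case 5:  idx = ((ULong)(UInt)xm) << szLg2;      break; // 010:1
      case 12: idx = (ULong)(Long)(Int)xm;            break; // 110:0 sxtw(Wm)
      case 13: idx = ((ULong)(Long)(Int)xm) << szLg2; break; // 110:1
      default: idx = 0; break; // remaining opt:S forms are invalid/unhandled
   }
   return xn + idx;
}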
3540
3541
3542/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3543 bits of DATAE :: Ity_I64. */
3544static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3545{
3546 IRExpr* addrE = mkexpr(addr);
3547 switch (szB) {
3548 case 8:
3549 storeLE(addrE, dataE);
3550 break;
3551 case 4:
3552 storeLE(addrE, unop(Iop_64to32, dataE));
3553 break;
3554 case 2:
3555 storeLE(addrE, unop(Iop_64to16, dataE));
3556 break;
3557 case 1:
3558 storeLE(addrE, unop(Iop_64to8, dataE));
3559 break;
3560 default:
3561 vassert(0);
3562 }
3563}
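
/* Illustrative example, not from the original source: a call
   gen_narrowing_store(2, addr, data) with DATAE == 0x1122334455667788
   emits a 16-bit little-endian store of 0x7788, i.e. the bytes
   0x88 then 0x77 starting at ADDR. */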
3564
3565
3566/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3567 placing the result in an Ity_I64 temporary. */
3568static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3569{
3570 IRTemp res = newTemp(Ity_I64);
3571 IRExpr* addrE = mkexpr(addr);
3572 switch (szB) {
3573 case 8:
3574 assign(res, loadLE(Ity_I64,addrE));
3575 break;
3576 case 4:
3577 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3578 break;
3579 case 2:
3580 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3581 break;
3582 case 1:
3583 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3584 break;
3585 default:
3586 vassert(0);
3587 }
3588 return res;
3589}
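
/* Illustrative example, not from the original source: with the bytes
   0x34 0x12 at ADDR, gen_zwidening_load(2, addr) yields an Ity_I64
   temporary holding 0x0000000000001234 -- a little-endian 16-bit load
   with the upper 48 bits zeroed. */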
3590
3591
sewardj18bf5172014-06-14 18:05:30 +00003592/* Generate a "standard 7" name, from bitQ and size. But also
3593 allow ".1d" since that's occasionally useful. */
3594static
3595const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
3596{
3597 vassert(bitQ <= 1 && size <= 3);
3598 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00003599 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00003600 UInt ix = (bitQ << 2) | size;
3601 vassert(ix < 8);
3602 return nms[ix];
3603}
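
/* Illustrative example: nameArr_Q_SZ(1, 2) computes ix == (1 << 2) | 2
   == 6 and returns "4s"; nameArr_Q_SZ(0, 3) gives ix == 3 and returns
   the nonstandard-but-occasionally-useful "1d". */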
3604
3605
sewardjbbcf1882014-01-12 12:49:10 +00003606static
3607Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3608{
3609# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3610
3611 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3612 /* uimm12 is scaled by the transfer size
3613
3614 31 29 26 21 9 4
3615 | | | | | |
3616 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3617 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3618
3619 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3620 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3621
3622 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3623 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3624
3625 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3626 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3627 */
3628 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3629 UInt szLg2 = INSN(31,30);
3630 UInt szB = 1 << szLg2;
3631 Bool isLD = INSN(22,22) == 1;
3632 UInt offs = INSN(21,10) * szB;
3633 UInt nn = INSN(9,5);
3634 UInt tt = INSN(4,0);
3635 IRTemp ta = newTemp(Ity_I64);
3636 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3637 if (nn == 31) { /* FIXME generate stack alignment check */ }
3638 vassert(szLg2 < 4);
3639 if (isLD) {
3640 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3641 } else {
3642 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3643 }
3644 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3645 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3646 DIP("%s %s, [%s, #%u]\n",
3647 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3648 nameIReg64orSP(nn), offs);
3649 return True;
3650 }
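   /* A worked decode sketch (the encoding constant is our own, hedged,
      example): insn 0xF9400FE2 should match above with
      INSN(29,23) == 1110010, szLg2 = 3 (szB = 8), isLD = True,
      imm12 = 3, nn = 31, tt = 2, so offs = 3 * 8 = 24 and the effect
      is "ldr x2, [sp, #24]". */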
3651
3652 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3653 /*
3654 31 29 26 20 11 9 4
3655 | | | | | | |
3656 (at-Rn-then-Rn=EA) | | |
3657 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3658 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3659
3660 (at-EA-then-Rn=EA)
3661 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3662 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3663
3664 (at-EA)
3665 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3666 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3667
3668 simm9 is unscaled.
3669
3670 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3671      load case this is because it would create two competing values for
3672 Rt. In the store case the reason is unclear, but the spec
3673 disallows it anyway.
3674
3675 Stores are narrowing, loads are unsigned widening. sz encodes
3676 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3677 */
3678 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3679 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3680 UInt szLg2 = INSN(31,30);
3681 UInt szB = 1 << szLg2;
3682 Bool isLoad = INSN(22,22) == 1;
3683 UInt imm9 = INSN(20,12);
3684 UInt nn = INSN(9,5);
3685 UInt tt = INSN(4,0);
3686 Bool wBack = INSN(10,10) == 1;
3687 UInt how = INSN(11,10);
3688 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3689 /* undecodable; fall through */
3690 } else {
3691 if (nn == 31) { /* FIXME generate stack alignment check */ }
3692
3693 // Compute the transfer address TA and the writeback address WA.
3694 IRTemp tRN = newTemp(Ity_I64);
3695 assign(tRN, getIReg64orSP(nn));
3696 IRTemp tEA = newTemp(Ity_I64);
3697 Long simm9 = (Long)sx_to_64(imm9, 9);
3698 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3699
3700 IRTemp tTA = newTemp(Ity_I64);
3701 IRTemp tWA = newTemp(Ity_I64);
3702 switch (how) {
3703 case BITS2(0,1):
3704 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3705 case BITS2(1,1):
3706 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3707 case BITS2(0,0):
3708 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3709 default:
3710 vassert(0); /* NOTREACHED */
3711 }
3712
sewardje0bff8b2014-03-09 09:40:23 +00003713 /* Normally rN would be updated after the transfer. However, in
3714      the special case typified by
3715 str x30, [sp,#-16]!
3716 it is necessary to update SP before the transfer, (1)
3717 because Memcheck will otherwise complain about a write
3718 below the stack pointer, and (2) because the segfault
3719 stack extension mechanism will otherwise extend the stack
3720 only down to SP before the instruction, which might not be
3721      far enough, if the -16 offset takes the actual access
3722 address to the next page.
3723 */
3724 Bool earlyWBack
3725 = wBack && simm9 < 0 && szB == 8
3726 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3727
3728 if (wBack && earlyWBack)
3729 putIReg64orSP(nn, mkexpr(tEA));
3730
sewardjbbcf1882014-01-12 12:49:10 +00003731 if (isLoad) {
3732 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3733 } else {
3734 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3735 }
3736
sewardje0bff8b2014-03-09 09:40:23 +00003737 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003738 putIReg64orSP(nn, mkexpr(tEA));
3739
3740 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3741 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3742 const HChar* fmt_str = NULL;
3743 switch (how) {
3744 case BITS2(0,1):
3745 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3746 break;
3747 case BITS2(1,1):
3748 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3749 break;
3750 case BITS2(0,0):
3751 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3752 break;
3753 default:
3754 vassert(0);
3755 }
3756 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3757 nameIRegOrZR(szB == 8, tt),
3758 nameIReg64orSP(nn), simm9);
3759 return True;
3760 }
3761 }
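   /* A worked example of the early-writeback rule (the encoding
      constant is our own, hedged, example): "str x30, [sp, #-16]!",
      believed to encode as 0xF81F0FFE, arrives here with szB = 8,
      simm9 = -16, how = 11, nn = 31, tt = 30 and !isLoad, so
      earlyWBack holds: SP is moved down to the EA first, and the
      store then writes at the new SP. */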
3762
3763 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3764 /* L==1 => mm==LD
3765 L==0 => mm==ST
3766 x==0 => 32 bit transfers, and zero extended loads
3767 x==1 => 64 bit transfers
3768 simm7 is scaled by the (single-register) transfer size
3769
3770 (at-Rn-then-Rn=EA)
3771 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3772
3773 (at-EA-then-Rn=EA)
3774 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3775
3776 (at-EA)
3777 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3778 */
3779
3780 UInt insn_30_23 = INSN(30,23);
3781 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3782 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3783 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3784 UInt bL = INSN(22,22);
3785 UInt bX = INSN(31,31);
3786 UInt bWBack = INSN(23,23);
3787 UInt rT1 = INSN(4,0);
3788 UInt rN = INSN(9,5);
3789 UInt rT2 = INSN(14,10);
3790 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3791 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3792 || (bL && rT1 == rT2)) {
3793 /* undecodable; fall through */
3794 } else {
3795 if (rN == 31) { /* FIXME generate stack alignment check */ }
3796
3797 // Compute the transfer address TA and the writeback address WA.
3798 IRTemp tRN = newTemp(Ity_I64);
3799 assign(tRN, getIReg64orSP(rN));
3800 IRTemp tEA = newTemp(Ity_I64);
3801 simm7 = (bX ? 8 : 4) * simm7;
3802 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3803
3804 IRTemp tTA = newTemp(Ity_I64);
3805 IRTemp tWA = newTemp(Ity_I64);
3806 switch (INSN(24,23)) {
3807 case BITS2(0,1):
3808 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3809 case BITS2(1,1):
3810 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3811 case BITS2(1,0):
3812 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3813 default:
3814 vassert(0); /* NOTREACHED */
3815 }
3816
3817 /* Normally rN would be updated after the transfer. However, in
3818         the special case typified by
3819 stp x29, x30, [sp,#-112]!
3820 it is necessary to update SP before the transfer, (1)
3821 because Memcheck will otherwise complain about a write
3822 below the stack pointer, and (2) because the segfault
3823 stack extension mechanism will otherwise extend the stack
3824 only down to SP before the instruction, which might not be
3825         far enough, if the -112 offset takes the actual access
3826 address to the next page.
3827 */
3828 Bool earlyWBack
3829 = bWBack && simm7 < 0
3830 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3831
3832 if (bWBack && earlyWBack)
3833 putIReg64orSP(rN, mkexpr(tEA));
3834
3835 /**/ if (bL == 1 && bX == 1) {
3836 // 64 bit load
3837 putIReg64orZR(rT1, loadLE(Ity_I64,
3838 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3839 putIReg64orZR(rT2, loadLE(Ity_I64,
3840 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3841 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003842 // 32 bit load
3843 putIReg32orZR(rT1, loadLE(Ity_I32,
3844 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3845 putIReg32orZR(rT2, loadLE(Ity_I32,
3846 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3847 } else if (bL == 0 && bX == 1) {
3848 // 64 bit store
3849 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3850 getIReg64orZR(rT1));
3851 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3852 getIReg64orZR(rT2));
3853 } else {
3854 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003855 // 32 bit store
3856 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3857 getIReg32orZR(rT1));
3858 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3859 getIReg32orZR(rT2));
3860 }
3861
3862 if (bWBack && !earlyWBack)
3863 putIReg64orSP(rN, mkexpr(tEA));
3864
3865 const HChar* fmt_str = NULL;
3866 switch (INSN(24,23)) {
3867 case BITS2(0,1):
3868 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3869 break;
3870 case BITS2(1,1):
3871 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3872 break;
3873 case BITS2(1,0):
3874 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3875 break;
3876 default:
3877 vassert(0);
3878 }
3879 DIP(fmt_str, bL == 0 ? "st" : "ld",
3880 nameIRegOrZR(bX == 1, rT1),
3881 nameIRegOrZR(bX == 1, rT2),
3882 nameIReg64orSP(rN), simm7);
3883 return True;
3884 }
3885 }
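   /* A worked example (the encoding constant is our own, hedged,
      example): the frame-push "stp x29, x30, [sp, #-112]!", believed
      to encode as 0xA9B97BFD, decodes with bX = 1, bL = 0, rT1 = 29,
      rT2 = 30, rN = 31 and raw simm7 = -14, scaled to -112.  Since
      simm7 < 0, INSN(24,23) == 11, rN == 31 and bL == 0, earlyWBack
      applies and SP moves down before the two stores. */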
3886
3887 /* ---------------- LDR (literal, int reg) ---------------- */
3888 /* 31 29 23 4
3889 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3890 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3891 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3892 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3893 Just handles the first two cases for now.
3894 */
3895 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3896 UInt imm19 = INSN(23,5);
3897 UInt rT = INSN(4,0);
3898 UInt bX = INSN(30,30);
3899 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3900 if (bX) {
3901 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3902 } else {
3903 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3904 }
3905 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3906 return True;
3907 }
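   /* Illustrative arithmetic, not from the original source: imm19 is a
      signed word offset, so with imm19 = 0x7FFFF, imm19 << 2 = 0x1FFFFC
      sign-extends from bit 20 to -4, giving ea = PC - 4; literals are
      hence reachable within PC +/- 1MB. */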
3908
3909 /* -------------- {LD,ST}R (integer register) --------------- */
3910 /* 31 29 20 15 12 11 9 4
3911 | | | | | | | |
3912 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3913 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3914 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3915 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3916
3917 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3918 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3919 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3920 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3921 */
3922 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3923 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3924 HChar dis_buf[64];
3925 UInt szLg2 = INSN(31,30);
3926 Bool isLD = INSN(22,22) == 1;
3927 UInt tt = INSN(4,0);
3928 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3929 if (ea != IRTemp_INVALID) {
3930 switch (szLg2) {
3931 case 3: /* 64 bit */
3932 if (isLD) {
3933 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3934 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3935 } else {
3936 storeLE(mkexpr(ea), getIReg64orZR(tt));
3937 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3938 }
3939 break;
3940 case 2: /* 32 bit */
3941 if (isLD) {
3942 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3943 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3944 } else {
3945 storeLE(mkexpr(ea), getIReg32orZR(tt));
3946 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3947 }
3948 break;
3949 case 1: /* 16 bit */
3950 if (isLD) {
3951 putIReg64orZR(tt, unop(Iop_16Uto64,
3952 loadLE(Ity_I16, mkexpr(ea))));
3953                  DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3954 } else {
3955 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3956 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3957 }
3958 break;
3959 case 0: /* 8 bit */
3960 if (isLD) {
3961 putIReg64orZR(tt, unop(Iop_8Uto64,
3962 loadLE(Ity_I8, mkexpr(ea))));
3963                  DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3964 } else {
3965 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3966 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3967 }
3968 break;
3969 default:
3970 vassert(0);
3971 }
3972 return True;
3973 }
3974 }
3975
3976 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3977 /* 31 29 26 23 21 9 4
3978 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3979 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3980 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3981 where
3982 Rt is Wt when x==1, Xt when x==0
3983 */
3984 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3985 /* Further checks on bits 31:30 and 22 */
3986 Bool valid = False;
3987 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3988 case BITS3(1,0,0):
3989 case BITS3(0,1,0): case BITS3(0,1,1):
3990 case BITS3(0,0,0): case BITS3(0,0,1):
3991 valid = True;
3992 break;
3993 }
3994 if (valid) {
3995 UInt szLg2 = INSN(31,30);
3996 UInt bitX = INSN(22,22);
3997 UInt imm12 = INSN(21,10);
3998 UInt nn = INSN(9,5);
3999 UInt tt = INSN(4,0);
4000 UInt szB = 1 << szLg2;
4001 IRExpr* ea = binop(Iop_Add64,
4002 getIReg64orSP(nn), mkU64(imm12 * szB));
4003 switch (szB) {
4004 case 4:
4005 vassert(bitX == 0);
4006 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
4007 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
4008 nameIReg64orSP(nn), imm12 * szB);
4009 break;
4010 case 2:
4011 if (bitX == 1) {
4012 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
4013 } else {
4014 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
4015 }
4016 DIP("ldrsh %s, [%s, #%u]\n",
4017 nameIRegOrZR(bitX == 0, tt),
4018 nameIReg64orSP(nn), imm12 * szB);
4019 break;
4020 case 1:
4021 if (bitX == 1) {
4022 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
4023 } else {
4024 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
4025 }
4026 DIP("ldrsb %s, [%s, #%u]\n",
4027 nameIRegOrZR(bitX == 0, tt),
4028 nameIReg64orSP(nn), imm12 * szB);
4029 break;
4030 default:
4031 vassert(0);
4032 }
4033 return True;
4034 }
4035 /* else fall through */
4036 }
4037
4038 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
4039 /* (at-Rn-then-Rn=EA)
4040 31 29 23 21 20 11 9 4
4041 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
4042 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
4043 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
4044
4045 (at-EA-then-Rn=EA)
4046 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
4047 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
4048 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
4049 where
4050 Rt is Wt when x==1, Xt when x==0
4051 transfer-at-Rn when [11]==0, at EA when [11]==1
4052 */
4053 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4054 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4055 /* Further checks on bits 31:30 and 22 */
4056 Bool valid = False;
4057 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4058 case BITS3(1,0,0): // LDRSW Xt
4059 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
4060 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
4061 valid = True;
4062 break;
4063 }
4064 if (valid) {
4065 UInt szLg2 = INSN(31,30);
4066 UInt imm9 = INSN(20,12);
4067 Bool atRN = INSN(11,11) == 0;
4068 UInt nn = INSN(9,5);
4069 UInt tt = INSN(4,0);
4070 IRTemp tRN = newTemp(Ity_I64);
4071 IRTemp tEA = newTemp(Ity_I64);
4072 IRTemp tTA = IRTemp_INVALID;
4073 ULong simm9 = sx_to_64(imm9, 9);
4074 Bool is64 = INSN(22,22) == 0;
4075 assign(tRN, getIReg64orSP(nn));
4076 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4077 tTA = atRN ? tRN : tEA;
4078 HChar ch = '?';
4079 /* There are 5 cases:
4080 byte load, SX to 64
4081 byte load, SX to 32, ZX to 64
4082 halfword load, SX to 64
4083 halfword load, SX to 32, ZX to 64
4084 word load, SX to 64
4085 The ifs below handle them in the listed order.
4086 */
4087 if (szLg2 == 0) {
4088 ch = 'b';
4089 if (is64) {
4090 putIReg64orZR(tt, unop(Iop_8Sto64,
4091 loadLE(Ity_I8, mkexpr(tTA))));
4092 } else {
4093 putIReg32orZR(tt, unop(Iop_8Sto32,
4094 loadLE(Ity_I8, mkexpr(tTA))));
4095 }
4096 }
4097 else if (szLg2 == 1) {
4098 ch = 'h';
4099 if (is64) {
4100 putIReg64orZR(tt, unop(Iop_16Sto64,
4101 loadLE(Ity_I16, mkexpr(tTA))));
4102 } else {
4103 putIReg32orZR(tt, unop(Iop_16Sto32,
4104 loadLE(Ity_I16, mkexpr(tTA))));
4105 }
4106 }
4107 else if (szLg2 == 2 && is64) {
4108 ch = 'w';
4109 putIReg64orZR(tt, unop(Iop_32Sto64,
4110 loadLE(Ity_I32, mkexpr(tTA))));
4111 }
4112 else {
4113 vassert(0);
4114 }
4115 putIReg64orSP(nn, mkexpr(tEA));
4116         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
4117 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4118 return True;
4119 }
4120 /* else fall through */
4121 }
4122
4123 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
4124 /* 31 29 23 21 20 11 9 4
4125 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
4126 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
4127 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
4128 where
4129 Rt is Wt when x==1, Xt when x==0
4130 */
4131 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4132 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4133 /* Further checks on bits 31:30 and 22 */
4134 Bool valid = False;
4135 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4136 case BITS3(1,0,0): // LDURSW Xt
4137 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
4138 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
4139 valid = True;
4140 break;
4141 }
4142 if (valid) {
4143 UInt szLg2 = INSN(31,30);
4144 UInt imm9 = INSN(20,12);
4145 UInt nn = INSN(9,5);
4146 UInt tt = INSN(4,0);
4147 IRTemp tRN = newTemp(Ity_I64);
4148 IRTemp tEA = newTemp(Ity_I64);
4149 ULong simm9 = sx_to_64(imm9, 9);
4150 Bool is64 = INSN(22,22) == 0;
4151 assign(tRN, getIReg64orSP(nn));
4152 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4153 HChar ch = '?';
4154 /* There are 5 cases:
4155 byte load, SX to 64
4156 byte load, SX to 32, ZX to 64
4157 halfword load, SX to 64
4158 halfword load, SX to 32, ZX to 64
4159 word load, SX to 64
4160 The ifs below handle them in the listed order.
4161 */
4162 if (szLg2 == 0) {
4163 ch = 'b';
4164 if (is64) {
4165 putIReg64orZR(tt, unop(Iop_8Sto64,
4166 loadLE(Ity_I8, mkexpr(tEA))));
4167 } else {
4168 putIReg32orZR(tt, unop(Iop_8Sto32,
4169 loadLE(Ity_I8, mkexpr(tEA))));
4170 }
4171 }
4172 else if (szLg2 == 1) {
4173 ch = 'h';
4174 if (is64) {
4175 putIReg64orZR(tt, unop(Iop_16Sto64,
4176 loadLE(Ity_I16, mkexpr(tEA))));
4177 } else {
4178 putIReg32orZR(tt, unop(Iop_16Sto32,
4179 loadLE(Ity_I16, mkexpr(tEA))));
4180 }
4181 }
4182 else if (szLg2 == 2 && is64) {
4183 ch = 'w';
4184 putIReg64orZR(tt, unop(Iop_32Sto64,
4185 loadLE(Ity_I32, mkexpr(tEA))));
4186 }
4187 else {
4188 vassert(0);
4189 }
4190         DIP("ldurs%c %s, [%s, #%lld]\n",
4191 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4192 return True;
4193 }
4194 /* else fall through */
4195 }
4196
4197 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
4198 /* L==1 => mm==LD
4199 L==0 => mm==ST
4200 sz==00 => 32 bit (S) transfers
4201 sz==01 => 64 bit (D) transfers
4202 sz==10 => 128 bit (Q) transfers
4203 sz==11 isn't allowed
4204 simm7 is scaled by the (single-register) transfer size
4205
4206 31 29 22 21 14 9 4
4207 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
4208 (at-Rn-then-Rn=EA)
4209
4210 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
4211 (at-EA-then-Rn=EA)
4212
4213 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
4214 (at-EA)
4215 */
4216
4217 UInt insn_29_23 = INSN(29,23);
4218 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
4219 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
4220 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
4221 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
4222 Bool isLD = INSN(22,22) == 1;
4223 Bool wBack = INSN(23,23) == 1;
4224 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4225 UInt tt2 = INSN(14,10);
4226 UInt nn = INSN(9,5);
4227 UInt tt1 = INSN(4,0);
4228 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
4229 /* undecodable; fall through */
4230 } else {
4231 if (nn == 31) { /* FIXME generate stack alignment check */ }
4232
4233 // Compute the transfer address TA and the writeback address WA.
4234 UInt szB = 4 << szSlg2; /* szB is the per-register size */
4235 IRTemp tRN = newTemp(Ity_I64);
4236 assign(tRN, getIReg64orSP(nn));
4237 IRTemp tEA = newTemp(Ity_I64);
4238 simm7 = szB * simm7;
4239 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4240
4241 IRTemp tTA = newTemp(Ity_I64);
4242 IRTemp tWA = newTemp(Ity_I64);
4243 switch (INSN(24,23)) {
4244 case BITS2(0,1):
4245 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4246 case BITS2(1,1):
4247 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4248 case BITS2(1,0):
4249 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4250 default:
4251 vassert(0); /* NOTREACHED */
4252 }
4253
4254 IRType ty = Ity_INVALID;
4255 switch (szB) {
4256 case 4: ty = Ity_F32; break;
4257 case 8: ty = Ity_F64; break;
4258 case 16: ty = Ity_V128; break;
4259 default: vassert(0);
4260 }
4261
sewardje0bff8b2014-03-09 09:40:23 +00004262 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00004263            the special cases typified by
sewardje0bff8b2014-03-09 09:40:23 +00004264 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00004265 stp d0, d1, [sp,#-512]!
4266 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00004267 it is necessary to update SP before the transfer, (1)
4268 because Memcheck will otherwise complain about a write
4269 below the stack pointer, and (2) because the segfault
4270 stack extension mechanism will otherwise extend the stack
4271 only down to SP before the instruction, which might not be
4272            far enough, if the -512 offset takes the actual access
4273 address to the next page.
4274 */
4275 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00004276 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00004277 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
4278
4279 if (wBack && earlyWBack)
4280 putIReg64orSP(nn, mkexpr(tEA));
4281
sewardjbbcf1882014-01-12 12:49:10 +00004282 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00004283 if (szB < 16) {
4284 putQReg128(tt1, mkV128(0x0000));
4285 }
sewardj606c4ba2014-01-26 19:11:14 +00004286 putQRegLO(tt1,
4287 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00004288 if (szB < 16) {
4289 putQReg128(tt2, mkV128(0x0000));
4290 }
sewardj606c4ba2014-01-26 19:11:14 +00004291 putQRegLO(tt2,
4292 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00004293 } else {
4294 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00004295 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004296 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00004297 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004298 }
4299
sewardje0bff8b2014-03-09 09:40:23 +00004300 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004301 putIReg64orSP(nn, mkexpr(tEA));
4302
4303 const HChar* fmt_str = NULL;
4304 switch (INSN(24,23)) {
4305 case BITS2(0,1):
4306 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4307 break;
4308 case BITS2(1,1):
4309 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4310 break;
4311 case BITS2(1,0):
4312 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4313 break;
4314 default:
4315 vassert(0);
4316 }
4317 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00004318 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00004319 nameIReg64orSP(nn), simm7);
4320 return True;
4321 }
4322 }
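   /* A worked example (the encoding constant is our own, hedged,
      example): "stp q0, q1, [sp, #-512]!", believed to encode as
      0xADB007E0, reaches here with szSlg2 = 2 (so szB = 16),
      isLD = False, wBack = True and raw simm7 = -32, scaled to -512;
      so the early-writeback rule fires exactly as in the integer
      LDP/STP case above. */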
4323
4324 /* -------------- {LD,ST}R (vector register) --------------- */
4325 /* 31 29 23 20 15 12 11 9 4
4326 | | | | | | | | |
4327 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
4328 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
4329 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
4330 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
4331 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
4332
4333 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
4334 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
4335 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
4336 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
4337 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
4338 */
4339 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4340 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4341 HChar dis_buf[64];
4342 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4343 Bool isLD = INSN(22,22) == 1;
4344 UInt tt = INSN(4,0);
4345 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
4346 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
4347 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
4348 switch (szLg2) {
4349 case 0: /* 8 bit */
4350 if (isLD) {
4351 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004352 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
4353 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004354 } else {
4355 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004356 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
4357 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004358 }
4359 break;
4360 case 1:
4361 if (isLD) {
4362 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004363 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
4364 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004365 } else {
4366 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004367 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
4368 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004369 }
4370 break;
4371 case 2: /* 32 bit */
4372 if (isLD) {
4373 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004374 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
4375 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004376 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004377 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
4378 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004379 }
4380 break;
4381 case 3: /* 64 bit */
4382 if (isLD) {
4383 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004384 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
4385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004386 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
4388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004389 }
4390 break;
4391 case 4: return False; //ATC
4392 default: vassert(0);
4393 }
4394 return True;
4395 }
4396 after_LDR_STR_vector_register:
4397
4398 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
4399 /* 31 29 22 20 15 12 11 9 4
4400 | | | | | | | | |
4401 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
4402
4403 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
4404 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
4405
4406 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
4407 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
4408 */
4409 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4410 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4411 HChar dis_buf[64];
4412 UInt szLg2 = INSN(31,30);
4413 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
4414 UInt tt = INSN(4,0);
4415 if (szLg2 == 3) goto after_LDRS_integer_register;
4416 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4417 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
4418 /* Enumerate the 5 variants explicitly. */
4419 if (szLg2 == 2/*32 bit*/ && sxTo64) {
4420 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
4421 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
4422 return True;
4423 }
4424 else
4425 if (szLg2 == 1/*16 bit*/) {
4426 if (sxTo64) {
4427 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
4428 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
4429 } else {
4430 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
4431 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4432 }
4433 return True;
4434 }
4435 else
4436 if (szLg2 == 0/*8 bit*/) {
4437 if (sxTo64) {
4438 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
4439 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
4440 } else {
4441 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
4442 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4443 }
4444 return True;
4445 }
4446 /* else it's an invalid combination */
4447 }
4448 after_LDRS_integer_register:
4449
4450 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
4451 /* This is the Unsigned offset variant only. The Post-Index and
4452 Pre-Index variants are below.
4453
4454 31 29 23 21 9 4
4455 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
4456 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
4457 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
4458 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
4459 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
4460
4461 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
4462 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
4463 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
4464 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
4465 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
4466 */
4467 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
4468 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
4469 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4470 Bool isLD = INSN(22,22) == 1;
4471 UInt pimm12 = INSN(21,10) << szLg2;
4472 UInt nn = INSN(9,5);
4473 UInt tt = INSN(4,0);
4474 IRTemp tEA = newTemp(Ity_I64);
4475 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4476 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
4477 if (isLD) {
4478 if (szLg2 < 4) {
4479 putQReg128(tt, mkV128(0x0000));
4480 }
sewardj606c4ba2014-01-26 19:11:14 +00004481 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004482 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004483 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004484 }
4485 DIP("%s %s, [%s, #%u]\n",
4486 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004487 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00004488 return True;
4489 }
4490
4491 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
4492 /* These are the Post-Index and Pre-Index variants.
4493
4494 31 29 23 20 11 9 4
4495 (at-Rn-then-Rn=EA)
4496 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
4497 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
4498 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
4499 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
4500 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
4501
4502 (at-EA-then-Rn=EA)
4503 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
4504 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
4505 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
4506 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
4507 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
4508
4509 Stores are the same except with bit 22 set to 0.
4510 */
4511 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4512 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4513 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4514 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4515 Bool isLD = INSN(22,22) == 1;
4516 UInt imm9 = INSN(20,12);
4517 Bool atRN = INSN(11,11) == 0;
4518 UInt nn = INSN(9,5);
4519 UInt tt = INSN(4,0);
4520 IRTemp tRN = newTemp(Ity_I64);
4521 IRTemp tEA = newTemp(Ity_I64);
4522 IRTemp tTA = IRTemp_INVALID;
4523 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4524 ULong simm9 = sx_to_64(imm9, 9);
4525 assign(tRN, getIReg64orSP(nn));
4526 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4527 tTA = atRN ? tRN : tEA;
4528 if (isLD) {
4529 if (szLg2 < 4) {
4530 putQReg128(tt, mkV128(0x0000));
4531 }
sewardj606c4ba2014-01-26 19:11:14 +00004532 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00004533 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004534 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004535 }
4536 putIReg64orSP(nn, mkexpr(tEA));
4537 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
4538 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004539 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004540 return True;
4541 }
4542
4543 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
4544 /* 31 29 23 20 11 9 4
4545 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
4546 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
4547 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
4548 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
4549 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
4550
4551 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
4552 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
4553 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
4554 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
4555 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
4556 */
4557 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4558 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4559 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4560 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4561 Bool isLD = INSN(22,22) == 1;
4562 UInt imm9 = INSN(20,12);
4563 UInt nn = INSN(9,5);
4564 UInt tt = INSN(4,0);
4565 ULong simm9 = sx_to_64(imm9, 9);
4566 IRTemp tEA = newTemp(Ity_I64);
4567 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4568 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4569 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004570 if (szLg2 < 4) {
4571 putQReg128(tt, mkV128(0x0000));
4572 }
4573 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004574 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004575 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004576 }
4577 DIP("%s %s, [%s, #%lld]\n",
4578 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004579 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004580 return True;
4581 }
4582
4583 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4584 /* 31 29 23 4
4585 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4586 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4587 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4588 */
4589 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4590 UInt szB = 4 << INSN(31,30);
4591 UInt imm19 = INSN(23,5);
4592 UInt tt = INSN(4,0);
4593 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4594 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004595 putQReg128(tt, mkV128(0x0000));
4596 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4597 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004598 return True;
4599 }
4600
sewardj606c4ba2014-01-26 19:11:14 +00004601 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004602 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004603 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4604 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4605 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4606 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4607 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4608 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004609 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4610 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004611 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004612 */
sewardj606c4ba2014-01-26 19:11:14 +00004613 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4614 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004615 ) {
4616 Bool isLD = INSN(22,22) == 1;
4617 UInt rN = INSN(9,5);
4618 UInt vT = INSN(4,0);
4619 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004620 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4621 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004622 assign(tEA, getIReg64orSP(rN));
4623 if (rN == 31) { /* FIXME generate stack alignment check */ }
4624 if (isLD) {
4625 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4626 } else {
4627 storeLE(mkexpr(tEA), getQReg128(vT));
4628 }
4629 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004630 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004631 return True;
4632 }
4633
sewardj606c4ba2014-01-26 19:11:14 +00004634 /* 31 23
4635 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4636 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4637 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4638 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4639 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4640 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4641 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4642 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4643 FIXME does this assume that the host is little endian?
4644 */
4645 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4646 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4647 ) {
4648 Bool isLD = INSN(22,22) == 1;
4649 UInt rN = INSN(9,5);
4650 UInt vT = INSN(4,0);
4651 IRTemp tEA = newTemp(Ity_I64);
4652 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4653 const HChar* name = names[INSN(11,10)];
4654 assign(tEA, getIReg64orSP(rN));
4655 if (rN == 31) { /* FIXME generate stack alignment check */ }
4656 if (isLD) {
4657 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4658 putQRegLane(vT, 1, mkU64(0));
4659 } else {
4660 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4661 }
4662 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4663 vT, name, nameIReg64orSP(rN));
4664 return True;
4665 }
4666
4667 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4668 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004669 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4670 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4671 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4672 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4673 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4674 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4675 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004676 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004677 Note that #16 is implied and cannot be any other value.
4678 FIXME does this assume that the host is little endian?
4679 */
sewardj7d009132014-02-20 17:43:38 +00004680 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4681 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004682 ) {
4683 Bool isLD = INSN(22,22) == 1;
4684 UInt rN = INSN(9,5);
4685 UInt vT = INSN(4,0);
4686 IRTemp tEA = newTemp(Ity_I64);
4687 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4688 const HChar* name = names[INSN(11,10)];
4689 assign(tEA, getIReg64orSP(rN));
4690 if (rN == 31) { /* FIXME generate stack alignment check */ }
4691 if (isLD) {
4692 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4693 } else {
4694 storeLE(mkexpr(tEA), getQReg128(vT));
4695 }
4696 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4697 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4698 vT, name, nameIReg64orSP(rN));
4699 return True;
4700 }
4701
sewardj950ca7a2014-04-03 23:03:32 +00004702 /* 31 23
4703 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4704 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004705 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004706 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004707 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004708 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4709 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4710 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004711 Note that #8 is implied and cannot be any other value.
4712 FIXME does this assume that the host is little endian?
4713 */
sewardj950ca7a2014-04-03 23:03:32 +00004714 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4715 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004716 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004717 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004718 UInt rN = INSN(9,5);
4719 UInt vT = INSN(4,0);
4720 IRTemp tEA = newTemp(Ity_I64);
4721 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4722 const HChar* name = names[INSN(11,10)];
4723 assign(tEA, getIReg64orSP(rN));
4724 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004725 if (isLD) {
4726 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4727 putQRegLane(vT, 1, mkU64(0));
4728 } else {
4729 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4730 }
sewardj606c4ba2014-01-26 19:11:14 +00004731 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004732 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4733 vT, name, nameIReg64orSP(rN));
4734 return True;
4735 }
4736
sewardj18bf5172014-06-14 18:05:30 +00004737 /* ---------- LD1R (single structure, replicate) ---------- */
4738 /* 31 29 22 20 15 11 9 4
4739 0q 001 1010 10 00000 110 0 sz n t LD1R Vt.T, [Xn|SP]
4740 0q 001 1011 10 m 110 0 sz n t LD1R Vt.T, [Xn|SP], #sz (m=11111)
4741 , Xm (m!=11111)
4742 */
4743 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
4744 && INSN(22,21) == BITS2(1,0) && INSN(15,12) == BITS4(1,1,0,0)) {
sewardjdf9d6d52014-06-27 10:43:22 +00004745 UInt bitQ = INSN(30,30);
sewardj18bf5172014-06-14 18:05:30 +00004746 Bool isPX = INSN(23,23) == 1;
4747 UInt mm = INSN(20,16);
4748 UInt sz = INSN(11,10);
4749 UInt nn = INSN(9,5);
4750 UInt tt = INSN(4,0);
4751 IRType ty = integerIRTypeOfSize(1 << sz);
4752 IRTemp tEA = newTemp(Ity_I64);
4753 assign(tEA, getIReg64orSP(nn));
4754 if (nn == 31) { /* FIXME generate stack alignment check */ }
4755 IRTemp loaded = newTemp(ty);
4756 assign(loaded, loadLE(ty, mkexpr(tEA)));
4757 IRTemp dupd = math_DUP_TO_V128(loaded, ty);
sewardjdf9d6d52014-06-27 10:43:22 +00004758 putQReg128(tt, math_MAYBE_ZERO_HI64(bitQ, dupd));
4759 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
sewardj18bf5172014-06-14 18:05:30 +00004760 /* Deal with the writeback, if any. */
4761 if (!isPX && mm == BITS5(0,0,0,0,0)) {
4762 /* No writeback. */
4763 DIP("ld1r v%u.%s, [%s]\n", tt, arr, nameIReg64orSP(nn));
4764 return True;
4765 }
4766 if (isPX) {
4767 putIReg64orSP(nn, binop(Iop_Add64, mkexpr(tEA),
4768 mm == BITS5(1,1,1,1,1) ? mkU64(1 << sz)
4769 : getIReg64orZR(mm)));
4770 if (mm == BITS5(1,1,1,1,1)) {
4771            DIP("ld1r v%u.%s, [%s], #%u\n", tt, arr,
4772                nameIReg64orSP(nn), 1 << sz);
4773         } else {
4774            DIP("ld1r v%u.%s, [%s], %s\n", tt, arr,
4775                nameIReg64orSP(nn), nameIReg64orZR(mm));
4776 }
4777 return True;
4778 }
4779 return False;
4780 }
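   /* Illustrative semantics, not from the original source: for
      "ld1r {v0.4s}, [x1]" we have bitQ = 1 and sz = 2, so one 32-bit
      element is loaded from [x1] and math_DUP_TO_V128 replicates it
      into all four lanes of v0; with bitQ = 0 the upper 64 bits would
      be zeroed instead, giving the "2s" arrangement. */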
4781
sewardj168c8bd2014-06-25 13:05:23 +00004782 /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004783 /* Only a very few cases. */
4784 /* 31 23 11 9 4
4785 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4786 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4787 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4788 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4789 */
4790 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4791 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4792 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4793 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4794 ) {
4795 Bool isLD = INSN(22,22) == 1;
4796 UInt rN = INSN(9,5);
4797 UInt vT = INSN(4,0);
4798 IRTemp tEA = newTemp(Ity_I64);
4799 UInt sz = INSN(11,10);
4800 const HChar* name = "??";
4801 assign(tEA, getIReg64orSP(rN));
4802 if (rN == 31) { /* FIXME generate stack alignment check */ }
4803 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4804 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4805 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4806 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4807 if (sz == BITS2(1,1)) {
4808 name = "2d";
4809 if (isLD) {
4810 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4811 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4812 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4813 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4814 } else {
4815 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4816 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4817 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4818 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4819 }
4820 }
4821 else if (sz == BITS2(1,0)) {
4822 /* Uh, this is ugly. TODO: better. */
4823 name = "4s";
4824 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4825 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4826 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4827 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4828 if (isLD) {
4829 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4830 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4831 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4832 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4833 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4834 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4835 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4836 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4837 } else {
4838 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4839 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4840 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4841 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4842 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4843 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4844 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4845 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4846 }
4847 }
4848 else {
4849 vassert(0); // Can't happen.
4850 }
4851 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4852 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4853 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4854 return True;
4855 }
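   /* Illustrative note on the lane arithmetic above: LD2 is a 2-way
      de-interleave.  In the .4s case memory holds a0 b0 a1 b1 a2 b2
      a3 b3; the a-elements (offsets 0, 8, 16, 24) land in lanes 0..3
      of Vt and the b-elements (offsets 4, 12, 20, 28) in lanes 0..3
      of V(t+1).  ST2 performs the inverse interleave. */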
4856
sewardj39f754d2014-06-24 10:26:52 +00004857 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, no offset) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004858 /* Only a very few cases. */
4859 /* 31 23
4860 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4861 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4862 */
4863 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4864 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4865 ) {
4866 Bool isLD = INSN(22,22) == 1;
4867 UInt rN = INSN(9,5);
4868 UInt vT = INSN(4,0);
4869 IRTemp tEA = newTemp(Ity_I64);
4870 const HChar* name = "16b";
4871 assign(tEA, getIReg64orSP(rN));
4872 if (rN == 31) { /* FIXME generate stack alignment check */ }
4873 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4874 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4875 if (isLD) {
4876 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4877 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4878 } else {
4879 storeLE(tEA_0, getQReg128((vT+0) % 32));
4880 storeLE(tEA_16, getQReg128((vT+1) % 32));
4881 }
sewardj8a5ed542014-07-15 11:08:42 +00004882 DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4883 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4884 return True;
4885 }
4886
4887 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, post index) -------- */
4888 /* Only a very few cases. */
4889 /* 31 23
4890 0100 1100 1101 1111 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4891 0100 1100 1001 1111 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4892 */
4893 if ( (insn & 0xFFFFFC00) == 0x4CDFA000 // LD1
4894 || (insn & 0xFFFFFC00) == 0x4C9FA000 // ST1
4895 ) {
4896 Bool isLD = INSN(22,22) == 1;
4897 UInt rN = INSN(9,5);
4898 UInt vT = INSN(4,0);
4899 IRTemp tEA = newTemp(Ity_I64);
4900 const HChar* name = "16b";
4901 assign(tEA, getIReg64orSP(rN));
4902 if (rN == 31) { /* FIXME generate stack alignment check */ }
4903 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4904 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4905 if (isLD) {
4906 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4907 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4908 } else {
4909 storeLE(tEA_0, getQReg128((vT+0) % 32));
4910 storeLE(tEA_16, getQReg128((vT+1) % 32));
4911 }
4912 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
sewardj950ca7a2014-04-03 23:03:32 +00004913 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4914 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004915 return True;
4916 }
4917
sewardj39f754d2014-06-24 10:26:52 +00004918 /* -------- LD1/ST1 (multi 1-elem structs, 3 regs, no offset) -------- */
4919 /* Only a very few cases. */
4920 /* 31 23
4921 0100 1100 0100 0000 0110 00 n t LD1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4922 0100 1100 0000 0000 0110 00 n t ST1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4923 */
4924 if ( (insn & 0xFFFFFC00) == 0x4C406000 // LD1
4925 || (insn & 0xFFFFFC00) == 0x4C006000 // ST1
4926 ) {
4927 Bool isLD = INSN(22,22) == 1;
4928 UInt rN = INSN(9,5);
4929 UInt vT = INSN(4,0);
4930 IRTemp tEA = newTemp(Ity_I64);
4931 const HChar* name = "16b";
4932 assign(tEA, getIReg64orSP(rN));
4933 if (rN == 31) { /* FIXME generate stack alignment check */ }
4934 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4935 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4936 IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
4937 if (isLD) {
4938 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4939 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4940 putQReg128((vT+2) % 32, loadLE(Ity_V128, tEA_32));
4941 } else {
4942 storeLE(tEA_0, getQReg128((vT+0) % 32));
4943 storeLE(tEA_16, getQReg128((vT+1) % 32));
4944 storeLE(tEA_32, getQReg128((vT+2) % 32));
4945 }
4946 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
          isLD ? "ld1" : "st1",
          (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
          nameIReg64orSP(rN));
      return True;
   }

   /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
   /* Only a very few cases. */
   /* 31        23        11 9 4
      0100 1100 1101 1111 0100 11 n t  LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
      0100 1100 1001 1111 0100 11 n t  ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
   */
   if (   (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
       || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
      UInt vT   = INSN(4,0);
      IRTemp tEA = newTemp(Ity_I64);
      UInt sz    = INSN(11,10);
      const HChar* name = "??";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
      IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
      IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
      if (sz == BITS2(1,1)) {
         name = "2d";
         if (isLD) {
            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
            putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
            putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
         } else {
            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
            storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
            storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
            storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
            storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
         }
      }
      else {
         vassert(0); // Can't happen.
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48)));
4998 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
          isLD ? "ld3" : "st3",
          (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
          nameIReg64orSP(rN));
      return True;
   }

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                 mkU64(1)));
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }
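
   /* Sanity check on the encoding scheme: 0xC85F7C41 has sz = 11,
      isLD = 1, ss = 11111, nn = 2 and tt = 1, hence "ldxr x1, [x2]",
      which turns into an IRStmt_LLSC load-linked of an I64. */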

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }
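
   /* Similarly, 0xC8DFFC41 is "ldar x1, [x2]": a plain 64-bit load
      followed by an Imbe_Fence, a conservative but correct rendering
      of the acquire semantics. */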

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool  is64   = INSN(31,31) == 1;
      Bool  bIfZ   = INSN(24,24) == 0;
      ULong uimm64 = INSN(23,5) << 2;
      UInt  rT     = INSN(4,0);
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }

   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt   MSR fpcr, rT
      0xD53B44 000 Rt   MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt   MSR fpsr, rT
      0xD53B44 001 Rt   MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for NZCV ----
      D51B42 000 Rt   MSR nzcv, rT
      D53B42 000 Rt   MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
5360 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt   MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt   MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
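
   /* Eg. with 64-byte D and I lines (both _lg2_szB fields 6), val is
      0x8440c000 | (4 << 16) | (4 << 0) = 0x8444c004, ie. DminLine and
      IminLine both 4 (meaning 16 words), plus the fixed CWG/ERG/L1Ip
      fields noted above. */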
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt   MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt   ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt   dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   if (INSN(31,0) == 0xD5033FDF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("isb\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033BBF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ish\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033ABF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ishst\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033B9F) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dsb ish\n");
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}
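
/* Eg. Replicate8x8(0x5A) is 0x5A5A5A5A5A5A5A5AULL, built up via
   Replicate16x4(0x5A5A) and Replicate32x2(0x5A5A5A5A). */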

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
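
/* Worked example: VFPExpandImm(0x70, 64) gives sign = 0, exp = 0xFF
   (in 9 bits) and frac = 0x30 << 48, which assemble to
   0x3FF0000000000000, ie. the double 1.0, agreeing with the
   architected expansion of imm8 = 0x70. */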

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //         :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
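
/* Two worked examples: op=0, cmode=0, imm8=0xAB succeeds with
   *res = 0x000000AB000000AB (the byte replicated into both 32-bit
   halves), while op=0, cmode=2, imm8=0 returns False, since the
   shifted forms require a nonzero imm8. */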

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}
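
/* Eg. bitQ=False, bitSZ=False describes two F32 lanes with the upper
   half of the destination zeroed out, printed with the "2s"
   arrangement specifier. */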

/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}
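
/* Worked example: immh=2 (0b0010), immb=5 gives immhb = 21; the
   "immh & 2" case fires, so lanes are 16 bits wide (szBlg2 = 1) and
   the shift amount is 32 - 21 = 11. */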

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}
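
/* For example, math_FOLDV(src, Iop_Add32x4) produces a V128 whose
   lane 0 holds the sum of all four 32-bit lanes of |src| and whose
   other lanes are zero -- the shape that reductions like ADDV need. */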


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}
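
/* Net effect, by example: for a one-register TBL (len == 0), a source
   index byte of 5 selects byte 5 of tab[0], and any index >= 16 yields
   0x00; for TBX the same out-of-range index instead keeps the
   corresponding byte of |oor_values|. */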


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}


/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}
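
/* Per lane this is the usual branch-free absolute difference: the
   compare mask selects argL - argR where argL > argR, and argR - argL
   otherwise, so the difference that is kept is always the
   non-negative one. */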


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) bytes obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
         x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}
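
/* Worked example: imm5 == 01010b has bit 0 clear and bit 1 set, so it
   selects .h[(01010b >> 2) & 7] = .h[2], with laneSzLg2 == 1 and
   laneCh == 'h'.  imm5 == 00000b and 10000b match none of the tests
   above and are rejected as invalid. */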


/* Clone |imm| to every lane of a V128, where the lane size in bytes
   is 1 << |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Get the old contents of Vdd, zero its upper half, and OR in
         the low half of |new64| as the new upper half. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                 mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                        mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size),  mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
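
/* Worked example (signed byte lane 0x80, i.e. -128): the lane is
   negative, so |mask| selects the negated value.  The plain negation
   0 - (-128) wraps back to 0x80, so nabs keeps -128; the saturating
   negation clamps to 0x7F, so qabs holds +127.  The two lanes differ,
   which is exactly what lets the caller set QCFLAG. */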


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}


/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to == IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp via *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                  : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}
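
/* A minimal usage sketch, assuming temps vecN/vecM/vecD already hold
   the source registers: UMLAL Vd.4s, Vn.4h, Vm.4h would be

      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, False/*!is2*/, True/*isU*/, X01, 'a',
                    vecN, vecM, vecD);

   i.e. unsigned-widen the low four 16-bit lanes of Vn and Vm,
   multiply at 32 bits, and add the products to the 32-bit lanes
   of Vd. */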


/* Same as math_MULL_ACC, except the multiply is signed widening, and
   the multiplied value is doubled before being added to or subtracted
   from the accumulated value.  And everything is saturated.  In all
   cases, saturation residuals are returned via (sat1q, sat1n), and in
   the accumulate cases, via (sat2q, sat2n) too.  All results are
   returned in new temporaries.  In the no-accumulate case, *sat2q and
   *sat2n are never instantiated, so the caller can tell this has
   happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
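
/* Worked example of why two saturation stages are needed (size == X01,
   16 -> 32 bit lanes): for input lanes 0x8000 * 0x8000, the doubled
   product is 2^31, which overflows INT32 and saturates to 0x7FFFFFFF
   in sat1q, while sat1n wraps to 0x80000000.  If accumulation then
   pushes the sum past the INT32 range, the second stage (sat2q vs
   sat2n) differs instead.  Either mismatch makes the caller set
   QCFLAG. */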


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
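
/* Numeric sketch of the lane arithmetic (size == X01, halfwords):
      sqdmulh  n, m : (2*n*m) >> 16, the high half of the doubled
                      32-bit product;
      sqrdmulh n, m : (2*n*m + 0x8000) >> 16, i.e. rounded to nearest.
   The CATODDLANES step extracts exactly those high halves from the
   widened products, and comparing them against the genuinely
   saturating Iop_QD(R)MulHi result exposes the one overflowing case,
   0x8000 * 0x8000. */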


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATU2U(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATS2S(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATS2U(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      /* This is different from the other two cases, in that
         saturation can occur even if there is no shift. */
      /* Saturation has occurred if any of the shifted-out bits, or
         the top bit of the original value, are nonzero. */
      UInt rshift = laneBits - 1 - shift;
      vassert(rshift >= 0 && rshift < laneBits);
      /* qDiff1 is the shifted out bits, and the top bit of the original
         value, preceded by zeroes. */
      assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
      assign(*qDiff2, mkexpr(z128));
      return;
   }

   vassert(0);
}
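
/* Worked example (uqshl, byte lanes, shift == 3): for a lane value
   0x2F, the shifted-out bits are 0x2F >> (8-3) = 0x01, which is
   nonzero, so qDiff1 != qDiff2 and the lane saturated (0x2F << 3 =
   0x178 does not fit in 8 bits; the saturating op yields 0xFF).
   For 0x1F the shifted-out bits are 0x00 and no saturation is
   flagged. */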


/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}
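
/* In effect this computes QCFLAG |= zeroHI(qres ^ nres): any lane in
   which the saturating and non-saturating results disagree leaves at
   least one nonzero bit in the 128-bit QCFLAG shadow, and a nonzero
   shadow later reads back as QC set.  For a scalar op, |opZHI| masks
   off the irrelevant upper lanes first, so junk there cannot set QC
   spuriously. */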


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 <= 15);
            assign(res,
                   binop(Iop_OrV128,
                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            assign(res,
                   binop(Iop_ShrV128,
                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
                         mkU8(8 * imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }
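
   /* Worked example: ext v0.16b, v1.16b, v2.16b, #3 computes
      (v2:v1) >> (8*3), truncated to 128 bits: bytes 3..15 of v1
      followed by bytes 0..2 of v2 -- hence the OR of v2 shifted
      left by 13 bytes with v1 shifted right by 3 bytes. */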

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }
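
   /* Worked example: saddlv on 8 byte lanes first sign-widens them to
      eight 16-bit lanes, then the mkVecADD(size+1) fold sums all the
      16-bit lanes into a single 16-bit total, which lands in the
      destination's lowest lane.  Summing at the widened width is what
      makes the "L" (long) variant immune to narrow-lane overflow. */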

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15        9 4
      0q0 01110000 imm5 0 0001 1  n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }
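
   /* Worked decode example: smov x2, v7.h[3] has imm5 = xxx10 with
      xxx = 3, i.e. imm5 = 01110b, together with imm4 = 0101b (SMOV)
      and bitQ = 1.  That takes the "1:xxx10" arm above: 16-bit lane 3
      of v7 is sign-extended to 64 bits (Iop_16Sto64) and written
      to x2. */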

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20   14     9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* FMOV (vector, immediate, single precision) */

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
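
/* Worked example of the immediate expansion used above (an informal
   sketch of AdvSIMDExpandImm, which is defined elsewhere): for MOVI
   with cmode = 0010 ("32-bit shifted imm, LSL #8") and abcdefgh =
   0xAB, the 64-bit expansion is 0x0000AB000000AB00, i.e. the byte
   placed at bits 15:8 of each 32-bit lane; bitQ then decides whether
   one or both 64-bit halves of Vd are written. */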


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,1xxx,00000 SHR d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      /* Don't generate an out of range IR shift */
      putQReg128(dd, sh == 64
                        ? mkV128(0x0000)
                        : unop(Iop_ZeroHI64ofV128,
                               binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shr d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
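
   /* Immediate decode example: for these scalar D-sized shifts the
      shift amount is encoded as immh:immb = 128 - sh, so
      shr d0, d1, #1 has immhb = 1111111b (127), and
      shr d0, d1, #64 has immhb = 1000000b (64).  The immh & 8 test is
      what restricts this arm of the decoder to 64-bit lanes. */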
7546
sewardjdf1628c2014-06-10 22:52:05 +00007547 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7548 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
7549 UInt sh = immhb - 64;
7550 vassert(sh >= 0 && sh < 64);
sewardj8e91fd42014-07-11 12:05:47 +00007551 putQReg128(dd,
7552 unop(Iop_ZeroHI64ofV128,
7553 sh == 0 ? getQReg128(nn)
7554 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
sewardjdf1628c2014-06-10 22:52:05 +00007555 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
7556 return True;
7557 }
7558
sewardj8e91fd42014-07-11 12:05:47 +00007559 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
7560 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
7561 UInt sh = 128 - immhb;
7562 vassert(sh >= 1 && sh <= 64);
7563 if (sh == 64) {
7564 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
7565 } else {
7566 /* sh is in range 1 .. 63 */
7567 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
7568 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7569 IRTemp res = newTempV128();
7570 assign(res, binop(Iop_OrV128,
7571 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7572 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
7573 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7574 }
7575 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
7576 return True;
7577 }
7578
7579 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7580 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
7581 UInt sh = immhb - 64;
7582 vassert(sh >= 0 && sh < 64);
7583 if (sh == 0) {
7584 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
7585 } else {
7586 /* sh is in range 1 .. 63 */
7587 ULong nmask = (1ULL << sh) - 1;
7588 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7589 IRTemp res = newTempV128();
7590 assign(res, binop(Iop_OrV128,
7591 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7592 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7593 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7594 }
7595 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
7596 return True;
7597 }
7598
sewardje741d162014-08-13 13:10:47 +00007599 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
7600 || (bitU == 1
7601 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
7602 /* -------- 0,10010 SQSHRN #imm -------- */
7603 /* -------- 1,10010 UQSHRN #imm -------- */
7604 /* -------- 0,10011 SQRSHRN #imm -------- */
7605 /* -------- 1,10011 UQRSHRN #imm -------- */
7606 /* -------- 1,10000 SQSHRUN #imm -------- */
7607 /* -------- 1,10001 SQRSHRUN #imm -------- */
7608 UInt size = 0;
7609 UInt shift = 0;
7610 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
7611 if (!ok || size == X11) return False;
7612 vassert(size >= X00 && size <= X10);
7613 vassert(shift >= 1 && shift <= (8 << size));
7614 const HChar* nm = "??";
7615 IROp op = Iop_INVALID;
7616 /* Decide on the name and the operation. */
7617 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
7618 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
7619 }
7620 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
7621 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
7622 }
7623 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
7624 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
7625 }
7626 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
7627 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
7628 }
7629 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
7630 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
7631 }
7632 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
7633 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
7634 }
7635 else vassert(0);
7636 /* Compute the result (Q, shifted value) pair. */
7637 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
7638 IRTemp pair = newTempV128();
7639 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
7640 /* Update the result reg */
7641 IRTemp res64in128 = newTempV128();
7642 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
7643 putQReg128(dd, mkexpr(res64in128));
7644 /* Update the Q flag. */
7645 IRTemp q64q64 = newTempV128();
7646 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
7647 IRTemp z128 = newTempV128();
7648 assign(z128, mkV128(0x0000));
7649 updateQCFLAGwithDifference(q64q64, z128);
7650 /* */
7651 const HChar arrNarrow = "bhsd"[size];
7652 const HChar arrWide = "bhsd"[size+1];
7653 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
7654 return True;
7655 }
7656
sewardjdf1628c2014-06-10 22:52:05 +00007657# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7658 return False;
7659# undef INSN
7660}


static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23 21 20 15     11 9 4
      01 U 11110  size 1 m  opcode 00 n d
      Decode fields: u,opcode
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt mm = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL -------- */ // 1
      /* -------- 0,1011 SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, %c%u\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }
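
   /* Worked example for the saturation checks above (comment only):
      SQDMULL overflows in exactly one case per lane pair, when both
      narrow operands are the most negative value.  For the h->s variant:
         n = m = 0x8000 (-32768)
         n * m       = 0x40000000
         2 * (n * m) = 0x80000000, not representable as a signed
                       32-bit value,
      so the result saturates to 0x7FFFFFFF, sat1q differs from sat1n,
      and QCFLAG gets set. */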

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23 21 20 15     10 9 4
      01 U 11110  size 1 m  opcode 1 n d
      Decode fields: u,size,opcode
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt mm = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU = bitU == 1;
      IROp qop = Iop_INVALID;
      IROp nop = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
                              : (isU ? "uqsub" : "sqsub");
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                    : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH = newTempV128();
      IRTemp resQ = newTempV128();
      IRTemp zero = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero, mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
                            : (isU ? "uqshl" : "sqshl");
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
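
   /* Illustrative sketch, not part of the decoder: the mkVecQAND*SH ops
      above return a V256 pair -- V128_0 holds the shifted result and
      V128_1 holds per-lane saturation indications.  A scalar model of one
      UQSHL 8-bit lane, with a hypothetical helper name, and ignoring the
      negative (rightwards) shift counts the real op also supports:

         typedef struct { UChar res; Bool sat; } ShPair;

         static ShPair uqshl8 ( UChar x, UInt sh ) {
            UInt   wide = (UInt)x << sh;
            ShPair p;
            p.res = (wide > 0xFF) ? 0xFF : (UChar)wide;
            p.sat = wide > 0xFF;
            return p;
         }

      Note uqshl8(0, 0) is { 0, False }: shifting zero never saturates,
      which is why zeroing the unused lanes keeps them out of the Q flag. */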

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool isSUB = bitU == 1;
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar arr = "bhsd"[size];
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23 21    16     11 9 4
      01 U 11110  size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp opN = Iop_INVALID;
      Bool zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                      size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* The zeroed-out lanes re-widen back to zero and hence compare
         equal against the (also zero) lanes of src, so the
         non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }
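
   /* Illustrative sketch, not part of the decoder: the Q flag for the
      saturating narrows above falls out of narrow-then-rewiden.  A
      scalar model of the UQXTN h->b case (comment only; SQXTN would
      sign-extend instead, which is what zWiden == False selects):

         UShort x16    = ...;                               // wide lane
         UChar  narrow = (x16 > 0xFF) ? 0xFF : (UChar)x16;  // uqxtn
         UShort back   = (UShort)narrow;                    // zero-widen
         Bool   qc     = back != x16;          // True iff x16 saturated
   */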

# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21 20 19 15     11 9 4
      01 U 11111 size L  M  m opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt bitL = INSN(21,21);
   UInt bitM = INSN(20,20);
   UInt mmLO4 = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH = INSN(11,11);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt bitU = INSN(29,29);
   UInt immh = INSN(22,19);
   UInt immb = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size = 0;
      UInt shift = 0;
      Bool isQ = bitQ == 1;
      Bool isU = bitU == 1;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp res = newTempV128();
      if (shift == lanebits && isU) {
         assign(res, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(res, binop(op, src, mkU8(shift - nudge)));
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isU ? "ushr" : "sshr";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size = 0;
      UInt shift = 0;
      Bool isQ = bitQ == 1;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp tmp = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ = bitQ == 1;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
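      /* Worked example for the adjustment above (comment only): for
         immh:immb = 0001:011 (B lanes, immh:immb = 11),
         getLaneInfo_IMMH_IMMB returns the rightwards form 16 - 11 = 5,
         and the leftwards amount is then lanebits - 5 = 8 - 5 = 3,
         ie SHL #3, matching the architectural encoding
         shift = immh:immb - lanebits = 11 - 8 = 3. */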
      IROp op = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp tmp = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
      /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
      /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
      UInt size = 0;
      UInt shift = 0;
      Bool isQ = bitQ == 1;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res = IRTemp_INVALID;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_ZeroHI64ofV128 : Iop_INVALID);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000 SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size = 0;
      UInt shift = 0;
      Bool is2 = bitQ == 1;
      Bool isR = opcode == BITS5(1,0,0,0,1);
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
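
   /* Worked example for the rounding constant above (comment only):
      RSHRN adds 1 << (shift-1) before shifting, rounding to nearest.
      With 16-bit wide lanes, x = 0x0018 and shift = 4:
         SHRN:   0x0018 >> 4                         = 0x0001
         RSHRN: (0x0018 + 0x0008) >> 4 = 0x0020 >> 4 = 0x0002. */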

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN{,2} #imm -------- */
      /* -------- 1,10010 UQSHRN{,2} #imm -------- */
      /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size = 0;
      UInt shift = 0;
      Bool is2 = bitQ == 1;
      Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool isQ = bitQ == 1;
      Bool isU = bitU == 1;
      UInt immhb = (immh << 3) | immb;
      IRTemp src = newTempV128();
      IRTemp zero = newTempV128();
      IRExpr* res = NULL;
      UInt sh = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%u\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }
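
   /* Why the interleave-then-shift scheme above works (comment only):
      interleaving a lane x with zero places x in the top half of the
      doubled lane, ie it computes x << halfwidth.  For the 16->32 bit
      case:
         (x << 16) >>s (16 - sh)  ==  sign_extend32(x) << sh   (SSHLL)
         (x << 16) >>u (16 - sh)  ==  zero_extend32(x) << sh   (USHLL)
      so one wide shift performs both the widening and the left shift. */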

# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
# undef INSN
}
8538
sewardjfc83d2c2014-06-12 10:15:46 +00008539
sewardjdf1628c2014-06-10 22:52:05 +00008540static
8541Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
8542{
sewardj25523c42014-06-15 19:36:29 +00008543 /* 31 30 29 28 23 21 20 15 11 9 4
8544 0 Q U 01110 size 1 m opcode 00 n d
8545 Decode fields: u,opcode
8546 */
sewardjdf1628c2014-06-10 22:52:05 +00008547# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj25523c42014-06-15 19:36:29 +00008548 if (INSN(31,31) != 0
8549 || INSN(28,24) != BITS5(0,1,1,1,0)
8550 || INSN(21,21) != 1
8551 || INSN(11,10) != BITS2(0,0)) {
8552 return False;
8553 }
8554 UInt bitQ = INSN(30,30);
8555 UInt bitU = INSN(29,29);
8556 UInt size = INSN(23,22);
8557 UInt mm = INSN(20,16);
8558 UInt opcode = INSN(15,12);
8559 UInt nn = INSN(9,5);
8560 UInt dd = INSN(4,0);
8561 vassert(size < 4);
8562 Bool is2 = bitQ == 1;
8563
sewardj6f312d02014-06-28 12:21:37 +00008564 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
8565 /* -------- 0,0000 SADDL{2} -------- */
8566 /* -------- 1,0000 UADDL{2} -------- */
8567 /* -------- 0,0010 SSUBL{2} -------- */
8568 /* -------- 1,0010 USUBL{2} -------- */
8569 /* Widens, and size refers to the narrowed lanes. */
sewardj6f312d02014-06-28 12:21:37 +00008570 if (size == X11) return False;
8571 vassert(size <= 2);
8572 Bool isU = bitU == 1;
8573 Bool isADD = opcode == BITS4(0,0,0,0);
sewardja5a6b752014-06-30 07:33:56 +00008574 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
8575 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008576 IRTemp res = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00008577 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj6f312d02014-06-28 12:21:37 +00008578 mkexpr(argL), mkexpr(argR)));
8579 putQReg128(dd, mkexpr(res));
8580 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8581 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8582 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
8583 : (isU ? "usubl" : "ssubl");
8584 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8585 nameQReg128(dd), arrWide,
8586 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
8587 return True;
8588 }
8589
sewardja5a6b752014-06-30 07:33:56 +00008590 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
8591 /* -------- 0,0001 SADDW{2} -------- */
8592 /* -------- 1,0001 UADDW{2} -------- */
8593 /* -------- 0,0011 SSUBW{2} -------- */
8594 /* -------- 1,0011 USUBW{2} -------- */
8595 /* Widens, and size refers to the narrowed lanes. */
8596 if (size == X11) return False;
8597 vassert(size <= 2);
8598 Bool isU = bitU == 1;
8599 Bool isADD = opcode == BITS4(0,0,0,1);
8600 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008601 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008602 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
8603 getQReg128(nn), mkexpr(argR)));
8604 putQReg128(dd, mkexpr(res));
8605 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8606 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8607 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
8608 : (isU ? "usubw" : "ssubw");
8609 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8610 nameQReg128(dd), arrWide,
8611 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
8612 return True;
8613 }
8614
sewardj25523c42014-06-15 19:36:29 +00008615 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
8616 /* -------- 0,0100 ADDHN{2} -------- */
8617 /* -------- 1,0100 RADDHN{2} -------- */
8618 /* -------- 0,0110 SUBHN{2} -------- */
8619 /* -------- 1,0110 RSUBHN{2} -------- */
8620 /* Narrows, and size refers to the narrowed lanes. */
8621 if (size == X11) return False;
8622 vassert(size <= 2);
sewardj487559e2014-07-10 14:22:45 +00008623 const UInt shift[3] = { 8, 16, 32 };
sewardj25523c42014-06-15 19:36:29 +00008624 Bool isADD = opcode == BITS4(0,1,0,0);
8625 Bool isR = bitU == 1;
8626 /* Combined elements in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00008627 IRTemp wide = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008628 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj25523c42014-06-15 19:36:29 +00008629 getQReg128(nn), getQReg128(mm));
8630 if (isR) {
sewardj487559e2014-07-10 14:22:45 +00008631 wideE = binop(mkVecADD(size+1),
8632 wideE,
8633 mkexpr(math_VEC_DUP_IMM(size+1,
8634 1ULL << (shift[size]-1))));
sewardj25523c42014-06-15 19:36:29 +00008635 }
8636 assign(wide, wideE);
8637 /* Top halves of elements, still in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00008638 IRTemp shrd = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008639 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
sewardj25523c42014-06-15 19:36:29 +00008640 /* Elements now compacted into lower 64 bits */
sewardj8e91fd42014-07-11 12:05:47 +00008641 IRTemp new64 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008642 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
sewardj25523c42014-06-15 19:36:29 +00008643 putLO64andZUorPutHI64(is2, dd, new64);
8644 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8645 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8646 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
8647 : (isR ? "rsubhn" : "subhn");
8648 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8649 nameQReg128(dd), arrNarrow,
8650 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
8651 return True;
8652 }
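
   /* Worked example for the rounding constant above (comment only):
      RADDHN with 16-bit wide lanes (size == X00, shift[size] == 8) adds
      1 << 7 before taking the high half, rounding it to nearest:
         n + m = 0x1280
         ADDHN:  hi8(0x1280)          = 0x12
         RADDHN: hi8(0x1280 + 0x0080) = hi8(0x1300) = 0x13. */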

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU = bitU == 1;
      Bool isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
                              : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100 UMULL{2} -------- */ // 0
      /* -------- 0,1000 SMLAL{2} -------- */ // 1
      /* -------- 1,1000 UMLAL{2} -------- */ // 1
      /* -------- 0,1010 SMLSL{2} -------- */ // 2
      /* -------- 1,1010 UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool isU = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110 PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23 21 20 15     10 9 4
      0  Q  U  01110 size 1 m  opcode 1 n d
      Decode fields: u,size,opcode
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt mm = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
      IRTemp argL = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi = newTempV128();
      IRTemp resLo = newTempV128();
      IRTemp res = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
                              : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
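
   /* Worked example for the widen/op/halve/narrow scheme above (comment
      only): SHADD on 8-bit lanes with a = b = 0x7F (127).  Widened to
      16 bits there is no overflow:
         (0x007F + 0x007F) >>s 1 = 0x00FE >>s 1 = 0x007F
      whereas a naive 8-bit (a + b) >>s 1 would wrap to 0xFE (-2) first
      and give -1.  The signed/unsigned choice only affects the widening
      and the final shift (SAR vs SHR). */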

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU = bitU == 1;
      IROp qop = Iop_INVALID;
      IROp nop = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
                              : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool isORx = (size & 2) == 2;
      Bool invert = (size & 1) == 1;
      IRTemp res = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
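
   /* Illustrative note, not part of the decoder: the XOR/AND forms above
      are the standard branch-free bit-select identities.  For BSL,
         m ^ ((m ^ n) & d)  ==  (d & n) | (~d & m)
      since each d bit either picks the n bit (d = 1: m ^ m ^ n == n) or
      keeps the m bit (d = 0: m ^ 0 == m).  BIT and BIF are the same
      identity with d and m exchanging roles (m complemented for BIF). */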

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                    : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH = newTempV128();
      IRTemp resQ = newTempV128();
      IRTemp zero = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero, mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
                            : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                              : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm = isACC ? (isU ? "uaba" : "saba")
                              : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
9089 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
9090 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009091 Bool isEQ = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009092 IRExpr* argL = getQReg128(nn);
9093 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009094 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009095 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009096 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
9097 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
sewardjdf1628c2014-06-10 22:52:05 +00009098 binop(Iop_AndV128, argL, argR),
9099 mkV128(0x0000))));
sewardjdf9d6d52014-06-27 10:43:22 +00009100 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009101 const HChar* nm = isEQ ? "cmeq" : "cmtst";
9102 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9103 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9104 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9105 return True;
9106 }
9107
9108 if (opcode == BITS5(1,0,0,1,0)) {
9109 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
9110 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
9111 if (bitQ == 0 && size == X11) return False; // implied 1d case
9112 Bool isMLS = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009113 IROp opMUL = mkVecMUL(size);
9114 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
9115 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009116 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
9117 assign(res, binop(opADDSUB,
9118 getQReg128(dd),
9119 binop(opMUL, getQReg128(nn), getQReg128(mm))));
sewardjdf9d6d52014-06-27 10:43:22 +00009120 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009121 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9122 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
9123 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9124 return True;
9125 }
9126 return False;
9127 }
9128
9129 if (opcode == BITS5(1,0,0,1,1)) {
9130 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
9131 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
9132 if (bitQ == 0 && size == X11) return False; // implied 1d case
9133 Bool isPMUL = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009134 const IROp opsPMUL[4]
9135 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
sewardj8e91fd42014-07-11 12:05:47 +00009136 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
9137 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009138 if (opMUL != Iop_INVALID) {
9139 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009140 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009141 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9142 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
9143 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9144 return True;
9145 }
9146 return False;
9147 }
9148
sewardja5a6b752014-06-30 07:33:56 +00009149 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
9150 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
9151 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
9152 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
9153 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
9154 if (size == X11) return False;
9155 Bool isU = bitU == 1;
9156 Bool isMAX = opcode == BITS5(1,0,1,0,0);
sewardj8e91fd42014-07-11 12:05:47 +00009157 IRTemp vN = newTempV128();
9158 IRTemp vM = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009159 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
9160 : (isU ? mkVecMINU(size) : mkVecMINS(size));
9161 assign(vN, getQReg128(nn));
9162 assign(vM, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009163 IRTemp res128 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009164 assign(res128,
9165 binop(op,
9166 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
9167 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
9168 /* In the half-width case, use CatEL32x4 to extract the half-width
9169 result from the full-width result. */
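      /* Illustrative sketch (assumed lane numbering, 32x4 case, highest
         lane written first): CatEvenLanes32x4(vM,vN) = [m2 m0 n2 n0]
         and CatOddLanes32x4(vM,vN) = [m3 m1 n3 n1], so applying the
         lanewise op gives [op(m3,m2) op(m1,m0) op(n3,n2) op(n1,n0)],
         which is exactly the pairwise result the architecture wants. */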
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
                              : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
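   /* Note on the INSN macro defined below: INSN(bMax,bMin) is the
      inclusive bit slice insn[bMax:bMin]; for example INSN(23,22)
      == (insn >> 22) & 3, which is the two-bit 'size' field above. */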
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool isH = size == X01;
      IRTemp res = newTempV128();
      IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
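      /* Worked example (illustrative): for SADDLP Vd.4h, Vn.8b, each
         16-bit result lane is the sum of a sign-extended byte pair,
         e.g. lane 0 = SExt(Vn.b[1]) + SExt(Vn.b[0]); the odd and even
         widenings below produce those two addends. */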
      if (size == X11) return False; // no 1d or 2d cases
      Bool isU = bitU == 1;
      Bool isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool isCLZ = bitU == 1;
      IRTemp res = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      IROp opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isFNEG = bitU == 1;
      IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                       : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool is2 = bitQ == 1;
      IROp opN = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool is2 = bitQ == 1;
      IROp opN = Iop_INVALID;
      Bool zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
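      /* Saturation detection: re-widen the narrowed result and compare
         it with the original source; any lane that differs must have
         saturated, and that difference is what sets the QC flag. */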
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2 = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }

   if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
      IRTemp rm = mk_get_IR_rounding_mode();
      IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
      IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
      putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
      putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
          nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28      22 21 15     9 4
         0q0 01110 0 sz 1  00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1  00001 110110 n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt nLanes = 0;
         Bool zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                    isQ, isF64 );
         IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                        : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm = mk_get_IR_rounding_mode();
         UInt i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21 20 19 15     11 9 4
      0 Q U 01111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
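   /* Illustrative example: in the 's' lane cases below, the second
      source register is mm = M:m (5 bits) and the lane index is
      ix = H:L (2 bits), so an operand written as v17.s[3] has
      M:m = 10001 and H:L = 11. */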
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD = (size & 1) == 1;
      UInt index;
      if (!isD) index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt mm = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      IRTemp res = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp opMUL = mkVecMUL(size);
      IROp opADD = mkVecADD(size);
      IROp opSUB = mkVecSUB(size);
      HChar ch = size == X01 ? 'h' : 's';
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      IRTemp vecN = newTempV128();
      IRTemp res = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
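      /* The "mas"[ks] character selects, inside math_MULL_ACC, a plain
         widening multiply, a multiply-accumulate or a multiply-subtract
         (ks == 0, 1, 2 respectively). */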
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
# undef INSN
}


static
Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 13   9 4
      000 11110 ty 1  m  op 1000 n opcode2
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op,opcode2
   */
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
      return False;
   }
   UInt ty      = INSN(23,22);
   UInt mm      = INSN(20,16);
   UInt op      = INSN(15,14);
   UInt nn      = INSN(9,5);
   UInt opcode2 = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01 && op == X00
       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
      /* -------- 0x,00,00000 FCMP  d_d,  s_s  -------- */
      /* -------- 0x,00,01000 FCMP  d_#0, s_#0 -------- */
      /* -------- 0x,00,10000 FCMPE d_d,  s_s  -------- */
      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
      /* 31        23 20      15        9 4
         000 11110 01 1     m 00 1000   n 10 000  FCMPE Dn, Dm
         000 11110 01 1 00000 00 1000   n 11 000  FCMPE Dn, #0.0
         000 11110 01 1     m 00 1000   n 00 000  FCMP  Dn, Dm
         000 11110 01 1 00000 00 1000   n 01 000  FCMP  Dn, #0.0

         000 11110 00 1     m 00 1000   n 10 000  FCMPE Sn, Sm
         000 11110 00 1 00000 00 1000   n 11 000  FCMPE Sn, #0.0
         000 11110 00 1     m 00 1000   n 00 000  FCMP  Sn, Sm
         000 11110 00 1 00000 00 1000   n 01 000  FCMP  Sn, #0.0

         FCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
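      /* For reference (architectural behaviour, stated here as a
         reminder rather than derived from this code): FCMP/FCMPE set
         NZCV = 0110 for equal, 1000 for less-than, 0010 for
         greater-than and 0011 for unordered.
         mk_convert_IRCmpF64Result_to_NZCV produces that nibble, which
         is then shifted up to bits 31:28 below. */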
10077 Bool isD = (ty & 1) == 1;
10078 Bool isCMPE = (opcode2 & 16) == 16;
10079 Bool cmpZero = (opcode2 & 8) == 8;
10080 IRType ity = isD ? Ity_F64 : Ity_F32;
10081 Bool valid = True;
10082 if (cmpZero && mm != 0) valid = False;
10083 if (valid) {
10084 IRTemp argL = newTemp(ity);
10085 IRTemp argR = newTemp(ity);
10086 IRTemp irRes = newTemp(Ity_I32);
10087 assign(argL, getQRegLO(nn, ity));
10088 assign(argR,
10089 cmpZero
10090 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
10091 : getQRegLO(mm, ity));
10092 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
10093 mkexpr(argL), mkexpr(argR)));
10094 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
10095 IRTemp nzcv_28x0 = newTemp(Ity_I64);
10096 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
10097 setFlags_COPY(nzcv_28x0);
10098 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
10099 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
10100 return True;
10101 }
10102 return False;
10103 }
10104
sewardjdf1628c2014-06-10 22:52:05 +000010105 return False;
10106# undef INSN
10107}
10108
sewardj5747c4a2014-06-11 20:57:23 +000010109
sewardjdf1628c2014-06-10 22:52:05 +000010110static
10111Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
10112{
10113# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10114 return False;
10115# undef INSN
10116}
10117
sewardjfc83d2c2014-06-12 10:15:46 +000010118
sewardjdf1628c2014-06-10 22:52:05 +000010119static
10120Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
10121{
10122# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10123 return False;
10124# undef INSN
10125}
10126
sewardj5747c4a2014-06-11 20:57:23 +000010127
sewardjdf1628c2014-06-10 22:52:05 +000010128static
10129Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
10130{
10131 /* 31 28 23 21 20 14 9 4
10132 000 11110 ty 1 opcode 10000 n d
10133 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000010134 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000010135 */
10136# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10137 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10138 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
10139 return False;
10140 }
10141 UInt ty = INSN(23,22);
10142 UInt opcode = INSN(20,15);
10143 UInt nn = INSN(9,5);
10144 UInt dd = INSN(4,0);
10145
10146 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
10147 /* -------- 0x,000000: FMOV d_d, s_s -------- */
10148 /* -------- 0x,000001: FABS d_d, s_s -------- */
10149 /* -------- 0x,000010: FNEG d_d, s_s -------- */
10150 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
10151 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
10152 IRTemp src = newTemp(ity);
10153 IRTemp res = newTemp(ity);
10154 const HChar* nm = "??";
10155 assign(src, getQRegLO(nn, ity));
10156 switch (opcode) {
10157 case BITS6(0,0,0,0,0,0):
10158 nm = "fmov"; assign(res, mkexpr(src)); break;
10159 case BITS6(0,0,0,0,0,1):
10160 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
10161 case BITS6(0,0,0,0,1,0):
10162 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
10163 case BITS6(0,0,0,0,1,1):
10164 nm = "fsqrt";
10165 assign(res, binop(mkSQRTF(ity),
10166 mkexpr(mk_get_IR_rounding_mode()),
10167 mkexpr(src))); break;
10168 default:
10169 vassert(0);
10170 }
10171 putQReg128(dd, mkV128(0x0000));
10172 putQRegLO(dd, mkexpr(res));
10173 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10174 return True;
10175 }
10176
sewardj5747c4a2014-06-11 20:57:23 +000010177 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
10178 || opcode == BITS6(0,0,0,1,0,1)))
10179 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
10180 || opcode == BITS6(0,0,0,1,0,1)))
10181 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
10182 || opcode == BITS6(0,0,0,1,0,0)))) {
10183 /* -------- 11,000100: FCVT s_h -------- */
10184 /* -------- 11,000101: FCVT d_h -------- */
10185 /* -------- 00,000111: FCVT h_s -------- */
10186 /* -------- 00,000101: FCVT d_s -------- */
10187 /* -------- 01,000111: FCVT h_d -------- */
10188 /* -------- 01,000100: FCVT s_d -------- */
10189 /* 31 23 21 16 14 9 4
10190 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
10191 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
10192 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
10193 --------- 00 ----- 01 --------- FCVT Dd, Sn
10194 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
10195 --------- 01 ----- 00 --------- FCVT Sd, Dn
10196 Rounding, when dst is smaller than src, is per the FPCR.
10197 */
10198 UInt b2322 = ty;
10199 UInt b1615 = opcode & BITS2(1,1);
      if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
         /* Convert S to D */
         IRTemp res = newTemp(Ity_F64);
         assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
         return True;
      }
      if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
         /* Convert D to S */
         IRTemp res = newTemp(Ity_F32);
         assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, Ity_F64)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
         return True;
      }
      /* else unhandled */
      return False;
   }

   if (ty <= X01
       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
       && opcode != BITS6(0,0,1,1,0,1)) {
      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
      /* 31        23 21   17  14    9 4
         000 11110 0x 1001 111 10000 n d   FRINTI Fd, Fm (round per FPCR)
                      rm
         x==0 => S-registers, x==1 => D-registers
         rm (17:15) encodings:
            111 per FPCR (FRINTI)
            001 +inf (FRINTP)
            010 -inf (FRINTM)
            011 zero (FRINTZ)
            000 tieeven (FRINTN)
            100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
            110 per FPCR + "exact = TRUE" (FRINTX)
            101 unallocated
      */
      Bool isD = (ty & 1) == 1;
      UInt rm  = opcode & BITS6(0,0,0,1,1,1);
      IRType ity = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      UChar ch = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS4(0,0,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
         default: vassert(0);
      }
      IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ity), getQRegLO(mm, ity));
      IRTemp res = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp res = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31          22 20 15 14 9 4   ix
         000 11111 0 sz 0  m  0  a n d 0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0  m  1  a n d 1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  0  a n d 2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  1  a n d 3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                 -----SPEC------    ----IMPL----
         fmadd   a +    n * m       a + n * m
         fmsub   a + (-n) * m       a - n * m
         fnmadd  (-a) + (-n) * m    -(a + n * m)
         fnmsub  (-a) +    n * m    -(a - n * m)
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    ix    = (bitO1 << 1) | bitO0;
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ity);
      IROp    opSUB = mkSUBF(ity);
      IROp    opMUL = mkMULF(ity);
      IROp    opNEG = mkNEGF(ity);
      IRTemp  res   = newTemp(ity);
      IRExpr* eA    = getQRegLO(aa, ity);
      IRExpr* eN    = getQRegLO(nn, ity);
      IRExpr* eM    = getQRegLO(mm, ity);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool isD = (ty & 1) == 1;
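      /* Per the ARM ARM, VFPExpandImm turns the 8-bit immediate
         abcdefgh into a full FP constant: sign = a, exponent =
         NOT(b) then b replicated then cd, fraction = efgh zero
         padded on the right.  So the encodable values are exactly
         +/- n/16 * 2^r, for 16 <= n <= 31 and -3 <= r <= 4. */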
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
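   /* Placeholder: no FP <-> fixed-point conversions are decoded
      yet, so everything in this group is rejected. */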
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf 0  0  11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm  op     15     9 4
      0    0   0 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
      1    0   0 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
      2    1   0 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
      3    1   0 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn

      4    0   0 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
      5    0   0 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
      6    1   0 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
      7    1   0 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
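      /* I32{S,U}toF64 are exact, since every 32-bit integer is
         representable as an F64, so VEX defines them as unops with
         no rounding mode; the other six conversions can round and
         hence take one. */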
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S       ty rm  op     15     9 4
      (1)  0 0 0 11110 00 1  00 111 000000 n d  FMOV Sd,      Wn
      (2)  1 0 0 11110 01 1  00 111 000000 n d  FMOV Dd,      Xn
      (3)  1 0 0 11110 10 1  01 111 000000 n d  FMOV Vd.D[1], Xn

      (4)  0 0 0 11110 00 1  00 110 000000 n d  FMOV Wd, Sn
      (5)  1 0 0 11110 01 1  00 110 000000 n d  FMOV Xd, Dn
      (6)  1 0 0 11110 10 1  01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
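   /* Try each subgroup decoder in turn.  Each one inspects fixed
      fields of |insn| and returns False immediately if the encoding
      is not in its group, so for a valid instruction at most one of
      them should accept it. */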
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction              ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop we're
            // injecting here can change.  In which case the translation has to
            // be redone.  For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                        ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
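         /* Render the insn in binary, one apostrophe between the two
            nibbles of each byte and a space between bytes, e.g.
            0000'0000 0000'0000 0000'0000 0000'0000, to ease manual
            decoding of the offending word. */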
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/