/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//ZZ /* XXXX thumb to check:
//ZZ    that all cases where putIRegT writes r15, we generate a jump.
//ZZ
//ZZ    All uses of newTemp assign to an IRTemp and not a UInt
//ZZ
//ZZ    For all thumb loads and stores, including VFP ones, new-ITSTATE is
//ZZ    backed out before the memory op, and restored afterwards.  This
//ZZ    needs to happen even after we go uncond.  (and for sure it doesn't
//ZZ    happen for VFP loads/stores right now).
//ZZ
//ZZ    VFP on thumb: check that we exclude all r13/r15 cases that we
//ZZ    should.
//ZZ
//ZZ    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
//ZZ    taking into account the number of insns guarded by an IT.
//ZZ
//ZZ    remove the nasty hack, in the spechelper, of looking for Or32(...,
//ZZ    0xE0) as the first arg to armg_calculate_condition, and instead
//ZZ    use Slice44 as specified in comments in the spechelper.
//ZZ
//ZZ    add specialisations for armg_calculate_flag_c and _v, as they
//ZZ    are moderately often needed in Thumb code.
//ZZ
//ZZ    Correctness: ITSTATE handling in Thumb SVCs is wrong.
//ZZ
//ZZ    Correctness (obscure): in m_transtab, when invalidating code
//ZZ    address ranges, invalidate up to 18 bytes after the end of the
//ZZ    range.  This is because the ITSTATE optimisation at the top of
//ZZ    _THUMB_WRK below analyses up to 18 bytes before the start of any
//ZZ    given instruction, and so might depend on the invalidated area.
//ZZ */
//ZZ
//ZZ /* Limitations, etc
//ZZ
//ZZ    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
//ZZ      These instructions are non-restartable in the case where the
//ZZ      transfer(s) fault.
//ZZ
//ZZ    - SWP: the restart jump back is Ijk_Boring; it should be
//ZZ      Ijk_NoRedir but that's expensive.  See comments on casLE() in
//ZZ      guest_x86_toIR.c.
//ZZ */

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
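
/* Purely illustrative, and disabled: a minimal sketch of how the
   16-byte preamble above can be recognised, assuming the four words
   w[0..3] have already been fetched little-endianly.  The helper name
   'isSpecialPreamble' is hypothetical and not part of the decoder. */
#if 0
static Bool isSpecialPreamble ( const UInt* w )
{
   /* The four fixed preamble words, as listed above. */
   return w[0] == 0x93CC0D8CU && w[1] == 0x93CC358CU
          && w[2] == 0x93CCCD8CU && w[3] == 0x93CCF58CU;
}
#endif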

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                               ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the         ---*/
/*--- arm insn stream.                                      ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
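
/* Purely illustrative, and disabled: two worked examples of sx_to_64.
   0x2A is 101010 in binary, so as a 6-bit value its top bit is set,
   but as a 7-bit value it is not. */
#if 0
static void sx_to_64_examples ( void )
{
   /* Top bit (bit 5) is 1, so it sign-extends to all-ones above. */
   vassert(sx_to_64(0x2AULL, 6) == 0xFFFFFFFFFFFFFFEAULL);
   /* Top bit (bit 6) is 0, so the value is unchanged. */
   vassert(sx_to_64(0x2AULL, 7) == 0x2AULL);
}
#endif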

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)  \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)  \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)  \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
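
/* Purely illustrative, and disabled: typical use of SLICE_UInt to
   pull fields out of a 32-bit instruction word.  The field positions
   are examples only, not a claim about any particular encoding. */
#if 0
static void slice_examples ( UInt insn )
{
   UInt rd   = SLICE_UInt(insn, 4, 0);    /* insn[4:0]   */
   UInt rn   = SLICE_UInt(insn, 9, 5);    /* insn[9:5]   */
   UInt top8 = SLICE_UInt(insn, 31, 24);  /* insn[31:24] */
   vassert(rd < 32 && rn < 32 && top8 < 256);
}
#endif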


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.     ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

//static
//void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
//{
//   vassert(t1 && *t1 == IRTemp_INVALID);
//   vassert(t2 && *t2 == IRTemp_INVALID);
//   vassert(t3 && *t3 == IRTemp_INVALID);
//   vassert(t4 && *t4 == IRTemp_INVALID);
//   *t1 = newTempV128();
//   *t2 = newTempV128();
//   *t3 = newTempV128();
//   *t4 = newTempV128();
//}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATU2U ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATS2S ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATS2U ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2 };
   vassert(size < 4);
   return ops[size];
}


/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
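
/* Purely illustrative, and disabled: what mathREPLICATE computes,
   modelled on a plain 64-bit integer.  Shifting bit 'imm' up to the
   sign position and arithmetic-shifting it back down smears that bit
   across the whole word. */
#if 0
static ULong replicate_bit_example ( ULong arg, UInt imm )
{
   Long r = (Long)(arg << (63 - imm));  /* bit 'imm' now at bit 63 */
   return (ULong)(r >> 63);             /* 0 or all-ones */
}
#endif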

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.                ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_I16:                laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
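
/* Purely illustrative, and disabled: with the little-endian layout
   implemented above, lane 2 of a 32-bit-laned view of Q3 lives 8
   bytes above the start of Q3. */
#if 0
static void qreg_lane_offset_example ( void )
{
   vassert(offsetQRegLane(3, Ity_I32, 2) == offsetQReg128(3) + 8);
}
#endif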

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
          ));
   return swapped;
}
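
/* Purely illustrative, and disabled: the same bit-swap modelled on
   plain integers, making the ARM-to-IR rounding-mode mapping
   concrete: 0 -> 0 (nearest), 1 -> 2 (+inf), 2 -> 1 (-inf),
   3 -> 3 (zero). */
#if 0
static UInt swap_rm_bits ( UInt armEncd /* FPCR[23:22] in bits 1:0 */ )
{
   return ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);
}
#endif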


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns       ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
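
/* Purely illustrative, and disabled: the (cond << 4) | cc_op packing
   used above, shown on plain integers.  Both fields must fit in 4
   bits, so the helper can recover them from the low 8 bits of its
   first argument. */
#if 0
static void cond_op_packing_example ( void )
{
   UInt cond = ARM64CondNE;
   UInt ccOp = ARM64G_CC_OP_COPY;
   UInt pair = (cond << 4) | ccOp;
   vassert((pair >> 4) == cond && (pair & 0xF) == ccOp);
}
#endif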
1702
1703
1704/* Build IR to calculate some particular condition from stored
1705 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1706 Ity_I64, suitable for narrowing. Although the return type is
1707 Ity_I64, the returned value is either 0 or 1.
1708*/
1709static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1710{
1711   /* First arg is "(cond << 4) | stored-operation". This requires that the
1712 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1713 (COND, OP) pair in the lowest 8 bits of the first argument. */
1714 vassert(cond >= 0 && cond <= 15);
1715 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1716}
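/* Example (a hand-worked sketch, not from the original source): for
   a "b.ne", the decoder calls mk_arm64g_calculate_condition with
   ARM64CondNE, which is 1 in the standard ARM condition numbering.
   If the thunk currently holds ARM64G_CC_OP_SUB64, the helper's
   first argument is then (1 << 4) | ARM64G_CC_OP_SUB64 -- the
   (COND, OP) pair described above. */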
1717
1718
1719/* Build IR to calculate just the carry flag from stored
1720 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1721 Ity_I64. */
1722static IRExpr* mk_arm64g_calculate_flag_c ( void )
1723{
1724 IRExpr** args
1725 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1726 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1727 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1728 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1729 IRExpr* call
1730 = mkIRExprCCall(
1731 Ity_I64,
1732 0/*regparm*/,
1733 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1734 args
1735 );
1736 /* Exclude OP and NDEP from definedness checking. We're only
1737 interested in DEP1 and DEP2. */
1738 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1739 return call;
1740}
1741
1742
1743//ZZ /* Build IR to calculate just the overflow flag from stored
1744//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1745//ZZ Ity_I32. */
1746//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1747//ZZ {
1748//ZZ IRExpr** args
1749//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1750//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1751//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1752//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1753//ZZ IRExpr* call
1754//ZZ = mkIRExprCCall(
1755//ZZ Ity_I32,
1756//ZZ 0/*regparm*/,
1757//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1758//ZZ args
1759//ZZ );
1760//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1761//ZZ interested in DEP1 and DEP2. */
1762//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1763//ZZ return call;
1764//ZZ }
1765
1766
1767/* Build IR to calculate N Z C V in bits 31:28 of the
1768 returned word. */
1769static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1770{
1771 IRExpr** args
1772 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1773 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1774 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1775 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1776 IRExpr* call
1777 = mkIRExprCCall(
1778 Ity_I64,
1779 0/*regparm*/,
1780 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1781 args
1782 );
1783 /* Exclude OP and NDEP from definedness checking. We're only
1784 interested in DEP1 and DEP2. */
1785 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1786 return call;
1787}
1788
1789
1790/* Build IR to set the flags thunk, in the most general case. */
1791static
1792void setFlags_D1_D2_ND ( UInt cc_op,
1793 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1794{
1795   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1796   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1797   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1798 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1799 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1800 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1801 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1802 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1803}
1804
1805/* Build IR to set the flags thunk after ADD or SUB. */
1806static
1807void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1808{
1809 IRTemp argL64 = IRTemp_INVALID;
1810 IRTemp argR64 = IRTemp_INVALID;
1811 IRTemp z64 = newTemp(Ity_I64);
1812 if (is64) {
1813 argL64 = argL;
1814 argR64 = argR;
1815 } else {
1816 argL64 = newTemp(Ity_I64);
1817 argR64 = newTemp(Ity_I64);
1818 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1819 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1820 }
1821 assign(z64, mkU64(0));
1822 UInt cc_op = ARM64G_CC_OP_NUMBER;
1823 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1824 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1825 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1826 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1827 else { vassert(0); }
1828 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1829}
1830
1831/* Build IR to set the flags thunk after ADC or SBC. */
1832static
1833void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1834 IRTemp argL, IRTemp argR, IRTemp oldC )
1835{
1836 IRTemp argL64 = IRTemp_INVALID;
1837 IRTemp argR64 = IRTemp_INVALID;
1838 IRTemp oldC64 = IRTemp_INVALID;
1839 if (is64) {
1840 argL64 = argL;
1841 argR64 = argR;
1842 oldC64 = oldC;
1843 } else {
1844 argL64 = newTemp(Ity_I64);
1845 argR64 = newTemp(Ity_I64);
1846 oldC64 = newTemp(Ity_I64);
1847 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1848 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1849 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1850 }
1851 UInt cc_op = ARM64G_CC_OP_NUMBER;
1852 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1853 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1854 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1855 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1856 else { vassert(0); }
1857 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1858}
1859
1860/* Build IR to set the flags thunk after ADD or SUB, if the given
1861 condition evaluates to True at run time. If not, the flags are set
1862 to the specified NZCV value. */
1863static
1864void setFlags_ADD_SUB_conditionally (
1865 Bool is64, Bool isSUB,
1866 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1867 )
1868{
1869 /* Generate IR as follows:
1870 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1871 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1872 CC_DEP2 = ITE(cond, argR64, 0)
1873 CC_NDEP = 0
1874 */
1875
1876 IRTemp z64 = newTemp(Ity_I64);
1877 assign(z64, mkU64(0));
1878
1879 /* Establish the operation and operands for the True case. */
1880 IRTemp t_dep1 = IRTemp_INVALID;
1881 IRTemp t_dep2 = IRTemp_INVALID;
1882 UInt t_op = ARM64G_CC_OP_NUMBER;
1883 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1884 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1885 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1886 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1887 else { vassert(0); }
1888 /* */
1889 if (is64) {
1890 t_dep1 = argL;
1891 t_dep2 = argR;
1892 } else {
1893 t_dep1 = newTemp(Ity_I64);
1894 t_dep2 = newTemp(Ity_I64);
1895 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1896 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1897 }
1898
1899 /* Establish the operation and operands for the False case. */
1900 IRTemp f_dep1 = newTemp(Ity_I64);
1901 IRTemp f_dep2 = z64;
1902 UInt f_op = ARM64G_CC_OP_COPY;
1903 assign(f_dep1, mkU64(nzcv << 28));
1904
1905 /* Final thunk values */
1906 IRTemp dep1 = newTemp(Ity_I64);
1907 IRTemp dep2 = newTemp(Ity_I64);
1908 IRTemp op = newTemp(Ity_I64);
1909
1910 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1911 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1912 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1913
1914 /* finally .. */
1915 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1916 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1917 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1918 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1919}
1920
1921/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1922static
1923void setFlags_LOGIC ( Bool is64, IRTemp res )
1924{
1925 IRTemp res64 = IRTemp_INVALID;
1926 IRTemp z64 = newTemp(Ity_I64);
1927 UInt cc_op = ARM64G_CC_OP_NUMBER;
1928 if (is64) {
1929 res64 = res;
1930 cc_op = ARM64G_CC_OP_LOGIC64;
1931 } else {
1932 res64 = newTemp(Ity_I64);
1933 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1934 cc_op = ARM64G_CC_OP_LOGIC32;
1935 }
1936 assign(z64, mkU64(0));
1937 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1938}
1939
1940/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1941 located in bits 31:28 of the supplied value. */
1942static
1943void setFlags_COPY ( IRTemp nzcv_28x0 )
1944{
1945 IRTemp z64 = newTemp(Ity_I64);
1946 assign(z64, mkU64(0));
1947 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1948}
1949
1950
1951//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1952//ZZ sets it at all) */
1953//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1954//ZZ IRTemp t_dep2,
1955//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1956//ZZ {
1957//ZZ IRTemp z32 = newTemp(Ity_I32);
1958//ZZ assign( z32, mkU32(0) );
1959//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1960//ZZ }
1961//ZZ
1962//ZZ
1963//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1964//ZZ sets it at all) */
1965//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1966//ZZ IRTemp t_ndep,
1967//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1968//ZZ {
1969//ZZ IRTemp z32 = newTemp(Ity_I32);
1970//ZZ assign( z32, mkU32(0) );
1971//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1972//ZZ }
1973//ZZ
1974//ZZ
1975//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1976//ZZ sets them at all) */
1977//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1978//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1979//ZZ {
1980//ZZ IRTemp z32 = newTemp(Ity_I32);
1981//ZZ assign( z32, mkU32(0) );
1982//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1983//ZZ }
1984
1985
1986/*------------------------------------------------------------*/
1987/*--- Misc math helpers ---*/
1988/*------------------------------------------------------------*/
1989
1990/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1991static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
1992{
1993   IRTemp maskT = newTemp(Ity_I64);
1994 IRTemp res = newTemp(Ity_I64);
1995 vassert(sh >= 1 && sh <= 63);
1996 assign(maskT, mkU64(mask));
1997   assign( res,
1998          binop(Iop_Or64,
1999 binop(Iop_Shr64,
2000                      binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2001 mkU8(sh)),
2002                binop(Iop_And64,
2003                      binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2004 mkexpr(maskT))
2005          )
2006 );
2007   return res;
2008}
2009
2010/* Generates byte swaps within 32-bit lanes. */
2011static IRTemp math_UINTSWAP64 ( IRTemp src )
2012{
2013 IRTemp res;
2014 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2015 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2016 return res;
2017}
2018
2019/* Generates byte swaps within 16-bit lanes. */
2020static IRTemp math_USHORTSWAP64 ( IRTemp src )
2021{
2022 IRTemp res;
2023 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2024 return res;
2025}
2026
2027/* Generates a 64-bit byte swap. */
2028static IRTemp math_BYTESWAP64 ( IRTemp src )
2029{
2030 IRTemp res;
2031 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2032 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2033 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2034 return res;
2035}
2036
2037/* Generates a 64-bit bit swap. */
2038static IRTemp math_BITSWAP64 ( IRTemp src )
2039{
2040   IRTemp res;
2041 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2042 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2043 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2044 return math_BYTESWAP64(res);
2045}
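/* How this works (a hand-checked sketch): the three SWAPHELPER
   passes reverse the bits within each byte -- swap adjacent bits,
   then adjacent 2-bit pairs, then nibbles -- and the final
   BYTESWAP64 reverses the byte order, giving a full 64-bit bit
   reversal.  For one byte: 0b10110001 -> 0b01110010 -> 0b11011000
   -> 0b10001101, which is indeed 0b10110001 bit-reversed. */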
2046
2047/* Duplicates the bits at the bottom of the given word to fill the
2048 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2049 except for the bottom bits. */
2050static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2051{
2052 if (srcTy == Ity_I8) {
2053 IRTemp t16 = newTemp(Ity_I64);
2054 assign(t16, binop(Iop_Or64, mkexpr(src),
2055 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2056 IRTemp t32 = newTemp(Ity_I64);
2057 assign(t32, binop(Iop_Or64, mkexpr(t16),
2058 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2059 IRTemp t64 = newTemp(Ity_I64);
2060 assign(t64, binop(Iop_Or64, mkexpr(t32),
2061 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2062 return t64;
2063 }
2064 if (srcTy == Ity_I16) {
2065 IRTemp t32 = newTemp(Ity_I64);
2066 assign(t32, binop(Iop_Or64, mkexpr(src),
2067 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2068 IRTemp t64 = newTemp(Ity_I64);
2069 assign(t64, binop(Iop_Or64, mkexpr(t32),
2070 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2071 return t64;
2072 }
2073 if (srcTy == Ity_I32) {
2074 IRTemp t64 = newTemp(Ity_I64);
2075 assign(t64, binop(Iop_Or64, mkexpr(src),
2076 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2077 return t64;
2078 }
2079 if (srcTy == Ity_I64) {
2080 return src;
2081 }
2082 vassert(0);
2083}
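/* E.g. (worked by hand) an Ity_I8 source holding 0xAB widens to
   0xABAB, then 0xABABABAB, then 0xABABABABABABABAB. */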
2084
2085
2086/* Duplicates the src element exactly so as to fill a V128 value. */
2087static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2088{
2089   IRTemp res = newTempV128();
2090   if (srcTy == Ity_F64) {
2091 IRTemp i64 = newTemp(Ity_I64);
2092 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2093 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2094 return res;
2095 }
2096 if (srcTy == Ity_F32) {
2097 IRTemp i64a = newTemp(Ity_I64);
2098 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2099 IRTemp i64b = newTemp(Ity_I64);
2100 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2101 mkexpr(i64a)));
2102 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2103 return res;
2104 }
2105   if (srcTy == Ity_I64) {
2106 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2107 return res;
2108 }
2109 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2110 IRTemp t1 = newTemp(Ity_I64);
2111 assign(t1, widenUto64(srcTy, mkexpr(src)));
2112 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2113 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2114 return res;
2115 }
2116   vassert(0);
2117}
2118
2119
2120/* |fullWidth| is a full V128 width result. Depending on bitQ,
2121 zero out the upper half. */
2122static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2123{
2124 if (bitQ == 1) return mkexpr(fullWidth);
2125 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2126 vassert(0);
2127}
2128
2129/* The same, but from an expression instead. */
2130static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2131{
2132   IRTemp fullWidthT = newTempV128();
2133   assign(fullWidthT, fullWidth);
2134 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2135}
2136
2137
2138/*------------------------------------------------------------*/
2139/*--- FP comparison helpers ---*/
2140/*------------------------------------------------------------*/
2141
2142/* irRes :: Ity_I32 holds a floating point comparison result encoded
2143 as an IRCmpF64Result. Generate code to convert it to an
2144 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2145 Assign a new temp to hold that value, and return the temp. */
2146static
2147IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2148{
2149 IRTemp ix = newTemp(Ity_I64);
2150 IRTemp termL = newTemp(Ity_I64);
2151 IRTemp termR = newTemp(Ity_I64);
2152 IRTemp nzcv = newTemp(Ity_I64);
2153 IRTemp irRes = newTemp(Ity_I64);
2154
2155 /* This is where the fun starts. We have to convert 'irRes' from
2156 an IR-convention return result (IRCmpF64Result) to an
2157 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2158 4 bits of 'nzcv'. */
2159 /* Map compare result from IR to ARM(nzcv) */
2160 /*
2161 FP cmp result | IR | ARM(nzcv)
2162 --------------------------------
2163 UN 0x45 0011
2164 LT 0x01 1000
2165 GT 0x00 0010
2166 EQ 0x40 0110
2167 */
2168 /* Now since you're probably wondering WTF ..
2169
2170 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2171 places them side by side, giving a number which is 0, 1, 2 or 3.
2172
2173 termL is a sequence cooked up by GNU superopt. It converts ix
2174 into an almost correct value NZCV value (incredibly), except
2175      into an almost correct NZCV value (incredibly), except
2176 required 0011.
2177
2178 termR is therefore a correction term, also computed from ix. It
2179      is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2180 the final correct value, we subtract termR from termL.
2181
2182 Don't take my word for it. There's a test program at the bottom
2183 of guest_arm_toIR.c, to try this out with.
2184 */
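   /* A hand-worked check (not mechanically verified): ix is 0 for
      GT, 1 for LT, 2 for EQ and 3 for UN.  In 64-bit arithmetic,
      termL = ((((ix ^ 1) << 62) - 1) >>u 61) + 1 evaluates to
      2, 8, 6 and 4 respectively, and termR = (ix & (ix >>u 1)) & 1
      is 1 only for ix == 3.  So termL - termR gives 0010, 1000,
      0110 and 0011, matching the table above. */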
2185 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2186
2187 assign(
2188 ix,
2189 binop(Iop_Or64,
2190 binop(Iop_And64,
2191 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2192 mkU64(3)),
2193 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2194
2195 assign(
2196 termL,
2197 binop(Iop_Add64,
2198 binop(Iop_Shr64,
2199 binop(Iop_Sub64,
2200 binop(Iop_Shl64,
2201 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2202 mkU8(62)),
2203 mkU64(1)),
2204 mkU8(61)),
2205 mkU64(1)));
2206
2207 assign(
2208 termR,
2209 binop(Iop_And64,
2210 binop(Iop_And64,
2211 mkexpr(ix),
2212 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2213 mkU64(1)));
2214
2215 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2216 return nzcv;
2217}
2218
2219
2220/*------------------------------------------------------------*/
2221/*--- Data processing (immediate) ---*/
2222/*------------------------------------------------------------*/
2223
2224/* Helper functions for supporting "DecodeBitMasks" */
2225
2226static ULong dbm_ROR ( Int width, ULong x, Int rot )
2227{
2228 vassert(width > 0 && width <= 64);
2229 vassert(rot >= 0 && rot < width);
2230 if (rot == 0) return x;
2231 ULong res = x >> rot;
2232 res |= (x << (width - rot));
2233 if (width < 64)
2234 res &= ((1ULL << width) - 1);
2235 return res;
2236}
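/* E.g. dbm_ROR(8, 0x03, 1) == 0x81: rotating right by one within an
   8-bit word moves bit 0 round to bit 7. */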
2237
2238static ULong dbm_RepTo64( Int esize, ULong x )
2239{
2240 switch (esize) {
2241 case 64:
2242 return x;
2243 case 32:
2244 x &= 0xFFFFFFFF; x |= (x << 32);
2245 return x;
2246 case 16:
2247 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2248 return x;
2249 case 8:
2250 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2251 return x;
2252 case 4:
2253 x &= 0xF; x |= (x << 4); x |= (x << 8);
2254 x |= (x << 16); x |= (x << 32);
2255 return x;
2256 case 2:
2257 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2258 x |= (x << 16); x |= (x << 32);
2259 return x;
2260 default:
2261 break;
2262 }
2263 vpanic("dbm_RepTo64");
2264 /*NOTREACHED*/
2265 return 0;
2266}
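/* E.g. dbm_RepTo64(8, 0xA5) == 0xA5A5A5A5A5A5A5A5ULL: the 8-bit
   element is replicated into every byte lane. */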
2267
2268static Int dbm_highestSetBit ( ULong x )
2269{
2270 Int i;
2271 for (i = 63; i >= 0; i--) {
2272 if (x & (1ULL << i))
2273 return i;
2274 }
2275 vassert(x == 0);
2276 return -1;
2277}
2278
2279static
2280Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2281 ULong immN, ULong imms, ULong immr, Bool immediate,
2282 UInt M /*32 or 64*/)
2283{
2284 vassert(immN < (1ULL << 1));
2285 vassert(imms < (1ULL << 6));
2286 vassert(immr < (1ULL << 6));
2287 vassert(immediate == False || immediate == True);
2288 vassert(M == 32 || M == 64);
2289
2290 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2291 if (len < 1) { /* printf("fail1\n"); */ return False; }
2292 vassert(len <= 6);
2293 vassert(M >= (1 << len));
2294
2295 vassert(len >= 1 && len <= 6);
2296 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2297 (1 << len) - 1;
2298 vassert(levels >= 1 && levels <= 63);
2299
2300 if (immediate && ((imms & levels) == levels)) {
2301 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2302 return False;
2303 }
2304
2305 ULong S = imms & levels;
2306 ULong R = immr & levels;
2307 Int diff = S - R;
2308 diff &= 63;
2309 Int esize = 1 << len;
2310 vassert(2 <= esize && esize <= 64);
2311
2312 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2313 same below with d. S can be 63 in which case we have an out of
2314 range and hence undefined shift. */
2315 vassert(S >= 0 && S <= 63);
2316 vassert(esize >= (S+1));
2317 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2318 //(1ULL << (S+1)) - 1;
2319 ((1ULL << S) - 1) + (1ULL << S);
2320
2321 Int d = // diff<len-1:0>
2322 diff & ((1 << len)-1);
2323 vassert(esize >= (d+1));
2324 vassert(d >= 0 && d <= 63);
2325
2326 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2327 //(1ULL << (d+1)) - 1;
2328 ((1ULL << d) - 1) + (1ULL << d);
2329
2330 if (esize != 64) vassert(elem_s < (1ULL << esize));
2331 if (esize != 64) vassert(elem_d < (1ULL << esize));
2332
2333 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2334 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2335
2336 return True;
2337}
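/* A worked example (hand-checked against the code above): for N=1,
   immr=0, imms=0b000111, immediate=True and M=64, we get len=6,
   esize=64, levels=63, S=7 and R=0.  Hence elem_s is 0xFF, no
   rotation is applied, and wmask comes out as 0x00000000000000FF --
   the encoding of the 64-bit logical immediate #0xFF. */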
2338
2339
2340static
2341Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2342 UInt insn)
2343{
2344# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2345
2346 /* insn[28:23]
2347 10000x PC-rel addressing
2348 10001x Add/subtract (immediate)
2349 100100 Logical (immediate)
2350 100101 Move Wide (immediate)
2351 100110 Bitfield
2352 100111 Extract
2353 */
2354
2355 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2356 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2357 Bool is64 = INSN(31,31) == 1;
2358 Bool isSub = INSN(30,30) == 1;
2359 Bool setCC = INSN(29,29) == 1;
2360 UInt sh = INSN(23,22);
2361 UInt uimm12 = INSN(21,10);
2362 UInt nn = INSN(9,5);
2363 UInt dd = INSN(4,0);
2364 const HChar* nm = isSub ? "sub" : "add";
2365 if (sh >= 2) {
2366 /* Invalid; fall through */
2367 } else {
2368 vassert(sh <= 1);
2369 uimm12 <<= (12 * sh);
2370 if (is64) {
2371 IRTemp argL = newTemp(Ity_I64);
2372 IRTemp argR = newTemp(Ity_I64);
2373 IRTemp res = newTemp(Ity_I64);
2374 assign(argL, getIReg64orSP(nn));
2375 assign(argR, mkU64(uimm12));
2376 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2377 mkexpr(argL), mkexpr(argR)));
2378 if (setCC) {
2379 putIReg64orZR(dd, mkexpr(res));
2380 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2381 DIP("%ss %s, %s, 0x%x\n",
2382 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2383 } else {
2384 putIReg64orSP(dd, mkexpr(res));
2385 DIP("%s %s, %s, 0x%x\n",
2386 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2387 }
2388 } else {
2389 IRTemp argL = newTemp(Ity_I32);
2390 IRTemp argR = newTemp(Ity_I32);
2391 IRTemp res = newTemp(Ity_I32);
2392 assign(argL, getIReg32orSP(nn));
2393 assign(argR, mkU32(uimm12));
2394 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2395 mkexpr(argL), mkexpr(argR)));
2396 if (setCC) {
2397 putIReg32orZR(dd, mkexpr(res));
2398 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2399 DIP("%ss %s, %s, 0x%x\n",
2400 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2401 } else {
2402 putIReg32orSP(dd, mkexpr(res));
2403 DIP("%s %s, %s, 0x%x\n",
2404 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2405 }
2406 }
2407 return True;
2408 }
2409 }
2410
2411 /* -------------------- ADR/ADRP -------------------- */
2412 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2413 UInt bP = INSN(31,31);
2414 UInt immLo = INSN(30,29);
2415 UInt immHi = INSN(23,5);
2416 UInt rD = INSN(4,0);
2417 ULong uimm = (immHi << 2) | immLo;
2418 ULong simm = sx_to_64(uimm, 21);
2419 ULong val;
2420 if (bP) {
2421 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2422 } else {
2423 val = guest_PC_curr_instr + simm;
2424 }
2425 putIReg64orZR(rD, mkU64(val));
2426 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2427 return True;
2428 }
2429
2430 /* -------------------- LOGIC(imm) -------------------- */
2431 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2432 /* 31 30 28 22 21 15 9 4
2433 sf op 100100 N immr imms Rn Rd
2434 op=00: AND Rd|SP, Rn, #imm
2435 op=01: ORR Rd|SP, Rn, #imm
2436 op=10: EOR Rd|SP, Rn, #imm
2437 op=11: ANDS Rd|ZR, Rn, #imm
2438 */
2439 Bool is64 = INSN(31,31) == 1;
2440 UInt op = INSN(30,29);
2441 UInt N = INSN(22,22);
2442 UInt immR = INSN(21,16);
2443 UInt immS = INSN(15,10);
2444 UInt nn = INSN(9,5);
2445 UInt dd = INSN(4,0);
2446 ULong imm = 0;
2447 Bool ok;
2448 if (N == 1 && !is64)
2449 goto after_logic_imm; /* not allowed; fall through */
2450 ok = dbm_DecodeBitMasks(&imm, NULL,
2451 N, immS, immR, True, is64 ? 64 : 32);
2452 if (!ok)
2453 goto after_logic_imm;
2454
2455 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2456 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2457 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2458
2459 vassert(op < 4);
2460 if (is64) {
2461 IRExpr* argL = getIReg64orZR(nn);
2462 IRExpr* argR = mkU64(imm);
2463 IRTemp res = newTemp(Ity_I64);
2464 assign(res, binop(ops64[op], argL, argR));
2465 if (op < 3) {
2466 putIReg64orSP(dd, mkexpr(res));
2467 DIP("%s %s, %s, 0x%llx\n", names[op],
2468 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2469 } else {
2470 putIReg64orZR(dd, mkexpr(res));
2471 setFlags_LOGIC(True/*is64*/, res);
2472 DIP("%s %s, %s, 0x%llx\n", names[op],
2473 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2474 }
2475 } else {
2476 IRExpr* argL = getIReg32orZR(nn);
2477 IRExpr* argR = mkU32((UInt)imm);
2478 IRTemp res = newTemp(Ity_I32);
2479 assign(res, binop(ops32[op], argL, argR));
2480 if (op < 3) {
2481 putIReg32orSP(dd, mkexpr(res));
2482 DIP("%s %s, %s, 0x%x\n", names[op],
2483 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2484 } else {
2485 putIReg32orZR(dd, mkexpr(res));
2486 setFlags_LOGIC(False/*!is64*/, res);
2487 DIP("%s %s, %s, 0x%x\n", names[op],
2488 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2489 }
2490 }
2491 return True;
2492 }
2493 after_logic_imm:
2494
2495 /* -------------------- MOV{Z,N,K} -------------------- */
2496 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2497 /* 31 30 28 22 20 4
2498 | | | | | |
2499 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2500 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2501 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2502 */
2503 Bool is64 = INSN(31,31) == 1;
2504 UInt subopc = INSN(30,29);
2505 UInt hw = INSN(22,21);
2506 UInt imm16 = INSN(20,5);
2507 UInt dd = INSN(4,0);
2508 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2509 /* invalid; fall through */
2510 } else {
2511 ULong imm64 = ((ULong)imm16) << (16 * hw);
2512 if (!is64)
2513 vassert(imm64 < 0x100000000ULL);
2514 switch (subopc) {
2515 case BITS2(1,0): // MOVZ
2516 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2517 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2518 break;
2519 case BITS2(0,0): // MOVN
2520 imm64 = ~imm64;
2521 if (!is64)
2522 imm64 &= 0xFFFFFFFFULL;
2523 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2524 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2525 break;
2526 case BITS2(1,1): // MOVK
2527 /* This is more complex. We are inserting a slice into
2528 the destination register, so we need to have the old
2529 value of it. */
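               /* E.g. (a sketch) "movk x0, #0xBEEF, lsl 16" computes
                  x0 = (x0 & ~0xFFFF0000ULL) | 0xBEEF0000ULL. */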
2530 if (is64) {
2531 IRTemp old = newTemp(Ity_I64);
2532 assign(old, getIReg64orZR(dd));
2533 ULong mask = 0xFFFFULL << (16 * hw);
2534 IRExpr* res
2535 = binop(Iop_Or64,
2536 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2537 mkU64(imm64));
2538 putIReg64orZR(dd, res);
2539 DIP("movk %s, 0x%x, lsl %u\n",
2540 nameIReg64orZR(dd), imm16, 16*hw);
2541 } else {
2542 IRTemp old = newTemp(Ity_I32);
2543 assign(old, getIReg32orZR(dd));
2544 vassert(hw <= 1);
2545 UInt mask = 0xFFFF << (16 * hw);
2546 IRExpr* res
2547 = binop(Iop_Or32,
2548 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2549 mkU32((UInt)imm64));
2550 putIReg32orZR(dd, res);
2551 DIP("movk %s, 0x%x, lsl %u\n",
2552 nameIReg32orZR(dd), imm16, 16*hw);
2553 }
2554 break;
2555 default:
2556 vassert(0);
2557 }
2558 return True;
2559 }
2560 }
2561
2562 /* -------------------- {U,S,}BFM -------------------- */
2563 /* 30 28 22 21 15 9 4
2564
2565 sf 10 100110 N immr imms nn dd
2566 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2567 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2568
2569 sf 00 100110 N immr imms nn dd
2570 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2571 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2572
2573 sf 01 100110 N immr imms nn dd
2574 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2575 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2576 */
2577 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2578 UInt sf = INSN(31,31);
2579 UInt opc = INSN(30,29);
2580 UInt N = INSN(22,22);
2581 UInt immR = INSN(21,16);
2582 UInt immS = INSN(15,10);
2583 UInt nn = INSN(9,5);
2584 UInt dd = INSN(4,0);
2585 Bool inZero = False;
2586 Bool extend = False;
2587 const HChar* nm = "???";
2588 /* skip invalid combinations */
2589 switch (opc) {
2590 case BITS2(0,0):
2591 inZero = True; extend = True; nm = "sbfm"; break;
2592 case BITS2(0,1):
2593 inZero = False; extend = False; nm = "bfm"; break;
2594 case BITS2(1,0):
2595 inZero = True; extend = False; nm = "ubfm"; break;
2596 case BITS2(1,1):
2597 goto after_bfm; /* invalid */
2598 default:
2599 vassert(0);
2600 }
2601 if (sf == 1 && N != 1) goto after_bfm;
2602 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2603 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2604 ULong wmask = 0, tmask = 0;
2605 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2606 N, immS, immR, False, sf == 1 ? 64 : 32);
2607 if (!ok) goto after_bfm; /* hmmm */
2608
2609 Bool is64 = sf == 1;
2610 IRType ty = is64 ? Ity_I64 : Ity_I32;
2611
2612 IRTemp dst = newTemp(ty);
2613 IRTemp src = newTemp(ty);
2614 IRTemp bot = newTemp(ty);
2615 IRTemp top = newTemp(ty);
2616 IRTemp res = newTemp(ty);
2617 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2618 assign(src, getIRegOrZR(is64, nn));
2619 /* perform bitfield move on low bits */
2620 assign(bot, binop(mkOR(ty),
2621 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2622 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2623 mkU(ty, wmask))));
2624 /* determine extension bits (sign, zero or dest register) */
2625 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2626 /* combine extension bits and result bits */
2627 assign(res, binop(mkOR(ty),
2628 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2629 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2630 putIRegOrZR(is64, dd, mkexpr(res));
2631 DIP("%s %s, %s, immR=%u, immS=%u\n",
2632 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2633 return True;
2634 }
2635 after_bfm:
2636
2637 /* ---------------------- EXTR ---------------------- */
2638 /* 30 28 22 20 15 9 4
2639 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2640 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2641 */
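   /* Viewed as a double-width shift: for nonzero imm6 the result is
      (Rn << (size-imm6)) | (Rm >>u imm6), i.e. bits imm6+size-1 .. imm6
      of the concatenation Rn:Rm. */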
2642 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2643 Bool is64 = INSN(31,31) == 1;
2644 UInt mm = INSN(20,16);
2645 UInt imm6 = INSN(15,10);
2646 UInt nn = INSN(9,5);
2647 UInt dd = INSN(4,0);
2648 Bool valid = True;
2649 if (INSN(31,31) != INSN(22,22))
2650 valid = False;
2651 if (!is64 && imm6 >= 32)
2652 valid = False;
2653 if (!valid) goto after_extr;
2654 IRType ty = is64 ? Ity_I64 : Ity_I32;
2655 IRTemp srcHi = newTemp(ty);
2656 IRTemp srcLo = newTemp(ty);
2657 IRTemp res = newTemp(ty);
2658 assign(srcHi, getIRegOrZR(is64, nn));
2659 assign(srcLo, getIRegOrZR(is64, mm));
2660 if (imm6 == 0) {
2661 assign(res, mkexpr(srcLo));
2662 } else {
2663 UInt szBits = 8 * sizeofIRType(ty);
2664 vassert(imm6 > 0 && imm6 < szBits);
2665 assign(res, binop(mkOR(ty),
2666 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2667 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2668 }
2669 putIRegOrZR(is64, dd, mkexpr(res));
2670 DIP("extr %s, %s, %s, #%u\n",
2671 nameIRegOrZR(is64,dd),
2672 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2673 return True;
2674 }
2675 after_extr:
2676
2677 vex_printf("ARM64 front end: data_processing_immediate\n");
2678 return False;
2679# undef INSN
2680}
2681
2682
2683/*------------------------------------------------------------*/
2684/*--- Data processing (register) instructions ---*/
2685/*------------------------------------------------------------*/
2686
2687static const HChar* nameSH ( UInt sh ) {
2688 switch (sh) {
2689 case 0: return "lsl";
2690 case 1: return "lsr";
2691 case 2: return "asr";
2692 case 3: return "ror";
2693 default: vassert(0);
2694 }
2695}
2696
2697/* Generate IR to get a register value, possibly shifted by an
2698 immediate. Returns either a 32- or 64-bit temporary holding the
2699 result. After the shift, the value can optionally be NOT-ed
2700 too.
2701
2702 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2703 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2704 isn't allowed, but it's the job of the caller to check that.
2705*/
2706static IRTemp getShiftedIRegOrZR ( Bool is64,
2707 UInt sh_how, UInt sh_amt, UInt regNo,
2708 Bool invert )
2709{
2710 vassert(sh_how < 4);
2711 vassert(sh_amt < (is64 ? 64 : 32));
2712 IRType ty = is64 ? Ity_I64 : Ity_I32;
2713 IRTemp t0 = newTemp(ty);
2714 assign(t0, getIRegOrZR(is64, regNo));
2715 IRTemp t1 = newTemp(ty);
2716 switch (sh_how) {
2717 case BITS2(0,0):
2718 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2719 break;
2720 case BITS2(0,1):
2721 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2722 break;
2723 case BITS2(1,0):
2724 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2725 break;
2726 case BITS2(1,1):
2727 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2728 break;
2729 default:
2730 vassert(0);
2731 }
2732 if (invert) {
2733 IRTemp t2 = newTemp(ty);
2734 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2735 return t2;
2736 } else {
2737 return t1;
2738 }
2739}
2740
2741
2742static
2743Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2744 UInt insn)
2745{
2746# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2747
2748 /* ------------------- ADD/SUB(reg) ------------------- */
2749 /* x==0 => 32 bit op x==1 => 64 bit op
2750 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2751
2752 31 30 29 28 23 21 20 15 9 4
2753 | | | | | | | | | |
2754 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2755 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2756 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2757 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2758 */
2759 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2760 UInt bX = INSN(31,31);
2761 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2762      UInt bS = INSN(29,29); /* set flags? */
2763 UInt sh = INSN(23,22);
2764 UInt rM = INSN(20,16);
2765 UInt imm6 = INSN(15,10);
2766 UInt rN = INSN(9,5);
2767 UInt rD = INSN(4,0);
2768 Bool isSUB = bOP == 1;
2769 Bool is64 = bX == 1;
2770 IRType ty = is64 ? Ity_I64 : Ity_I32;
2771 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2772 /* invalid; fall through */
2773 } else {
2774 IRTemp argL = newTemp(ty);
2775 assign(argL, getIRegOrZR(is64, rN));
2776 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2777 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2778 IRTemp res = newTemp(ty);
2779 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2780 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2781 if (bS) {
2782 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2783 }
2784 DIP("%s%s %s, %s, %s, %s #%u\n",
2785 bOP ? "sub" : "add", bS ? "s" : "",
2786 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2787 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2788 return True;
2789 }
2790 }
2791
2792   /* ------------------- ADC/SBC(reg) ------------------- */
2793 /* x==0 => 32 bit op x==1 => 64 bit op
2794
2795 31 30 29 28 23 21 20 15 9 4
2796 | | | | | | | | | |
2797 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2798 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2799 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2800 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2801 */
2802
2803   if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0) {
2804 UInt bX = INSN(31,31);
2805 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2806 UInt bS = INSN(29,29); /* set flags */
2807 UInt rM = INSN(20,16);
2808 UInt rN = INSN(9,5);
2809 UInt rD = INSN(4,0);
2810
2811 Bool isSUB = bOP == 1;
2812 Bool is64 = bX == 1;
2813 IRType ty = is64 ? Ity_I64 : Ity_I32;
2814
2815 IRTemp oldC = newTemp(ty);
2816 assign(oldC,
2817 is64 ? mk_arm64g_calculate_flag_c()
2818 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2819
2820 IRTemp argL = newTemp(ty);
2821 assign(argL, getIRegOrZR(is64, rN));
2822 IRTemp argR = newTemp(ty);
2823 assign(argR, getIRegOrZR(is64, rM));
2824
2825 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2826 IRTemp res = newTemp(ty);
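      /* ARM SBC computes Rn - Rm - (1 - C); the borrow is folded in
         below as an extra subtraction of (oldC ^ 1). */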
2827 if (isSUB) {
2828 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2829 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2830 assign(res,
2831 binop(op,
2832 binop(op, mkexpr(argL), mkexpr(argR)),
2833 binop(xorOp, mkexpr(oldC), one)));
2834 } else {
2835 assign(res,
2836 binop(op,
2837 binop(op, mkexpr(argL), mkexpr(argR)),
2838 mkexpr(oldC)));
2839 }
2840
2841 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2842
2843 if (bS) {
2844 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2845 }
2846
2847 DIP("%s%s %s, %s, %s\n",
2848 bOP ? "sbc" : "adc", bS ? "s" : "",
2849 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2850 nameIRegOrZR(is64, rM));
2851 return True;
2852 }
2853
2854   /* -------------------- LOGIC(reg) -------------------- */
2855 /* x==0 => 32 bit op x==1 => 64 bit op
2856 N==0 => inv? is no-op (no inversion)
2857 N==1 => inv? is NOT
2858 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2859
2860 31 30 28 23 21 20 15 9 4
2861 | | | | | | | | |
2862 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2863 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2864 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2865 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2866 With N=1, the names are: BIC ORN EON BICS
2867 */
2868 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2869 UInt bX = INSN(31,31);
2870 UInt sh = INSN(23,22);
2871 UInt bN = INSN(21,21);
2872 UInt rM = INSN(20,16);
2873 UInt imm6 = INSN(15,10);
2874 UInt rN = INSN(9,5);
2875 UInt rD = INSN(4,0);
2876 Bool is64 = bX == 1;
2877 IRType ty = is64 ? Ity_I64 : Ity_I32;
2878 if (!is64 && imm6 > 31) {
2879         /* invalid; fall through */
2880 } else {
2881 IRTemp argL = newTemp(ty);
2882 assign(argL, getIRegOrZR(is64, rN));
2883 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2884 IROp op = Iop_INVALID;
2885 switch (INSN(30,29)) {
2886 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2887 case BITS2(0,1): op = mkOR(ty); break;
2888 case BITS2(1,0): op = mkXOR(ty); break;
2889 default: vassert(0);
2890 }
2891 IRTemp res = newTemp(ty);
2892 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2893 if (INSN(30,29) == BITS2(1,1)) {
2894 setFlags_LOGIC(is64, res);
2895 }
2896 putIRegOrZR(is64, rD, mkexpr(res));
2897
2898 static const HChar* names_op[8]
2899 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2900 vassert(((bN << 2) | INSN(30,29)) < 8);
2901 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2902 /* Special-case the printing of "MOV" */
2903 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2904 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2905 nameIRegOrZR(is64, rM));
2906 } else {
2907 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2908 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2909 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2910 }
2911 return True;
2912 }
2913 }
2914
2915 /* -------------------- {U,S}MULH -------------------- */
2916 /* 31 23 22 20 15 9 4
2917 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2918 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2919 */
2920 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2921       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2922      Bool isU = INSN(23,23) == 1;
2923 UInt mm = INSN(20,16);
2924 UInt nn = INSN(9,5);
2925 UInt dd = INSN(4,0);
2926 putIReg64orZR(dd, unop(Iop_128HIto64,
2927 binop(isU ? Iop_MullU64 : Iop_MullS64,
2928 getIReg64orZR(nn), getIReg64orZR(mm))));
2929 DIP("%cmulh %s, %s, %s\n",
2930 isU ? 'u' : 's',
2931 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2932 return True;
2933 }
2934
2935 /* -------------------- M{ADD,SUB} -------------------- */
2936 /* 31 30 20 15 14 9 4
2937 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2938      sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2939 */
2940 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2941 Bool is64 = INSN(31,31) == 1;
2942 UInt mm = INSN(20,16);
2943 Bool isAdd = INSN(15,15) == 0;
2944 UInt aa = INSN(14,10);
2945 UInt nn = INSN(9,5);
2946 UInt dd = INSN(4,0);
2947 if (is64) {
2948 putIReg64orZR(
2949 dd,
2950 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2951 getIReg64orZR(aa),
2952 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2953 } else {
2954 putIReg32orZR(
2955 dd,
2956 binop(isAdd ? Iop_Add32 : Iop_Sub32,
2957 getIReg32orZR(aa),
2958 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2959 }
2960 DIP("%s %s, %s, %s, %s\n",
2961 isAdd ? "madd" : "msub",
2962 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
2963 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
2964 return True;
2965 }
2966
2967 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
2968 /* 31 30 28 20 15 11 9 4
2969 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
2970 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
2971 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
2972 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
2973 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
2974 */
2975 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
2976 Bool is64 = INSN(31,31) == 1;
2977 UInt b30 = INSN(30,30);
2978 UInt mm = INSN(20,16);
2979 UInt cond = INSN(15,12);
2980 UInt b10 = INSN(10,10);
2981 UInt nn = INSN(9,5);
2982 UInt dd = INSN(4,0);
2983 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
2984 IRType ty = is64 ? Ity_I64 : Ity_I32;
2985 IRExpr* argL = getIRegOrZR(is64, nn);
2986 IRExpr* argR = getIRegOrZR(is64, mm);
2987 switch (op) {
2988 case BITS2(0,0):
2989 break;
2990 case BITS2(0,1):
2991 argR = binop(mkADD(ty), argR, mkU(ty,1));
2992 break;
2993 case BITS2(1,0):
2994 argR = unop(mkNOT(ty), argR);
2995 break;
2996 case BITS2(1,1):
2997 argR = binop(mkSUB(ty), mkU(ty,0), argR);
2998 break;
2999 default:
3000 vassert(0);
3001 }
3002 putIRegOrZR(
3003 is64, dd,
3004 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3005 argL, argR)
3006 );
3007 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3008 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3009 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3010 nameIRegOrZR(is64, mm), nameCC(cond));
3011 return True;
3012 }
3013
3014 /* -------------- ADD/SUB(extended reg) -------------- */
3015 /* 28 20 15 12 9 4
3016 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3017 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3018
3019 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3020 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3021
3022 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3023 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3024
3025 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3026 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3027
3028 The 'm' operand is extended per opt, thusly:
3029
3030 000 Xm & 0xFF UXTB
3031 001 Xm & 0xFFFF UXTH
3032 010 Xm & (2^32)-1 UXTW
3033 011 Xm UXTX
3034
3035 100 Xm sx from bit 7 SXTB
3036 101 Xm sx from bit 15 SXTH
3037 110 Xm sx from bit 31 SXTW
3038 111 Xm SXTX
3039
3040 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3041 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3042 are the identity operation on Wm.
3043
3044 After extension, the value is shifted left by imm3 bits, which
3045 may only be in the range 0 .. 4 inclusive.
3046 */
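   /* E.g. (a sketch) "add x0, x1, w2, sxtw #2" computes
      x0 = x1 + (SignExtend32to64(w2) << 2). */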
3047 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3048 Bool is64 = INSN(31,31) == 1;
3049 Bool isSub = INSN(30,30) == 1;
3050 Bool setCC = INSN(29,29) == 1;
3051 UInt mm = INSN(20,16);
3052 UInt opt = INSN(15,13);
3053 UInt imm3 = INSN(12,10);
3054 UInt nn = INSN(9,5);
3055 UInt dd = INSN(4,0);
3056 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3057 "sxtb", "sxth", "sxtw", "sxtx" };
3058 /* Do almost the same thing in the 32- and 64-bit cases. */
3059 IRTemp xN = newTemp(Ity_I64);
3060 IRTemp xM = newTemp(Ity_I64);
3061 assign(xN, getIReg64orSP(nn));
3062 assign(xM, getIReg64orZR(mm));
3063 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3064 Int shSX = 0;
3065 /* widen Xm .. */
3066 switch (opt) {
3067 case BITS3(0,0,0): // UXTB
3068 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3069 case BITS3(0,0,1): // UXTH
3070 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3071 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3072 if (is64) {
3073 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3074 }
3075 break;
3076 case BITS3(0,1,1): // UXTX -- always a noop
3077 break;
3078 case BITS3(1,0,0): // SXTB
3079 shSX = 56; goto sxTo64;
3080 case BITS3(1,0,1): // SXTH
3081 shSX = 48; goto sxTo64;
3082 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3083 if (is64) {
3084 shSX = 32; goto sxTo64;
3085 }
3086 break;
3087 case BITS3(1,1,1): // SXTX -- always a noop
3088 break;
3089 sxTo64:
3090 vassert(shSX >= 32);
3091 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3092 mkU8(shSX));
3093 break;
3094 default:
3095 vassert(0);
3096 }
3097 /* and now shift */
3098 IRTemp argL = xN;
3099 IRTemp argR = newTemp(Ity_I64);
3100 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3101 IRTemp res = newTemp(Ity_I64);
3102 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3103 mkexpr(argL), mkexpr(argR)));
3104 if (is64) {
3105 if (setCC) {
3106 putIReg64orZR(dd, mkexpr(res));
3107 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3108 } else {
3109 putIReg64orSP(dd, mkexpr(res));
3110 }
3111 } else {
3112 if (setCC) {
3113 IRTemp argL32 = newTemp(Ity_I32);
3114 IRTemp argR32 = newTemp(Ity_I32);
3115 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3116 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3117 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3118 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3119 } else {
3120 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3121 }
3122 }
3123 DIP("%s%s %s, %s, %s %s lsl %u\n",
3124 isSub ? "sub" : "add", setCC ? "s" : "",
3125 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3126 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3127 nameExt[opt], imm3);
3128 return True;
3129 }
3130
3131 /* ---------------- CCMP/CCMN(imm) ---------------- */
3132 /* Bizarrely, these appear in the "data processing register"
3133 category, even though they are operations against an
3134 immediate. */
3135 /* 31 29 20 15 11 9 3
3136 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3137 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3138
3139 Operation is:
3140 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3141 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3142 */
3143 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3144 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3145 Bool is64 = INSN(31,31) == 1;
3146 Bool isSUB = INSN(30,30) == 1;
3147 UInt imm5 = INSN(20,16);
3148 UInt cond = INSN(15,12);
3149 UInt nn = INSN(9,5);
3150 UInt nzcv = INSN(3,0);
3151
3152 IRTemp condT = newTemp(Ity_I1);
3153 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3154
3155 IRType ty = is64 ? Ity_I64 : Ity_I32;
3156 IRTemp argL = newTemp(ty);
3157 IRTemp argR = newTemp(ty);
3158
3159 if (is64) {
3160 assign(argL, getIReg64orZR(nn));
3161 assign(argR, mkU64(imm5));
3162 } else {
3163 assign(argL, getIReg32orZR(nn));
3164 assign(argR, mkU32(imm5));
3165 }
3166 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3167
3168 DIP("ccm%c %s, #%u, #%u, %s\n",
3169 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3170 imm5, nzcv, nameCC(cond));
3171 return True;
3172 }
3173
3174 /* ---------------- CCMP/CCMN(reg) ---------------- */
3175 /* 31 29 20 15 11 9 3
3176 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3177 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3178 Operation is:
3179 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3180 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3181 */
3182 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3183 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3184 Bool is64 = INSN(31,31) == 1;
3185 Bool isSUB = INSN(30,30) == 1;
3186 UInt mm = INSN(20,16);
3187 UInt cond = INSN(15,12);
3188 UInt nn = INSN(9,5);
3189 UInt nzcv = INSN(3,0);
3190
3191 IRTemp condT = newTemp(Ity_I1);
3192 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3193
3194 IRType ty = is64 ? Ity_I64 : Ity_I32;
3195 IRTemp argL = newTemp(ty);
3196 IRTemp argR = newTemp(ty);
3197
3198 if (is64) {
3199 assign(argL, getIReg64orZR(nn));
3200 assign(argR, getIReg64orZR(mm));
3201 } else {
3202 assign(argL, getIReg32orZR(nn));
3203 assign(argR, getIReg32orZR(mm));
3204 }
3205 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3206
3207 DIP("ccm%c %s, %s, #%u, %s\n",
3208 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3209 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3210 return True;
3211 }
3212
3213
3214 /* -------------- REV/REV16/REV32/RBIT -------------- */
3215 /* 31 30 28 20 15 11 9 4
3216
3217      1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3218 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3219
3220      1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3221 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3222
3223      1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3224 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3225
3226      1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3227   */
3228   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3229       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3230 UInt b31 = INSN(31,31);
3231 UInt opc = INSN(11,10);
3232
3233 UInt ix = 0;
3234 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3235 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3236 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3237 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3238 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3239 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3240 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3241      if (ix >= 1 && ix <= 7) {
3242 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3243         UInt nn = INSN(9,5);
3244 UInt dd = INSN(4,0);
3245 IRTemp src = newTemp(Ity_I64);
3246 IRTemp dst = IRTemp_INVALID;
3247         IRTemp (*math)(IRTemp) = NULL;
3248 switch (ix) {
3249 case 1: case 2: math = math_BYTESWAP64; break;
3250 case 3: case 4: math = math_BITSWAP64; break;
3251 case 5: case 6: math = math_USHORTSWAP64; break;
3252 case 7: math = math_UINTSWAP64; break;
3253 default: vassert(0);
3254 }
3255 const HChar* names[7]
3256 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3257 const HChar* nm = names[ix-1];
3258 vassert(math);
3259 if (ix == 6) {
3260 /* This has to be special cased, since the logic below doesn't
3261 handle it correctly. */
3262            assign(src, getIReg64orZR(nn));
3263            dst = math(src);
3264 putIReg64orZR(dd,
3265 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3266 } else if (is64) {
3267 assign(src, getIReg64orZR(nn));
3268 dst = math(src);
3269            putIReg64orZR(dd, mkexpr(dst));
3270 } else {
3271 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3272            dst = math(src);
3273            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3274 }
3275         DIP("%s %s, %s\n", nm,
3276             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3277 return True;
3278      }
3279      /* else fall through */
3280   }
3281
3282 /* -------------------- CLZ/CLS -------------------- */
3283 /* 30 28 24 20 15 9 4
3284 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3285 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3286 */
3287 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3288 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3289 Bool is64 = INSN(31,31) == 1;
3290 Bool isCLS = INSN(10,10) == 1;
3291 UInt nn = INSN(9,5);
3292 UInt dd = INSN(4,0);
3293 IRTemp src = newTemp(Ity_I64);
3294 IRTemp dst = newTemp(Ity_I64);
3295 if (!isCLS) { // CLS not yet supported
3296 if (is64) {
3297 assign(src, getIReg64orZR(nn));
3298 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3299 mkU64(64),
3300 unop(Iop_Clz64, mkexpr(src))));
3301 putIReg64orZR(dd, mkexpr(dst));
3302 } else {
3303 assign(src, binop(Iop_Shl64,
3304 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
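            /* Shifting Wn into the top half means Iop_Clz64 counts
               exactly the leading zeroes of the original 32-bit
               value; the ITE just below handles the all-zeroes
               case. */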
3305 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3306 mkU64(32),
3307 unop(Iop_Clz64, mkexpr(src))));
3308 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3309 }
3310 DIP("cl%c %s, %s\n",
3311 isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3312 return True;
3313 }
3314 }
3315
3316 /* -------------------- LSLV/LSRV/ASRV -------------------- */
3317 /* 30 28 20 15 11 9 4
3318 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3319 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3320 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3321 */
3322 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3323 && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
3324 Bool is64 = INSN(31,31) == 1;
3325 UInt mm = INSN(20,16);
3326 UInt op = INSN(11,10);
3327 UInt nn = INSN(9,5);
3328 UInt dd = INSN(4,0);
3329 IRType ty = is64 ? Ity_I64 : Ity_I32;
3330 IRTemp srcL = newTemp(ty);
3331 IRTemp srcR = newTemp(Ity_I8);
3332 IRTemp res = newTemp(ty);
3333 IROp iop = Iop_INVALID;
3334 assign(srcL, getIRegOrZR(is64, nn));
3335 assign(srcR,
3336 unop(Iop_64to8,
3337 binop(Iop_And64,
3338 getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
3339 switch (op) {
3340 case BITS2(0,0): iop = mkSHL(ty); break;
3341 case BITS2(0,1): iop = mkSHR(ty); break;
3342 case BITS2(1,0): iop = mkSAR(ty); break;
3343 default: vassert(0);
3344 }
3345 assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
3346 putIRegOrZR(is64, dd, mkexpr(res));
3347 vassert(op < 3);
3348 const HChar* names[3] = { "lslv", "lsrv", "asrv" };
3349 DIP("%s %s, %s, %s\n",
3350 names[op], nameIRegOrZR(is64,dd),
3351 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3352 return True;
3353 }
3354
3355 /* -------------------- SDIV/UDIV -------------------- */
3356 /* 30 28 20 15 10 9 4
3357 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3358 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3359 */
3360 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3361 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3362 Bool is64 = INSN(31,31) == 1;
3363 UInt mm = INSN(20,16);
3364 Bool isS = INSN(10,10) == 1;
3365 UInt nn = INSN(9,5);
3366 UInt dd = INSN(4,0);
3367 if (isS) {
3368 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3369 getIRegOrZR(is64, nn),
3370 getIRegOrZR(is64, mm)));
3371 } else {
3372 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3373 getIRegOrZR(is64, nn),
3374 getIRegOrZR(is64, mm)));
3375 }
3376 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3377 nameIRegOrZR(is64, dd),
3378 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3379 return True;
3380 }
3381
3382 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3383 /* 31 23 20 15 14 9 4
3384 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3385 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3386 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3387 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3388 with operation
3389 Xd = Xa +/- (Wn *u/s Wm)
3390 */
3391 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3392 Bool isU = INSN(23,23) == 1;
3393 UInt mm = INSN(20,16);
3394 Bool isAdd = INSN(15,15) == 0;
3395 UInt aa = INSN(14,10);
3396 UInt nn = INSN(9,5);
3397 UInt dd = INSN(4,0);
3398 IRTemp wN = newTemp(Ity_I32);
3399 IRTemp wM = newTemp(Ity_I32);
3400 IRTemp xA = newTemp(Ity_I64);
3401 IRTemp muld = newTemp(Ity_I64);
3402 IRTemp res = newTemp(Ity_I64);
3403 assign(wN, getIReg32orZR(nn));
3404 assign(wM, getIReg32orZR(mm));
3405 assign(xA, getIReg64orZR(aa));
3406 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3407 mkexpr(wN), mkexpr(wM)));
3408 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3409 mkexpr(xA), mkexpr(muld)));
3410 putIReg64orZR(dd, mkexpr(res));
3411 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3412 nameIReg64orZR(dd), nameIReg32orZR(nn),
3413 nameIReg32orZR(mm), nameIReg64orZR(aa));
3414 return True;
3415 }
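   /* Worked example: for "umaddl x0, w1, w2, x3" with
      w1 = w2 = 0xFFFFFFFF, Iop_MullU32 yields the full 64-bit product
      0xFFFFFFFE00000001, which is then added to x3 -- there is no
      intermediate 32-bit truncation anywhere in the sequence. */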
3416 vex_printf("ARM64 front end: data_processing_register\n");
3417 return False;
3418# undef INSN
3419}
3420
3421
3422/*------------------------------------------------------------*/
3423/*--- Load and Store instructions ---*/
3424/*------------------------------------------------------------*/
3425
3426/* Generate the EA for a "reg + reg" style amode. This is done from
3427   parts of the insn, but for sanity checking's sake it takes the whole
3428 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
3429 and S=insn[12]:
3430
3431 The possible forms, along with their opt:S values, are:
3432 011:0 Xn|SP + Xm
3433 111:0 Xn|SP + Xm
3434 011:1 Xn|SP + Xm * transfer_szB
3435 111:1 Xn|SP + Xm * transfer_szB
3436 010:0 Xn|SP + 32Uto64(Wm)
3437 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
3438 110:0 Xn|SP + 32Sto64(Wm)
3439 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
3440
3441 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
3442 the transfer size is insn[23,31,30]. For integer loads/stores,
3443 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
3444
3445 If the decoding fails, it returns IRTemp_INVALID.
3446
3447   isInt is True iff this decoding is for transfers to/from integer
3448 registers. If False it is for transfers to/from vector registers.
3449*/
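/* For example, "ldr x1, [x2, x3, lsl #3]" presents opt:S = 011:1 with
   szLg2 == 3, so the 011:1 row above applies: the EA generated is
      Add64(GET(x2), Shl64(GET(x3), 3))
   and buf is set to "[x2, x3 lsl 3]". */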
3450static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
3451{
3452 UInt optS = SLICE_UInt(insn, 15, 12);
3453 UInt mm = SLICE_UInt(insn, 20, 16);
3454 UInt nn = SLICE_UInt(insn, 9, 5);
3455 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3456 | SLICE_UInt(insn, 31, 30); // Log2 of the size
3457
3458 buf[0] = 0;
3459
3460 /* Sanity checks, that this really is a load/store insn. */
3461 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3462 goto fail;
3463
3464 if (isInt
3465 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3466 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3467 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3468 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3469 goto fail;
3470
3471 if (!isInt
3472 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3473 goto fail;
3474
3475 /* Throw out non-verified but possibly valid cases. */
3476 switch (szLg2) {
3477 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
3478 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
3479 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
3480 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
3481       case BITS3(1,0,0): // 128 bit: only ever valid for the vector
3482          goto fail;      // case, but that case is not handled yet (ATC)
3483 case BITS3(1,0,1): // these sizes are never valid
3484 case BITS3(1,1,0):
3485 case BITS3(1,1,1): goto fail;
3486
3487 default: vassert(0);
3488 }
3489
3490 IRExpr* rhs = NULL;
3491 switch (optS) {
3492 case BITS4(1,1,1,0): goto fail; //ATC
3493 case BITS4(0,1,1,0):
3494 rhs = getIReg64orZR(mm);
3495 vex_sprintf(buf, "[%s, %s]",
3496 nameIReg64orZR(nn), nameIReg64orZR(mm));
3497 break;
3498 case BITS4(1,1,1,1): goto fail; //ATC
3499 case BITS4(0,1,1,1):
3500 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
3501 vex_sprintf(buf, "[%s, %s lsl %u]",
3502 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
3503 break;
3504 case BITS4(0,1,0,0):
3505 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
3506            vex_sprintf(buf, "[%s, %s uxtw]",
3507 nameIReg64orZR(nn), nameIReg32orZR(mm));
3508 break;
3509 case BITS4(0,1,0,1):
3510 rhs = binop(Iop_Shl64,
3511 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
3512            vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
3513 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3514 break;
3515 case BITS4(1,1,0,0):
3516 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
3517            vex_sprintf(buf, "[%s, %s sxtw]",
3518 nameIReg64orZR(nn), nameIReg32orZR(mm));
3519 break;
3520 case BITS4(1,1,0,1):
3521 rhs = binop(Iop_Shl64,
3522 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
3523            vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
3524 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3525 break;
3526 default:
3527 /* The rest appear to be genuinely invalid */
3528 goto fail;
3529 }
3530
3531 vassert(rhs);
3532 IRTemp res = newTemp(Ity_I64);
3533 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
3534 return res;
3535
3536 fail:
3537 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
3538 return IRTemp_INVALID;
3539}
3540
3541
3542/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3543 bits of DATAE :: Ity_I64. */
3544static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3545{
3546 IRExpr* addrE = mkexpr(addr);
3547 switch (szB) {
3548 case 8:
3549 storeLE(addrE, dataE);
3550 break;
3551 case 4:
3552 storeLE(addrE, unop(Iop_64to32, dataE));
3553 break;
3554 case 2:
3555 storeLE(addrE, unop(Iop_64to16, dataE));
3556 break;
3557 case 1:
3558 storeLE(addrE, unop(Iop_64to8, dataE));
3559 break;
3560 default:
3561 vassert(0);
3562 }
3563}
3564
3565
3566/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3567 placing the result in an Ity_I64 temporary. */
3568static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3569{
3570 IRTemp res = newTemp(Ity_I64);
3571 IRExpr* addrE = mkexpr(addr);
3572 switch (szB) {
3573 case 8:
3574 assign(res, loadLE(Ity_I64,addrE));
3575 break;
3576 case 4:
3577 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3578 break;
3579 case 2:
3580 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3581 break;
3582 case 1:
3583 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3584 break;
3585 default:
3586 vassert(0);
3587 }
3588 return res;
3589}
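/* For example, gen_zwidening_load(2, addr) produces
   16Uto64(LDle:I16(addr)), so a loaded halfword of 0xFFFF becomes
   0x000000000000FFFF -- the widening is always unsigned.  The
   sign-extending loads are handled separately by the LDRS* decoders
   below. */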
3590
3591
sewardj18bf5172014-06-14 18:05:30 +00003592/* Generate a "standard 7" name, from bitQ and size. But also
3593 allow ".1d" since that's occasionally useful. */
3594static
3595const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
3596{
3597 vassert(bitQ <= 1 && size <= 3);
3598 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00003599 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00003600 UInt ix = (bitQ << 2) | size;
3601 vassert(ix < 8);
3602 return nms[ix];
3603}
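/* For example, bitQ == 1 with size == 2 indexes entry (1 << 2) | 2
   == 6 and yields "4s", while bitQ == 0 with size == 3 yields the
   non-standard "1d". */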
3604
3605
sewardjbbcf1882014-01-12 12:49:10 +00003606static
3607Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3608{
3609# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3610
3611 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3612 /* uimm12 is scaled by the transfer size
3613
3614 31 29 26 21 9 4
3615 | | | | | |
3616 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3617 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3618
3619 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3620 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3621
3622 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3623 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3624
3625 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3626 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3627 */
3628 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3629 UInt szLg2 = INSN(31,30);
3630 UInt szB = 1 << szLg2;
3631 Bool isLD = INSN(22,22) == 1;
3632 UInt offs = INSN(21,10) * szB;
3633 UInt nn = INSN(9,5);
3634 UInt tt = INSN(4,0);
3635 IRTemp ta = newTemp(Ity_I64);
3636 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3637 if (nn == 31) { /* FIXME generate stack alignment check */ }
3638 vassert(szLg2 < 4);
3639 if (isLD) {
3640 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3641 } else {
3642 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3643 }
3644 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3645 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3646 DIP("%s %s, [%s, #%u]\n",
3647 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3648 nameIReg64orSP(nn), offs);
3649 return True;
3650 }
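   /* For example, "ldr x1, [sp, #16]" encodes szLg2 = 3 and
      imm12 = 2, giving offs = 2 * 8 = 16; the emitted IR is simply
      x1 = LDle:I64(Add64(GET(sp), 0x10)), with no writeback. */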
3651
3652 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3653 /*
3654 31 29 26 20 11 9 4
3655 | | | | | | |
3656 (at-Rn-then-Rn=EA) | | |
3657 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3658 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3659
3660 (at-EA-then-Rn=EA)
3661 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3662 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3663
3664 (at-EA)
3665 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3666 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3667
3668 simm9 is unscaled.
3669
3670 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3671      load case this is because it would create two competing values for
3672 Rt. In the store case the reason is unclear, but the spec
3673 disallows it anyway.
3674
3675 Stores are narrowing, loads are unsigned widening. sz encodes
3676 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3677 */
3678 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3679 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3680 UInt szLg2 = INSN(31,30);
3681 UInt szB = 1 << szLg2;
3682 Bool isLoad = INSN(22,22) == 1;
3683 UInt imm9 = INSN(20,12);
3684 UInt nn = INSN(9,5);
3685 UInt tt = INSN(4,0);
3686 Bool wBack = INSN(10,10) == 1;
3687 UInt how = INSN(11,10);
3688 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3689 /* undecodable; fall through */
3690 } else {
3691 if (nn == 31) { /* FIXME generate stack alignment check */ }
3692
3693 // Compute the transfer address TA and the writeback address WA.
3694 IRTemp tRN = newTemp(Ity_I64);
3695 assign(tRN, getIReg64orSP(nn));
3696 IRTemp tEA = newTemp(Ity_I64);
3697 Long simm9 = (Long)sx_to_64(imm9, 9);
3698 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3699
3700 IRTemp tTA = newTemp(Ity_I64);
3701 IRTemp tWA = newTemp(Ity_I64);
3702 switch (how) {
3703 case BITS2(0,1):
3704 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3705 case BITS2(1,1):
3706 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3707 case BITS2(0,0):
3708 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3709 default:
3710 vassert(0); /* NOTREACHED */
3711 }
3712
sewardje0bff8b2014-03-09 09:40:23 +00003713 /* Normally rN would be updated after the transfer. However, in
3714            the special case typified by
3715 str x30, [sp,#-16]!
3716 it is necessary to update SP before the transfer, (1)
3717 because Memcheck will otherwise complain about a write
3718 below the stack pointer, and (2) because the segfault
3719 stack extension mechanism will otherwise extend the stack
3720 only down to SP before the instruction, which might not be
3721            far enough, if the -16 offset takes the actual access
3722 address to the next page.
3723 */
3724 Bool earlyWBack
3725 = wBack && simm9 < 0 && szB == 8
3726 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3727
3728 if (wBack && earlyWBack)
3729 putIReg64orSP(nn, mkexpr(tEA));
3730
sewardjbbcf1882014-01-12 12:49:10 +00003731 if (isLoad) {
3732 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3733 } else {
3734 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3735 }
3736
sewardje0bff8b2014-03-09 09:40:23 +00003737 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003738 putIReg64orSP(nn, mkexpr(tEA));
3739
3740 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3741 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3742 const HChar* fmt_str = NULL;
3743 switch (how) {
3744 case BITS2(0,1):
3745 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3746 break;
3747 case BITS2(1,1):
3748 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3749 break;
3750 case BITS2(0,0):
3751               fmt_str = "%s %s, [%s, #%lld] (at-EA)\n";
3752 break;
3753 default:
3754 vassert(0);
3755 }
3756 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3757 nameIRegOrZR(szB == 8, tt),
3758 nameIReg64orSP(nn), simm9);
3759 return True;
3760 }
3761 }
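   /* To illustrate the early-writeback path above, for
      "str x30, [sp, #-16]!" the IR is emitted in this order:
         tEA = Add64(GET(sp), 0xFFFFFFFFFFFFFFF0)
         PUT(sp) = tEA
         STle(tEA) = GET(x30)
      so SP is moved down before the store, keeping the access at or
      above the updated stack pointer. */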
3762
3763 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3764 /* L==1 => mm==LD
3765 L==0 => mm==ST
3766 x==0 => 32 bit transfers, and zero extended loads
3767 x==1 => 64 bit transfers
3768 simm7 is scaled by the (single-register) transfer size
3769
3770 (at-Rn-then-Rn=EA)
3771 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3772
3773 (at-EA-then-Rn=EA)
3774 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3775
3776 (at-EA)
3777 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3778 */
3779
3780 UInt insn_30_23 = INSN(30,23);
3781 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3782 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3783 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3784 UInt bL = INSN(22,22);
3785 UInt bX = INSN(31,31);
3786 UInt bWBack = INSN(23,23);
3787 UInt rT1 = INSN(4,0);
3788 UInt rN = INSN(9,5);
3789 UInt rT2 = INSN(14,10);
3790 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3791 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3792 || (bL && rT1 == rT2)) {
3793 /* undecodable; fall through */
3794 } else {
3795 if (rN == 31) { /* FIXME generate stack alignment check */ }
3796
3797 // Compute the transfer address TA and the writeback address WA.
3798 IRTemp tRN = newTemp(Ity_I64);
3799 assign(tRN, getIReg64orSP(rN));
3800 IRTemp tEA = newTemp(Ity_I64);
3801 simm7 = (bX ? 8 : 4) * simm7;
3802 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3803
3804 IRTemp tTA = newTemp(Ity_I64);
3805 IRTemp tWA = newTemp(Ity_I64);
3806 switch (INSN(24,23)) {
3807 case BITS2(0,1):
3808 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3809 case BITS2(1,1):
3810 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3811 case BITS2(1,0):
3812 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3813 default:
3814 vassert(0); /* NOTREACHED */
3815 }
3816
3817 /* Normally rN would be updated after the transfer. However, in
3818            the special case typified by
3819 stp x29, x30, [sp,#-112]!
3820 it is necessary to update SP before the transfer, (1)
3821 because Memcheck will otherwise complain about a write
3822 below the stack pointer, and (2) because the segfault
3823 stack extension mechanism will otherwise extend the stack
3824 only down to SP before the instruction, which might not be
3825            far enough, if the -112 offset takes the actual access
3826 address to the next page.
3827 */
3828 Bool earlyWBack
3829 = bWBack && simm7 < 0
3830 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3831
3832 if (bWBack && earlyWBack)
3833 putIReg64orSP(rN, mkexpr(tEA));
3834
3835 /**/ if (bL == 1 && bX == 1) {
3836 // 64 bit load
3837 putIReg64orZR(rT1, loadLE(Ity_I64,
3838 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3839 putIReg64orZR(rT2, loadLE(Ity_I64,
3840 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3841 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003842 // 32 bit load
3843 putIReg32orZR(rT1, loadLE(Ity_I32,
3844 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3845 putIReg32orZR(rT2, loadLE(Ity_I32,
3846 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3847 } else if (bL == 0 && bX == 1) {
3848 // 64 bit store
3849 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3850 getIReg64orZR(rT1));
3851 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3852 getIReg64orZR(rT2));
3853 } else {
3854 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003855 // 32 bit store
3856 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3857 getIReg32orZR(rT1));
3858 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3859 getIReg32orZR(rT2));
3860 }
3861
3862 if (bWBack && !earlyWBack)
3863 putIReg64orSP(rN, mkexpr(tEA));
3864
3865 const HChar* fmt_str = NULL;
3866 switch (INSN(24,23)) {
3867 case BITS2(0,1):
3868 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3869 break;
3870 case BITS2(1,1):
3871 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3872 break;
3873 case BITS2(1,0):
3874               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
3875 break;
3876 default:
3877 vassert(0);
3878 }
3879 DIP(fmt_str, bL == 0 ? "st" : "ld",
3880 nameIRegOrZR(bX == 1, rT1),
3881 nameIRegOrZR(bX == 1, rT2),
3882 nameIReg64orSP(rN), simm7);
3883 return True;
3884 }
3885 }
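   /* For example, "ldp x1, x2, [sp], #32" (simm7 = 4, scaled by 8) is
      the at-Rn-then-Rn=EA form: x1 and x2 are loaded from [sp] and
      [sp+8] respectively, and only afterwards is SP advanced by
      32. */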
3886
3887 /* ---------------- LDR (literal, int reg) ---------------- */
3888 /* 31 29 23 4
3889 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3890 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3891 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3892 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3893 Just handles the first two cases for now.
3894 */
3895 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3896 UInt imm19 = INSN(23,5);
3897 UInt rT = INSN(4,0);
3898 UInt bX = INSN(30,30);
3899 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3900 if (bX) {
3901 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3902 } else {
3903 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3904 }
3905 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3906 return True;
3907 }
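   /* Since guest_PC_curr_instr is a translation-time constant, the EA
      folds to a constant too: an LDR-literal with imm19 = 2 at guest
      address 0x1000 becomes simply Xt = LDle:I64(0x1008), with no PC
      read appearing in the IR. */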
3908
3909 /* -------------- {LD,ST}R (integer register) --------------- */
3910 /* 31 29 20 15 12 11 9 4
3911 | | | | | | | |
3912 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3913 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3914 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3915 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3916
3917 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3918 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3919 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3920 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3921 */
3922 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3923 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3924 HChar dis_buf[64];
3925 UInt szLg2 = INSN(31,30);
3926 Bool isLD = INSN(22,22) == 1;
3927 UInt tt = INSN(4,0);
3928 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3929 if (ea != IRTemp_INVALID) {
3930 switch (szLg2) {
3931 case 3: /* 64 bit */
3932 if (isLD) {
3933 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3934 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3935 } else {
3936 storeLE(mkexpr(ea), getIReg64orZR(tt));
3937 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3938 }
3939 break;
3940 case 2: /* 32 bit */
3941 if (isLD) {
3942 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3943 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3944 } else {
3945 storeLE(mkexpr(ea), getIReg32orZR(tt));
3946 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3947 }
3948 break;
3949 case 1: /* 16 bit */
3950 if (isLD) {
3951 putIReg64orZR(tt, unop(Iop_16Uto64,
3952 loadLE(Ity_I16, mkexpr(ea))));
3953                  DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3954 } else {
3955 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3956 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3957 }
3958 break;
3959 case 0: /* 8 bit */
3960 if (isLD) {
3961 putIReg64orZR(tt, unop(Iop_8Uto64,
3962 loadLE(Ity_I8, mkexpr(ea))));
3963                  DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3964 } else {
3965 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3966 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3967 }
3968 break;
3969 default:
3970 vassert(0);
3971 }
3972 return True;
3973 }
3974 }
3975
3976 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3977 /* 31 29 26 23 21 9 4
3978 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3979 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3980 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3981 where
3982 Rt is Wt when x==1, Xt when x==0
3983 */
3984 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3985 /* Further checks on bits 31:30 and 22 */
3986 Bool valid = False;
3987 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3988 case BITS3(1,0,0):
3989 case BITS3(0,1,0): case BITS3(0,1,1):
3990 case BITS3(0,0,0): case BITS3(0,0,1):
3991 valid = True;
3992 break;
3993 }
3994 if (valid) {
3995 UInt szLg2 = INSN(31,30);
3996 UInt bitX = INSN(22,22);
3997 UInt imm12 = INSN(21,10);
3998 UInt nn = INSN(9,5);
3999 UInt tt = INSN(4,0);
4000 UInt szB = 1 << szLg2;
4001 IRExpr* ea = binop(Iop_Add64,
4002 getIReg64orSP(nn), mkU64(imm12 * szB));
4003 switch (szB) {
4004 case 4:
4005 vassert(bitX == 0);
4006 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
4007 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
4008 nameIReg64orSP(nn), imm12 * szB);
4009 break;
4010 case 2:
4011 if (bitX == 1) {
4012 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
4013 } else {
4014 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
4015 }
4016 DIP("ldrsh %s, [%s, #%u]\n",
4017 nameIRegOrZR(bitX == 0, tt),
4018 nameIReg64orSP(nn), imm12 * szB);
4019 break;
4020 case 1:
4021 if (bitX == 1) {
4022 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
4023 } else {
4024 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
4025 }
4026 DIP("ldrsb %s, [%s, #%u]\n",
4027 nameIRegOrZR(bitX == 0, tt),
4028 nameIReg64orSP(nn), imm12 * szB);
4029 break;
4030 default:
4031 vassert(0);
4032 }
4033 return True;
4034 }
4035 /* else fall through */
4036 }
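   /* For example, "ldrsh w1, [x2, #8]" has szLg2 = 1, bitX = 1 and
      imm12 = 4, so an I16 is loaded from x2+8 and sign-extended to 32
      bits; per the usual AArch64 W-register write semantics, the
      upper 32 bits of x1 then become zero. */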
4037
4038 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
4039 /* (at-Rn-then-Rn=EA)
4040 31 29 23 21 20 11 9 4
4041 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
4042 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
4043 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
4044
4045 (at-EA-then-Rn=EA)
4046 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
4047 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
4048 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
4049 where
4050 Rt is Wt when x==1, Xt when x==0
4051 transfer-at-Rn when [11]==0, at EA when [11]==1
4052 */
4053 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4054 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4055 /* Further checks on bits 31:30 and 22 */
4056 Bool valid = False;
4057 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4058 case BITS3(1,0,0): // LDRSW Xt
4059 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
4060 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
4061 valid = True;
4062 break;
4063 }
4064 if (valid) {
4065 UInt szLg2 = INSN(31,30);
4066 UInt imm9 = INSN(20,12);
4067 Bool atRN = INSN(11,11) == 0;
4068 UInt nn = INSN(9,5);
4069 UInt tt = INSN(4,0);
4070 IRTemp tRN = newTemp(Ity_I64);
4071 IRTemp tEA = newTemp(Ity_I64);
4072 IRTemp tTA = IRTemp_INVALID;
4073 ULong simm9 = sx_to_64(imm9, 9);
4074 Bool is64 = INSN(22,22) == 0;
4075 assign(tRN, getIReg64orSP(nn));
4076 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4077 tTA = atRN ? tRN : tEA;
4078 HChar ch = '?';
4079 /* There are 5 cases:
4080 byte load, SX to 64
4081 byte load, SX to 32, ZX to 64
4082 halfword load, SX to 64
4083 halfword load, SX to 32, ZX to 64
4084 word load, SX to 64
4085 The ifs below handle them in the listed order.
4086 */
4087 if (szLg2 == 0) {
4088 ch = 'b';
4089 if (is64) {
4090 putIReg64orZR(tt, unop(Iop_8Sto64,
4091 loadLE(Ity_I8, mkexpr(tTA))));
4092 } else {
4093 putIReg32orZR(tt, unop(Iop_8Sto32,
4094 loadLE(Ity_I8, mkexpr(tTA))));
4095 }
4096 }
4097 else if (szLg2 == 1) {
4098 ch = 'h';
4099 if (is64) {
4100 putIReg64orZR(tt, unop(Iop_16Sto64,
4101 loadLE(Ity_I16, mkexpr(tTA))));
4102 } else {
4103 putIReg32orZR(tt, unop(Iop_16Sto32,
4104 loadLE(Ity_I16, mkexpr(tTA))));
4105 }
4106 }
4107 else if (szLg2 == 2 && is64) {
4108 ch = 'w';
4109 putIReg64orZR(tt, unop(Iop_32Sto64,
4110 loadLE(Ity_I32, mkexpr(tTA))));
4111 }
4112 else {
4113 vassert(0);
4114 }
4115 putIReg64orSP(nn, mkexpr(tEA));
4116         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
4117 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4118 return True;
4119 }
4120 /* else fall through */
4121 }
4122
4123 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
4124 /* 31 29 23 21 20 11 9 4
4125 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
4126 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
4127 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
4128 where
4129 Rt is Wt when x==1, Xt when x==0
4130 */
4131 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4132 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4133 /* Further checks on bits 31:30 and 22 */
4134 Bool valid = False;
4135 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4136 case BITS3(1,0,0): // LDURSW Xt
4137 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
4138 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
4139 valid = True;
4140 break;
4141 }
4142 if (valid) {
4143 UInt szLg2 = INSN(31,30);
4144 UInt imm9 = INSN(20,12);
4145 UInt nn = INSN(9,5);
4146 UInt tt = INSN(4,0);
4147 IRTemp tRN = newTemp(Ity_I64);
4148 IRTemp tEA = newTemp(Ity_I64);
4149 ULong simm9 = sx_to_64(imm9, 9);
4150 Bool is64 = INSN(22,22) == 0;
4151 assign(tRN, getIReg64orSP(nn));
4152 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4153 HChar ch = '?';
4154 /* There are 5 cases:
4155 byte load, SX to 64
4156 byte load, SX to 32, ZX to 64
4157 halfword load, SX to 64
4158 halfword load, SX to 32, ZX to 64
4159 word load, SX to 64
4160 The ifs below handle them in the listed order.
4161 */
4162 if (szLg2 == 0) {
4163 ch = 'b';
4164 if (is64) {
4165 putIReg64orZR(tt, unop(Iop_8Sto64,
4166 loadLE(Ity_I8, mkexpr(tEA))));
4167 } else {
4168 putIReg32orZR(tt, unop(Iop_8Sto32,
4169 loadLE(Ity_I8, mkexpr(tEA))));
4170 }
4171 }
4172 else if (szLg2 == 1) {
4173 ch = 'h';
4174 if (is64) {
4175 putIReg64orZR(tt, unop(Iop_16Sto64,
4176 loadLE(Ity_I16, mkexpr(tEA))));
4177 } else {
4178 putIReg32orZR(tt, unop(Iop_16Sto32,
4179 loadLE(Ity_I16, mkexpr(tEA))));
4180 }
4181 }
4182 else if (szLg2 == 2 && is64) {
4183 ch = 'w';
4184 putIReg64orZR(tt, unop(Iop_32Sto64,
4185 loadLE(Ity_I32, mkexpr(tEA))));
4186 }
4187 else {
4188 vassert(0);
4189 }
4190         DIP("ldurs%c %s, [%s, #%lld]\n",
4191 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4192 return True;
4193 }
4194 /* else fall through */
4195 }
4196
4197 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
4198 /* L==1 => mm==LD
4199 L==0 => mm==ST
4200 sz==00 => 32 bit (S) transfers
4201 sz==01 => 64 bit (D) transfers
4202 sz==10 => 128 bit (Q) transfers
4203 sz==11 isn't allowed
4204 simm7 is scaled by the (single-register) transfer size
4205
4206 31 29 22 21 14 9 4
4207 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
4208 (at-Rn-then-Rn=EA)
4209
4210 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
4211 (at-EA-then-Rn=EA)
4212
4213 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
4214 (at-EA)
4215 */
4216
4217 UInt insn_29_23 = INSN(29,23);
4218 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
4219 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
4220 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
4221 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
4222 Bool isLD = INSN(22,22) == 1;
4223 Bool wBack = INSN(23,23) == 1;
4224 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4225 UInt tt2 = INSN(14,10);
4226 UInt nn = INSN(9,5);
4227 UInt tt1 = INSN(4,0);
4228 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
4229 /* undecodable; fall through */
4230 } else {
4231 if (nn == 31) { /* FIXME generate stack alignment check */ }
4232
4233 // Compute the transfer address TA and the writeback address WA.
4234 UInt szB = 4 << szSlg2; /* szB is the per-register size */
4235 IRTemp tRN = newTemp(Ity_I64);
4236 assign(tRN, getIReg64orSP(nn));
4237 IRTemp tEA = newTemp(Ity_I64);
4238 simm7 = szB * simm7;
4239 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4240
4241 IRTemp tTA = newTemp(Ity_I64);
4242 IRTemp tWA = newTemp(Ity_I64);
4243 switch (INSN(24,23)) {
4244 case BITS2(0,1):
4245 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4246 case BITS2(1,1):
4247 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4248 case BITS2(1,0):
4249 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4250 default:
4251 vassert(0); /* NOTREACHED */
4252 }
4253
4254 IRType ty = Ity_INVALID;
4255 switch (szB) {
4256 case 4: ty = Ity_F32; break;
4257 case 8: ty = Ity_F64; break;
4258 case 16: ty = Ity_V128; break;
4259 default: vassert(0);
4260 }
4261
sewardje0bff8b2014-03-09 09:40:23 +00004262 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00004263          the special cases typified by
sewardje0bff8b2014-03-09 09:40:23 +00004264 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00004265 stp d0, d1, [sp,#-512]!
4266 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00004267 it is necessary to update SP before the transfer, (1)
4268 because Memcheck will otherwise complain about a write
4269 below the stack pointer, and (2) because the segfault
4270 stack extension mechanism will otherwise extend the stack
4271 only down to SP before the instruction, which might not be
4272         far enough, if the -512 offset takes the actual access
4273 address to the next page.
4274 */
4275 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00004276 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00004277 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
4278
4279 if (wBack && earlyWBack)
4280 putIReg64orSP(nn, mkexpr(tEA));
4281
sewardjbbcf1882014-01-12 12:49:10 +00004282 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00004283 if (szB < 16) {
4284 putQReg128(tt1, mkV128(0x0000));
4285 }
sewardj606c4ba2014-01-26 19:11:14 +00004286 putQRegLO(tt1,
4287 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00004288 if (szB < 16) {
4289 putQReg128(tt2, mkV128(0x0000));
4290 }
sewardj606c4ba2014-01-26 19:11:14 +00004291 putQRegLO(tt2,
4292 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00004293 } else {
4294 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00004295 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004296 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00004297 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004298 }
4299
sewardje0bff8b2014-03-09 09:40:23 +00004300 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004301 putIReg64orSP(nn, mkexpr(tEA));
4302
4303 const HChar* fmt_str = NULL;
4304 switch (INSN(24,23)) {
4305 case BITS2(0,1):
4306 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4307 break;
4308 case BITS2(1,1):
4309 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4310 break;
4311 case BITS2(1,0):
4312               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
4313 break;
4314 default:
4315 vassert(0);
4316 }
4317 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00004318 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00004319 nameIReg64orSP(nn), simm7);
4320 return True;
4321 }
4322 }
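   /* For example, "ldp q0, q1, [sp], #32" has sz = 10, hence szB = 16
      and ty = Ity_V128: two whole-vector loads are done at [sp] and
      [sp+16], and then SP is advanced by 32 (simm7 = 2, scaled by
      16). */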
4323
4324 /* -------------- {LD,ST}R (vector register) --------------- */
4325 /* 31 29 23 20 15 12 11 9 4
4326 | | | | | | | | |
4327 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
4328 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
4329 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
4330 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
4331 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
4332
4333 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
4334 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
4335 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
4336 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
4337 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
4338 */
4339 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4340 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4341 HChar dis_buf[64];
4342 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4343 Bool isLD = INSN(22,22) == 1;
4344 UInt tt = INSN(4,0);
4345 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
4346 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
4347 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
4348 switch (szLg2) {
4349 case 0: /* 8 bit */
4350 if (isLD) {
4351 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004352 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
4353 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004354 } else {
4355 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004356 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
4357 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004358 }
4359 break;
4360 case 1:
4361 if (isLD) {
4362 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004363 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
4364 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004365 } else {
4366 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004367 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
4368 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004369 }
4370 break;
4371 case 2: /* 32 bit */
4372 if (isLD) {
4373 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004374 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
4375 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004376 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004377 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
4378 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004379 }
4380 break;
4381 case 3: /* 64 bit */
4382 if (isLD) {
4383 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004384 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
4385 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004386 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004387 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
4388 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004389 }
4390 break;
4391          case 4: return False; //ATC (currently unreachable: szLg2 >= 4 is rejected above)
4392 default: vassert(0);
4393 }
4394 return True;
4395 }
4396 after_LDR_STR_vector_register:
4397
4398 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
4399 /* 31 29 22 20 15 12 11 9 4
4400 | | | | | | | | |
4401 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
4402
4403 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
4404 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
4405
4406 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
4407 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
4408 */
4409 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4410 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4411 HChar dis_buf[64];
4412 UInt szLg2 = INSN(31,30);
4413 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
4414 UInt tt = INSN(4,0);
4415 if (szLg2 == 3) goto after_LDRS_integer_register;
4416 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4417 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
4418 /* Enumerate the 5 variants explicitly. */
4419 if (szLg2 == 2/*32 bit*/ && sxTo64) {
4420 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
4421 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
4422 return True;
4423 }
4424 else
4425 if (szLg2 == 1/*16 bit*/) {
4426 if (sxTo64) {
4427 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
4428 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
4429 } else {
4430 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
4431 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4432 }
4433 return True;
4434 }
4435 else
4436 if (szLg2 == 0/*8 bit*/) {
4437 if (sxTo64) {
4438 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
4439 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
4440 } else {
4441 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
4442 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4443 }
4444 return True;
4445 }
4446 /* else it's an invalid combination */
4447 }
4448 after_LDRS_integer_register:
4449
4450 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
4451 /* This is the Unsigned offset variant only. The Post-Index and
4452 Pre-Index variants are below.
4453
4454 31 29 23 21 9 4
4455 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
4456 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
4457 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
4458 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
4459 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
4460
4461 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
4462 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
4463 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
4464 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
4465 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
4466 */
4467 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
4468 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
4469 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4470 Bool isLD = INSN(22,22) == 1;
4471 UInt pimm12 = INSN(21,10) << szLg2;
4472 UInt nn = INSN(9,5);
4473 UInt tt = INSN(4,0);
4474 IRTemp tEA = newTemp(Ity_I64);
4475 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4476 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
4477 if (isLD) {
4478 if (szLg2 < 4) {
4479 putQReg128(tt, mkV128(0x0000));
4480 }
sewardj606c4ba2014-01-26 19:11:14 +00004481 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004482 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004483 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004484 }
4485 DIP("%s %s, [%s, #%u]\n",
4486 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004487 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00004488 return True;
4489 }
4490
4491 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
4492 /* These are the Post-Index and Pre-Index variants.
4493
4494 31 29 23 20 11 9 4
4495 (at-Rn-then-Rn=EA)
4496 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
4497 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
4498 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
4499 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
4500 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
4501
4502 (at-EA-then-Rn=EA)
4503 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
4504 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
4505 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
4506 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
4507 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
4508
4509 Stores are the same except with bit 22 set to 0.
4510 */
4511 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4512 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4513 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4514 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4515 Bool isLD = INSN(22,22) == 1;
4516 UInt imm9 = INSN(20,12);
4517 Bool atRN = INSN(11,11) == 0;
4518 UInt nn = INSN(9,5);
4519 UInt tt = INSN(4,0);
4520 IRTemp tRN = newTemp(Ity_I64);
4521 IRTemp tEA = newTemp(Ity_I64);
4522 IRTemp tTA = IRTemp_INVALID;
4523 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4524 ULong simm9 = sx_to_64(imm9, 9);
4525 assign(tRN, getIReg64orSP(nn));
4526 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4527 tTA = atRN ? tRN : tEA;
4528 if (isLD) {
4529 if (szLg2 < 4) {
4530 putQReg128(tt, mkV128(0x0000));
4531 }
sewardj606c4ba2014-01-26 19:11:14 +00004532 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00004533 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004534 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004535 }
4536 putIReg64orSP(nn, mkexpr(tEA));
4537 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
4538 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004539 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004540 return True;
4541 }
4542
4543 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
4544 /* 31 29 23 20 11 9 4
4545 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
4546 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
4547 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
4548 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
4549 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
4550
4551 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
4552 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
4553 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
4554 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
4555 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
4556 */
4557 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4558 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4559 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4560 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4561 Bool isLD = INSN(22,22) == 1;
4562 UInt imm9 = INSN(20,12);
4563 UInt nn = INSN(9,5);
4564 UInt tt = INSN(4,0);
4565 ULong simm9 = sx_to_64(imm9, 9);
4566 IRTemp tEA = newTemp(Ity_I64);
4567 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4568 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4569 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004570 if (szLg2 < 4) {
4571 putQReg128(tt, mkV128(0x0000));
4572 }
4573 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004574 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004575 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004576 }
4577 DIP("%s %s, [%s, #%lld]\n",
4578 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004579 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004580 return True;
4581 }
4582
4583 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4584 /* 31 29 23 4
4585 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4586 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4587 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4588 */
4589 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4590 UInt szB = 4 << INSN(31,30);
4591 UInt imm19 = INSN(23,5);
4592 UInt tt = INSN(4,0);
4593 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4594 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004595 putQReg128(tt, mkV128(0x0000));
4596 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4597 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004598 return True;
4599 }
4600
sewardj606c4ba2014-01-26 19:11:14 +00004601 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004602 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004603 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4604 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4605 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4606 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4607 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4608 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004609 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4610 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004611 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004612 */
sewardj606c4ba2014-01-26 19:11:14 +00004613 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4614 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004615 ) {
4616 Bool isLD = INSN(22,22) == 1;
4617 UInt rN = INSN(9,5);
4618 UInt vT = INSN(4,0);
4619 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004620 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4621 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004622 assign(tEA, getIReg64orSP(rN));
4623 if (rN == 31) { /* FIXME generate stack alignment check */ }
4624 if (isLD) {
4625 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4626 } else {
4627 storeLE(mkexpr(tEA), getQReg128(vT));
4628 }
4629 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004630 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004631 return True;
4632 }
4633
sewardj606c4ba2014-01-26 19:11:14 +00004634 /* 31 23
4635 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4636 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4637 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4638 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4639 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4640 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4641 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4642 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4643 FIXME does this assume that the host is little endian?
4644 */
4645 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4646 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4647 ) {
4648 Bool isLD = INSN(22,22) == 1;
4649 UInt rN = INSN(9,5);
4650 UInt vT = INSN(4,0);
4651 IRTemp tEA = newTemp(Ity_I64);
4652 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4653 const HChar* name = names[INSN(11,10)];
4654 assign(tEA, getIReg64orSP(rN));
4655 if (rN == 31) { /* FIXME generate stack alignment check */ }
4656 if (isLD) {
4657 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4658 putQRegLane(vT, 1, mkU64(0));
4659 } else {
4660 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4661 }
4662 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4663 vT, name, nameIReg64orSP(rN));
4664 return True;
4665 }
4666
4667 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4668 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004669 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4670 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4671 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4672 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4673 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4674 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4675 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004676 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004677 Note that #16 is implied and cannot be any other value.
4678 FIXME does this assume that the host is little endian?
4679 */
sewardj7d009132014-02-20 17:43:38 +00004680 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4681 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004682 ) {
4683 Bool isLD = INSN(22,22) == 1;
4684 UInt rN = INSN(9,5);
4685 UInt vT = INSN(4,0);
4686 IRTemp tEA = newTemp(Ity_I64);
4687 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4688 const HChar* name = names[INSN(11,10)];
4689 assign(tEA, getIReg64orSP(rN));
4690 if (rN == 31) { /* FIXME generate stack alignment check */ }
4691 if (isLD) {
4692 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4693 } else {
4694 storeLE(mkexpr(tEA), getQReg128(vT));
4695 }
4696 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4697 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4698 vT, name, nameIReg64orSP(rN));
4699 return True;
4700 }
4701
sewardj950ca7a2014-04-03 23:03:32 +00004702 /* 31 23
4703 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4704 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004705 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004706 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004707 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004708 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4709 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4710 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004711 Note that #8 is implied and cannot be any other value.
4712 FIXME does this assume that the host is little endian?
4713 */
sewardj950ca7a2014-04-03 23:03:32 +00004714 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4715 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004716 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004717 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004718 UInt rN = INSN(9,5);
4719 UInt vT = INSN(4,0);
4720 IRTemp tEA = newTemp(Ity_I64);
4721 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4722 const HChar* name = names[INSN(11,10)];
4723 assign(tEA, getIReg64orSP(rN));
4724 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004725 if (isLD) {
4726 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4727 putQRegLane(vT, 1, mkU64(0));
4728 } else {
4729 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4730 }
sewardj606c4ba2014-01-26 19:11:14 +00004731 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004732 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4733 vT, name, nameIReg64orSP(rN));
4734 return True;
4735 }
4736
sewardj18bf5172014-06-14 18:05:30 +00004737 /* ---------- LD1R (single structure, replicate) ---------- */
4738 /* 31 29 22 20 15 11 9 4
4739 0q 001 1010 10 00000 110 0 sz n t LD1R Vt.T, [Xn|SP]
4740 0q 001 1011 10 m 110 0 sz n t LD1R Vt.T, [Xn|SP], #sz (m=11111)
4741 , Xm (m!=11111)
4742 */
4743 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
4744 && INSN(22,21) == BITS2(1,0) && INSN(15,12) == BITS4(1,1,0,0)) {
sewardjdf9d6d52014-06-27 10:43:22 +00004745 UInt bitQ = INSN(30,30);
sewardj18bf5172014-06-14 18:05:30 +00004746 Bool isPX = INSN(23,23) == 1;
4747 UInt mm = INSN(20,16);
4748 UInt sz = INSN(11,10);
4749 UInt nn = INSN(9,5);
4750 UInt tt = INSN(4,0);
4751 IRType ty = integerIRTypeOfSize(1 << sz);
4752 IRTemp tEA = newTemp(Ity_I64);
4753 assign(tEA, getIReg64orSP(nn));
4754 if (nn == 31) { /* FIXME generate stack alignment check */ }
4755 IRTemp loaded = newTemp(ty);
4756 assign(loaded, loadLE(ty, mkexpr(tEA)));
4757 IRTemp dupd = math_DUP_TO_V128(loaded, ty);
sewardjdf9d6d52014-06-27 10:43:22 +00004758 putQReg128(tt, math_MAYBE_ZERO_HI64(bitQ, dupd));
4759 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
sewardj18bf5172014-06-14 18:05:30 +00004760 /* Deal with the writeback, if any. */
4761 if (!isPX && mm == BITS5(0,0,0,0,0)) {
4762 /* No writeback. */
4763 DIP("ld1r v%u.%s, [%s]\n", tt, arr, nameIReg64orSP(nn));
4764 return True;
4765 }
4766 if (isPX) {
4767 putIReg64orSP(nn, binop(Iop_Add64, mkexpr(tEA),
4768 mm == BITS5(1,1,1,1,1) ? mkU64(1 << sz)
4769 : getIReg64orZR(mm)));
4770          if (mm == BITS5(1,1,1,1,1)) {
4771             DIP("ld1r v%u.%s, [%s], #%u\n", tt, arr,
4772                 nameIReg64orSP(nn), 1 << sz);
4773          } else {
4774             DIP("ld1r v%u.%s, [%s], %s\n", tt, arr,
4775                 nameIReg64orSP(nn), nameIReg64orZR(mm));
4776          }
4777 return True;
4778 }
4779 return False;
4780 }
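   /* For example, "ld1r {v0.4s}, [x1]" (q = 1, sz = 10) loads a
      single I32 from [x1] and math_DUP_TO_V128 replicates it into all
      four 32-bit lanes of v0. */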
4781
sewardj168c8bd2014-06-25 13:05:23 +00004782 /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004783 /* Only a very few cases. */
4784 /* 31 23 11 9 4
4785 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4786 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4787 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4788 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4789 */
4790 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4791 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4792 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4793 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4794 ) {
4795 Bool isLD = INSN(22,22) == 1;
4796 UInt rN = INSN(9,5);
4797 UInt vT = INSN(4,0);
4798 IRTemp tEA = newTemp(Ity_I64);
4799 UInt sz = INSN(11,10);
4800 const HChar* name = "??";
4801 assign(tEA, getIReg64orSP(rN));
4802 if (rN == 31) { /* FIXME generate stack alignment check */ }
4803 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4804 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4805 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4806 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4807 if (sz == BITS2(1,1)) {
4808 name = "2d";
4809 if (isLD) {
4810 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4811 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4812 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4813 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4814 } else {
4815 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4816 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4817 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4818 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4819 }
4820 }
4821 else if (sz == BITS2(1,0)) {
4822 /* Uh, this is ugly. TODO: better. */
4823 name = "4s";
4824 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4825 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4826 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4827 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4828 if (isLD) {
4829 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4830 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4831 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4832 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4833 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4834 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4835 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4836 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4837 } else {
4838 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4839 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4840 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4841 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4842 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4843 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4844 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4845 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4846 }
4847 }
4848 else {
4849 vassert(0); // Can't happen.
4850 }
4851 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4852 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4853 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4854 return True;
4855 }
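   /* The lane shuffling above implements LD2/ST2's de-interleaving:
      for "ld2 {v0.2d, v1.2d}, [x0], #32", the doublewords at offsets
      0, 8, 16 and 24 land in v0.d[0], v1.d[0], v0.d[1] and v1.d[1]
      respectively, so consecutive memory elements alternate between
      the two registers. */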

   /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, no offset) -------- */
   /* Only a very few cases. */
   /* 31        23
      0100 1100 0100 0000 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
      0100 1100 0000 0000 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
   */
   if (   (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
       || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
      UInt vT   = INSN(4,0);
      IRTemp tEA = newTemp(Ity_I64);
      const HChar* name = "16b";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      if (isLD) {
         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
      } else {
         storeLE(tEA_0,  getQReg128((vT+0) % 32));
         storeLE(tEA_16, getQReg128((vT+1) % 32));
      }
      DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
      return True;
   }

   /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, post index) -------- */
   /* Only a very few cases. */
   /* 31        23
      0100 1100 1101 1111 1010 00 n t  LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
      0100 1100 1001 1111 1010 00 n t  ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
   */
   if (   (insn & 0xFFFFFC00) == 0x4CDFA000 // LD1
       || (insn & 0xFFFFFC00) == 0x4C9FA000 // ST1
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
      UInt vT   = INSN(4,0);
      IRTemp tEA = newTemp(Ity_I64);
      const HChar* name = "16b";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      if (isLD) {
         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
      } else {
         storeLE(tEA_0,  getQReg128((vT+0) % 32));
         storeLE(tEA_16, getQReg128((vT+1) % 32));
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
      DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
          (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
      return True;
   }

   /* -------- LD1/ST1 (multi 1-elem structs, 3 regs, no offset) -------- */
   /* Only a very few cases. */
   /* 31        23
      0100 1100 0100 0000 0110 00 n t  LD1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
      0100 1100 0000 0000 0110 00 n t  ST1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
   */
   if (   (insn & 0xFFFFFC00) == 0x4C406000 // LD1
       || (insn & 0xFFFFFC00) == 0x4C006000 // ST1
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
      UInt vT   = INSN(4,0);
      IRTemp tEA = newTemp(Ity_I64);
      const HChar* name = "16b";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
      if (isLD) {
         putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
         putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
         putQReg128((vT+2) % 32, loadLE(Ity_V128, tEA_32));
      } else {
         storeLE(tEA_0,  getQReg128((vT+0) % 32));
         storeLE(tEA_16, getQReg128((vT+1) % 32));
         storeLE(tEA_32, getQReg128((vT+2) % 32));
      }
      DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s]\n",
          isLD ? "ld1" : "st1",
          (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
          nameIReg64orSP(rN));
      return True;
   }

   /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
   /* Only a very few cases. */
   /* 31        23             11 9 4
      0100 1100 1101 1111 0100 11 n t  LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
      0100 1100 1001 1111 0100 11 n t  ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
   */
   if (   (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
       || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
      ) {
      Bool isLD = INSN(22,22) == 1;
      UInt rN   = INSN(9,5);
      UInt vT   = INSN(4,0);
      IRTemp tEA = newTemp(Ity_I64);
      UInt sz = INSN(11,10);
      const HChar* name = "??";
      assign(tEA, getIReg64orSP(rN));
      if (rN == 31) { /* FIXME generate stack alignment check */ }
      IRExpr* tEA_0  = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
      IRExpr* tEA_8  = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
      IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
      IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
      IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
      IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
      if (sz == BITS2(1,1)) {
         name = "2d";
         if (isLD) {
            putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
            putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
            putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
            putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
            putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
            putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
         } else {
            storeLE(tEA_0,  getQRegLane((vT+0) % 32, 0, Ity_I64));
            storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
            storeLE(tEA_8,  getQRegLane((vT+1) % 32, 0, Ity_I64));
            storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
            storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
            storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
         }
      }
      else {
         vassert(0); // Can't happen.
      }
      putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48)));
      DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #48\n",
          isLD ? "ld3" : "st3",
          (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
          nameIReg64orSP(rN));
      return True;
   }
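
   /* A worked view of the .2d transfer above (sketch only): with
      64-bit elements e0..e5 in memory starting at the EA, LD3
      de-interleaves the two 3-element structures {e0,e1,e2} and
      {e3,e4,e5} as
         Vt = { e0, e3 },  Vt+1 = { e1, e4 },  Vt+2 = { e2, e5 }
      i.e. element k of structure s lands in lane s of register Vt+k.
      ST3 performs the inverse interleaving. */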

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                 mkU64(1)));
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }
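
   /* The store-exclusive status inversion above, in scalar terms (a
      sketch only): the IR's LLSC result is 1 on success, while the
      architected status register Ws must be 0 on success and 1 on
      failure, hence the decoder computes
         Ws = (ULong)res ^ 1;
      rather than using the IR result directly. */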

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20      14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }
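
   /* Note on the barrier placement above (a schematic sketch, not the
      architectural definition): acquire is modelled as load-then-fence
      and release as fence-then-store,
         LDAR:  Rt = *ea; fence;      STLR:  fence; *ea = Rt;
      which conservatively over-approximates the required one-way
      ordering with a full fence. */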

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24       4 3
      0101010 0 imm19    0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }
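
   /* Worked example of the target computation above (for clarity
      only): imm19 == 0x7FFFF gives uimm64 == 0x1FFFFC, a 21-bit field
      whose top bit is set, so sx_to_64(uimm64, 21) == -4 and the
      branch target is guest_PC_curr_instr - 4. */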

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET Rn
      1101011 00 01 11111 000000 nn 00000  BLR Rn
      1101011 00 00 11111 000000 nn 00000  BR  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* BLR (call) */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* BR (jump) */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("br %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool  is64   = INSN(31,31) == 1;
      Bool  bIfZ   = INSN(24,24) == 0;
      ULong uimm64 = INSN(23,5) << 2;
      UInt  rT     = INSN(4,0);
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }
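
   /* In scalar terms, the guarded exit above tests (sketch only):
         TBZ  Xt, #bitNo, dst   taken iff ((Xt >> bitNo) & 1) == 0
         TBNZ Xt, #bitNo, dst   taken iff ((Xt >> bitNo) & 1) != 0
      with dst limited to roughly PC +/- 32KB by the 14-bit
      immediate. */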

   /* -------------------- SVC -------------------- */
   /* 11010100 000 imm16 000 01
      Don't bother with anything except the imm16==0 case.
   */
   if (INSN(31,0) == 0xD4000001) {
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Sys_syscall;
      DIP("svc #0\n");
      return True;
   }

   /* ------------------ M{SR,RS} ------------------ */
   /* ---- Cases for TPIDR_EL0 ----
      0xD51BD0 010 Rt   MSR tpidr_el0, rT
      0xD53BD0 010 Rt   MRS rT, tpidr_el0
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
         DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
         DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPCR ----
      0xD51B44 000 Rt  MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
         DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
      } else {
         putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
         DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for FPSR ----
      0xD51B44 001 Rt  MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
      The only part of this we model is FPSR.QC.  All other bits
      are ignored when writing to it and RAZ when reading from it.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         /* Just deal with FPSR.QC.  Make up a V128 value which is
            zero if Xt[27] is zero and any other value if Xt[27] is
            nonzero. */
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_And64,
                            binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
                            mkU64(1)));
         IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
         stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
         DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
      } else {
         /* Generate a value which is all zeroes except for bit 27,
            which must be zero if QCFLAG is all zeroes and one otherwise. */
         IRTemp qcV128 = newTempV128();
         assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
         IRTemp qc64 = newTemp(Ity_I64);
         assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
                                      unop(Iop_V128to64,   mkexpr(qcV128))));
         IRExpr* res = binop(Iop_Shl64,
                             unop(Iop_1Uto64,
                                  binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
                             mkU8(27));
         putIReg64orZR(tt, res);
         DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
      }
      return True;
   }
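   /* Worked example of the QC modelling above (sketch only): if some
      saturating op has left QCFLAG nonzero, MRS Xt, fpsr yields
      Xt == 1 << 27 == 0x8000000; MSR fpsr, Xt with bit 27 clear
      zeroes QCFLAG again, and every other FPSR bit reads as zero. */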
   /* ---- Cases for NZCV ----
      D51B42 000 Rt  MSR nzcv, rT
      D53B42 000 Rt  MRS rT, nzcv
      The only parts of NZCV that actually exist are bits 31:28, which
      are the N Z C and V bits themselves.  Hence the flags thunk provides
      all the state we need.
   */
   if (   (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
       || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
      Bool toSys = INSN(21,21) == 0;
      UInt tt    = INSN(4,0);
      if (toSys) {
         IRTemp t = newTemp(Ity_I64);
         assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
         setFlags_COPY(t);
         DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
      } else {
         IRTemp res = newTemp(Ity_I64);
         assign(res, mk_arm64g_calculate_flags_nzcv());
         putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
         DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
      }
      return True;
   }
   /* ---- Cases for DCZID_EL0 ----
      Don't support arbitrary reads and writes to this register.  Just
      return the value 16, which indicates that the DC ZVA instruction
      is not permitted, so we don't have to emulate it.
      D5 3B 00 111 Rt  MRS rT, dczid_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
      UInt tt = INSN(4,0);
      putIReg64orZR(tt, mkU64(1<<4));
      DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
      return True;
   }
   /* ---- Cases for CTR_EL0 ----
      We just handle reads, and make up a value from the D and I line
      sizes in the VexArchInfo we are given, and patch in the following
      fields that the Foundation model gives ("natively"):
      CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
      UInt tt = INSN(4,0);
      /* Need to generate a value from dMinLine_lg2_szB and
         iMinLine_lg2_szB.  The value in the register is in 32-bit
         units, so need to subtract 2 from the values in the
         VexArchInfo.  We can assume that the values here are valid --
         disInstr_ARM64 checks them -- so there's no need to deal with
         out-of-range cases. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17
              && archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      UInt val
         = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
                      | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
      putIReg64orZR(tt, mkU64(val));
      DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
      return True;
   }
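   /* Worked example of the CTR_EL0 value above (for clarity only):
      with 64-byte D and I lines, both lg2 sizes are 6, both fields
      become 6 - 2 == 4, and the synthesised value is
         0x8440c000 | (4 << 16) | 4  ==  0x8444c004. */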
   /* ---- Cases for CNTVCT_EL0 ----
      This is a timestamp counter of some sort.  Support reads of it only
      by passing through to the host.
      D5 3B E0 010 Rt  MRS Xt, cntvct_el0
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
      UInt     tt   = INSN(4,0);
      IRTemp   val  = newTemp(Ity_I64);
      IRExpr** args = mkIRExprVec_0();
      IRDirty* d    = unsafeIRDirty_1_N (
                         val,
                         0/*regparms*/,
                         "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
                         &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
                         args
                      );
      /* execute the dirty call, dumping the result in val. */
      stmt( IRStmt_Dirty(d) );
      putIReg64orZR(tt, mkexpr(val));
      DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ IC_IVAU ------------------ */
   /* D5 0B 75 001 Rt  ic ivau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
      /* We will always be provided with a valid iMinLine value. */
      vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
              && archinfo->arm64_iMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the invalidation range, request exit-and-invalidate, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_InvalICache;
      DIP("ic ivau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ DC_CVAU ------------------ */
   /* D5 0B 7B 001 Rt  dc cvau, rT
   */
   if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
      /* Exactly the same scheme as for IC IVAU, except we observe the
         dMinLine size, and request an Ijk_FlushDCache instead of
         Ijk_InvalICache. */
      /* We will always be provided with a valid dMinLine value. */
      vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
              && archinfo->arm64_dMinLine_lg2_szB <= 17);
      /* Round the requested address, in rT, down to the start of the
         containing block. */
      UInt   tt      = INSN(4,0);
      ULong  lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
      IRTemp addr    = newTemp(Ity_I64);
      assign( addr, binop( Iop_And64,
                           getIReg64orZR(tt),
                           mkU64(~(lineszB - 1))) );
      /* Set the flush range, request exit-and-flush, with
         continuation at the next instruction. */
      stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
      stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(lineszB)));
      /* be paranoid ... */
      stmt( IRStmt_MBE(Imbe_Fence) );
      putPC(mkU64( guest_PC_curr_instr + 4 ));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_FlushDCache;
      DIP("dc cvau, %s\n", nameIReg64orZR(tt));
      return True;
   }

   /* ------------------ ISB, DMB, DSB ------------------ */
   if (INSN(31,0) == 0xD5033FDF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("isb\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033BBF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ish\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033ABF) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dmb ishst\n");
      return True;
   }
   if (INSN(31,0) == 0xD5033B9F) {
      stmt(IRStmt_MBE(Imbe_Fence));
      DIP("dsb ish\n");
      return True;
   }

   /* -------------------- NOP -------------------- */
   if (INSN(31,0) == 0xD503201F) {
      DIP("nop\n");
      return True;
   }

   //fail:
   vex_printf("ARM64 front end: branch_etc\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions: helper functions           ---*/
/*------------------------------------------------------------*/

/* Some constructors for interleave/deinterleave expressions. */

static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a0 b0
   return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
   // returns a1 b1
   return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
}

static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a2 a0 b2 b0
   return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 a1 b3 b1
   return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a1 b1 a0 b0
   return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
   // returns a3 b3 a2 b2
   return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
}

static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a6 a4 a2 a0 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 a5 a3 a1 b7 b5 b3 b1
   return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4
   return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
}

static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
   return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
                                    IRTemp bFEDCBA9876543210 ) {
   // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
   return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
                                     mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
   return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
                                     IRTemp bFEDCBA9876543210 ) {
   // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
   return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
                                      mkexpr(bFEDCBA9876543210));
}

/* Generate N copies of |bit| in the bottom of a ULong. */
static ULong Replicate ( ULong bit, Int N )
{
   vassert(bit <= 1 && N >= 1 && N < 64);
   if (bit == 0) {
      return 0;
   } else {
      /* Careful.  This won't work for N == 64. */
      return (1ULL << N) - 1;
   }
}

static ULong Replicate32x2 ( ULong bits32 )
{
   vassert(0 == (bits32 & ~0xFFFFFFFFULL));
   return (bits32 << 32) | bits32;
}

static ULong Replicate16x4 ( ULong bits16 )
{
   vassert(0 == (bits16 & ~0xFFFFULL));
   return Replicate32x2((bits16 << 16) | bits16);
}

static ULong Replicate8x8 ( ULong bits8 )
{
   vassert(0 == (bits8 & ~0xFFULL));
   return Replicate16x4((bits8 << 8) | bits8);
}

/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
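
/* Worked example of VFPExpandImm (for clarity only): imm8 == 0x70
   with N == 64 gives sign == 0, exp == 0:11111111 (0xFF, 9 bits) and
   frac == 0x30 << 48, so
      res == (0xFF << 54) | (0x30 << 48) == 0x3FF0000000000000,
   which is the double-precision value 1.0. */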

/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;   imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}

/* Help a bit for decoding laneage for vector operations that can be
   of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
   and SZ bits, typically for vector floating point. */
static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI,  /*OUT*/IRType* tyF,
                               /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
                               /*OUT*/const HChar** arrSpec,
                               Bool bitQ, Bool bitSZ )
{
   vassert(bitQ == True || bitQ == False);
   vassert(bitSZ == True || bitSZ == False);
   if (bitQ && bitSZ) { // 2x64
      if (tyI)       *tyI       = Ity_I64;
      if (tyF)       *tyF       = Ity_F64;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "2d";
      return True;
   }
   if (bitQ && !bitSZ) { // 4x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 4;
      if (zeroUpper) *zeroUpper = False;
      if (arrSpec)   *arrSpec   = "4s";
      return True;
   }
   if (!bitQ && !bitSZ) { // 2x32
      if (tyI)       *tyI       = Ity_I32;
      if (tyF)       *tyF       = Ity_F32;
      if (nLanes)    *nLanes    = 2;
      if (zeroUpper) *zeroUpper = True;
      if (arrSpec)   *arrSpec   = "2s";
      return True;
   }
   // Else impliedly 1x64, which isn't allowed.
   return False;
}

/* Helper for decoding laneage for shift-style vector operations
   that involve an immediate shift amount. */
static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
                                    UInt immh, UInt immb )
{
   vassert(immh < (1<<4));
   vassert(immb < (1<<3));
   UInt immhb = (immh << 3) | immb;
   if (immh & 8) {
      if (shift)  *shift  = 128 - immhb;
      if (szBlg2) *szBlg2 = 3;
      return True;
   }
   if (immh & 4) {
      if (shift)  *shift  = 64 - immhb;
      if (szBlg2) *szBlg2 = 2;
      return True;
   }
   if (immh & 2) {
      if (shift)  *shift  = 32 - immhb;
      if (szBlg2) *szBlg2 = 1;
      return True;
   }
   if (immh & 1) {
      if (shift)  *shift  = 16 - immhb;
      if (szBlg2) *szBlg2 = 0;
      return True;
   }
   return False;
}

/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}


/* Generate IR for TBL and TBX.  This deals with the 128 bit case
   only. */
static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
                             IRTemp oor_values )
{
   vassert(len >= 0 && len <= 3);

   /* Generate some useful constants as concisely as possible. */
   IRTemp half15 = newTemp(Ity_I64);
   assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
   IRTemp half16 = newTemp(Ity_I64);
   assign(half16, mkU64(0x1010101010101010ULL));

   /* A zero vector */
   IRTemp allZero = newTempV128();
   assign(allZero, mkV128(0x0000));
   /* A vector containing 15 in each 8-bit lane */
   IRTemp all15 = newTempV128();
   assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
   /* A vector containing 16 in each 8-bit lane */
   IRTemp all16 = newTempV128();
   assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
   /* A vector containing 32 in each 8-bit lane */
   IRTemp all32 = newTempV128();
   assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
   /* A vector containing 48 in each 8-bit lane */
   IRTemp all48 = newTempV128();
   assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
   /* A vector containing 64 in each 8-bit lane */
   IRTemp all64 = newTempV128();
   assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));

   /* Group the 16/32/48/64 vectors so as to be indexable. */
   IRTemp allXX[4] = { all16, all32, all48, all64 };

   /* Compute the result for each table vector, with zeroes in places
      where the index values are out of range, and OR them into the
      running vector. */
   IRTemp running_result = newTempV128();
   assign(running_result, mkV128(0));

   UInt tabent;
   for (tabent = 0; tabent <= len; tabent++) {
      vassert(tabent >= 0 && tabent < 4);
      IRTemp bias = newTempV128();
      assign(bias,
             mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
      IRTemp biased_indices = newTempV128();
      assign(biased_indices,
             binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
      IRTemp valid_mask = newTempV128();
      assign(valid_mask,
             binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
      IRTemp safe_biased_indices = newTempV128();
      assign(safe_biased_indices,
             binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
      IRTemp results_or_junk = newTempV128();
      assign(results_or_junk,
             binop(Iop_Perm8x16, mkexpr(tab[tabent]),
                                 mkexpr(safe_biased_indices)));
      IRTemp results_or_zero = newTempV128();
      assign(results_or_zero,
             binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
      /* And OR that into the running result. */
      IRTemp tmp = newTempV128();
      assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
                        mkexpr(running_result)));
      running_result = tmp;
   }

   /* So now running_result holds the overall result where the indices
      are in range, and zero in out-of-range lanes.  Now we need to
      compute an overall validity mask and use this to copy in the
      lanes in the oor_values for out of range indices.  This is
      unnecessary for TBL but will get folded out by iropt, so we lean
      on that and generate the same code for TBL and TBX here. */
   IRTemp overall_valid_mask = newTempV128();
   assign(overall_valid_mask,
          binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
   IRTemp result = newTempV128();
   assign(result,
          binop(Iop_OrV128,
                mkexpr(running_result),
                binop(Iop_AndV128,
                      mkexpr(oor_values),
                      unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
   return result;
}


/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
   an op which takes two I64s and produces a V128.  That is, a widening
   operator.  Generate IR which applies |opI64x2toV128| to either the
   lower (if |is2| is False) or upper (if |is2| is True) halves of
   |argL| and |argR|, and return the value in a new IRTemp.
*/
static
IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
                                   IRExpr* argL, IRExpr* argR )
{
   IRTemp res   = newTempV128();
   IROp   slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
   assign(res, binop(opI64x2toV128, unop(slice, argL),
                                    unop(slice, argR)));
   return res;
}

/* Generate signed/unsigned absolute difference vector IR. */
static
IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
{
   vassert(size <= 3);
   IRTemp argL = newTempV128();
   IRTemp argR = newTempV128();
   IRTemp msk  = newTempV128();
   IRTemp res  = newTempV128();
   assign(argL, argLE);
   assign(argR, argRE);
   assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
                     mkexpr(argL), mkexpr(argR)));
   assign(res,
          binop(Iop_OrV128,
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
                      mkexpr(msk)),
                binop(Iop_AndV128,
                      binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
                      unop(Iop_NotV128, mkexpr(msk)))));
   return res;
}


/* Generate IR that takes a V128 and sign- or zero-widens
   either the lower or upper set of lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
                                   UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src = newTempV128();
   IRTemp res = newTempV128();
   assign(src, srcE);
   switch (sizeNarrow) {
      case X10:
         assign(res,
                binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
                      binop(fromUpperHalf ? Iop_InterleaveHI32x4
                                          : Iop_InterleaveLO32x4,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(32)));
         break;
      case X01:
         assign(res,
                binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
                      binop(fromUpperHalf ? Iop_InterleaveHI16x8
                                          : Iop_InterleaveLO16x8,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(16)));
         break;
      case X00:
         assign(res,
                binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
                      binop(fromUpperHalf ? Iop_InterleaveHI8x16
                                          : Iop_InterleaveLO8x16,
                            mkexpr(src),
                            mkexpr(src)),
                      mkU8(8)));
         break;
      default:
         vassert(0);
   }
   return res;
}
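

/* How the interleave-then-shift trick above widens, worked for
   sizeNarrow == X01 (16-bit lanes) and the lower half; the numbers are
   invented for illustration.  InterleaveLO16x8(src, src) turns lanes
   [.. d c b a] into [d d c c b b a a], so each 32-bit lane now holds
   two copies of one narrow lane, e.g. 0xBEEF'BEEF.  A 32-bit right
   shift by 16 then yields 0x0000'BEEF (zero-widen, ShrN32x4) or
   0xFFFF'BEEF for negative values (sign-widen, SarN32x4). */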


/* Generate IR that takes a V128 and sign- or zero-widens
   either the even or odd lanes to twice-as-wide,
   resulting in a new V128 value. */
static
IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
                                      UInt sizeNarrow, IRExpr* srcE )
{
   IRTemp src   = newTempV128();
   IRTemp res   = newTempV128();
   IROp   opSAR = mkVecSARN(sizeNarrow+1);
   IROp   opSHR = mkVecSHRN(sizeNarrow+1);
   IROp   opSHL = mkVecSHLN(sizeNarrow+1);
   IROp   opSxR = zWiden ? opSHR : opSAR;
   UInt   amt   = 0;
   switch (sizeNarrow) {
      case X10: amt = 32; break;
      case X01: amt = 16; break;
      case X00: amt = 8;  break;
      default: vassert(0);
   }
   assign(src, srcE);
   if (fromOdd) {
      assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
   } else {
      assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
                               mkU8(amt)));
   }
   return res;
}
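

/* Worked example for the even/odd widening above, with invented values:
   take sizeNarrow == X00 (8-bit lanes) and a 16-bit wide lane holding
   0xA17F, i.e. odd byte 0xA1, even byte 0x7F.  For the odd byte, one
   16-bit shift right by 8 gives 0x00A1 (zero-widen) or 0xFFA1
   (sign-widen).  For the even byte, shifting left by 8 first gives
   0x7F00, and the shift right by 8 then produces 0x007F either way. */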


/* Generate IR that takes two V128s and narrows (takes lower half)
   of each lane, producing a single V128 value. */
static
IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
{
   IRTemp res = newTempV128();
   assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
                     mkexpr(argHi), mkexpr(argLo)));
   return res;
}


/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo]. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
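

/* Worked example for the dup-by-halving loop above, with an invented
   operand: size == 0 (byte lanes) and laneNo == 5 gives ix == 0101b.
   The loop applies, in order: InterleaveLO64x2 (ix bit 3 clear: keep
   the lower 64-bit half in both positions), CatOddLanes32x4 (bit 2
   set: keep odd 32-bit lanes), CatEvenLanes16x8 (bit 1 clear),
   CatOddLanes8x16 (bit 0 set).  Each step halves the set of candidate
   lanes, so after four steps every byte lane holds src[5]. */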


/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
   selector encoded as shown below.  Return a new V128 holding the
   selected lane from |srcV| dup'd out to V128, and also return the
   lane number, log2 of the lane size in bytes, and width-character via
   *laneNo, *laneSzLg2 and *laneCh respectively.  It may be that imm5
   is an invalid selector, in which case return
   IRTemp_INVALID, 0, 0 and '?' respectively.

   imm5 = xxxx1   signifies .b[xxxx]
        = xxx10   .h[xxx]
        = xx100   .s[xx]
        = x1000   .d[x]
        otherwise invalid
*/
static
IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
                             /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
                             IRExpr* srcV, UInt imm5 )
{
   *laneNo    = 0;
   *laneSzLg2 = 0;
   *laneCh    = '?';

   if (imm5 & 1) {
      *laneNo    = (imm5 >> 1) & 15;
      *laneSzLg2 = 0;
      *laneCh    = 'b';
   }
   else if (imm5 & 2) {
      *laneNo    = (imm5 >> 2) & 7;
      *laneSzLg2 = 1;
      *laneCh    = 'h';
   }
   else if (imm5 & 4) {
      *laneNo    = (imm5 >> 3) & 3;
      *laneSzLg2 = 2;
      *laneCh    = 's';
   }
   else if (imm5 & 8) {
      *laneNo    = (imm5 >> 4) & 1;
      *laneSzLg2 = 3;
      *laneCh    = 'd';
   }
   else {
      /* invalid */
      return IRTemp_INVALID;
   }

   return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
}
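

/* Decode example for the imm5 scheme above, with an invented encoding:
   imm5 = 10110b.  Bit 0 is clear but bit 1 is set, so this is the .h
   ("xxx10") form; the lane number is imm5[4:2] = 101b = 5, hence the
   selection is .h[5], with laneSzLg2 == 1 and laneCh == 'h'. */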


/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
static
IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
{
   IRType ty  = Ity_INVALID;
   IRTemp rcS = IRTemp_INVALID;
   switch (size) {
      case X01:
         vassert(imm <= 0xFFFFULL);
         ty  = Ity_I16;
         rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
         break;
      case X10:
         vassert(imm <= 0xFFFFFFFFULL);
         ty  = Ity_I32;
         rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
         break;
      case X11:
         ty  = Ity_I64;
         rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
      default:
         vassert(0);
   }
   IRTemp rcV = math_DUP_TO_V128(rcS, ty);
   return rcV;
}


/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
   and the upper can contain any value -- it is ignored.  If |is2| is False,
   generate IR to put |new64| in the lower half of vector reg |dd| and zero
   the upper half.  If |is2| is True, generate IR to put |new64| in the upper
   half of vector reg |dd| and leave the lower half unchanged.  This
   simulates the behaviour of the "foo/foo2" instructions in which the
   destination is half the width of sources, for example addhn/addhn2.
*/
static
void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
{
   if (is2) {
      /* Keep the old lower half of Vdd, zero its upper half, and OR in
         the lower half of |new64|, repositioned as the new upper half. */
      IRTemp t_zero_oldLO = newTempV128();
      assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      IRTemp t_newHI_zero = newTempV128();
      assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
                                 mkV128(0x0000)));
      IRTemp res = newTempV128();
      assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
                        mkexpr(t_newHI_zero)));
      putQReg128(dd, mkexpr(res));
   } else {
      /* This is simple. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
   }
}


/* Compute vector SQABS at lane size |size| for |srcE|, returning
   the q result in |*qabs| and the normal result in |*nabs|. */
static
void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
                  IRExpr* srcE, UInt size )
{
   IRTemp src, mask, maskn, nsub, qsub;
   src = mask = maskn = nsub = qsub = IRTemp_INVALID;
   newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
   assign(src,   srcE);
   assign(mask,  binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
   assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
   assign(nsub,  binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(qsub,  binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
   assign(*nabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
   assign(*qabs, binop(Iop_OrV128,
                       binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
                       binop(Iop_AndV128, mkexpr(src),  mkexpr(maskn))));
}
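

/* Why SQABS needs a separate q result, worked for 8-bit lanes with the
   one interesting input: src = 0x80 (-128).  The plain subtraction
   0 - (-128) wraps to 0x80 again, so nabs keeps -128; the saturating
   subtraction clamps to 0x7F (+127), so qabs differs from nabs and the
   caller's QCFLAG update records the saturation.  For any other input
   the two results agree and QCFLAG is untouched. */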


/* Compute vector SQNEG at lane size |size| for |srcE|, returning
   the q result in |*qneg| and the normal result in |*nneg|. */
static
void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
                  IRExpr* srcE, UInt size )
{
   IRTemp src = IRTemp_INVALID;
   newTempsV128_3(&src, nneg, qneg);
   assign(src,   srcE);
   assign(*nneg, binop(mkVecSUB(size),   mkV128(0x0000), mkexpr(src)));
   assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
}


/* Zero all except the least significant lane of |srcE|, where |size|
   indicates the lane size in the usual way. */
static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
{
   vassert(size < 4);
   IRTemp t = newTempV128();
   assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
   return t;
}


/* Generate IR to compute vector widening MULL from either the lower
   (is2==False) or upper (is2==True) halves of vecN and vecM.  The
   widening multiplies are unsigned when isU==True and signed when
   isU==False.  |size| is the narrow lane size indication.  Optionally,
   the product may be added to or subtracted from vecD, at the wide lane
   size.  This happens when |mas| is 'a' (add) or 's' (sub).  When |mas|
   is 'm' (only multiply) then the accumulate part does not happen, and
   |vecD| is expected to be IRTemp_INVALID.

   Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
   are allowed.  The result is returned in a new IRTemp, via *res. */
static
void math_MULL_ACC ( /*OUT*/IRTemp* res,
                     Bool is2, Bool isU, UInt size, HChar mas,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(res && *res == IRTemp_INVALID);
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   if (mas == 'm') vassert(vecD == IRTemp_INVALID);
   IROp   mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
   IROp   accOp = (mas == 'a') ? mkVecADD(size+1)
                  : (mas == 's' ? mkVecSUB(size+1)
                  : Iop_INVALID);
   IRTemp mul   = math_BINARY_WIDENING_V128(is2, mulOp,
                                            mkexpr(vecN), mkexpr(vecM));
   *res = newTempV128();
   assign(*res, mas == 'm' ? mkexpr(mul)
                           : binop(accOp, mkexpr(vecD), mkexpr(mul)));
}
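

/* Illustrative sketch only, not used by the decoder: how a UMLAL-style
   operation (widening unsigned multiply-accumulate) could be built on
   math_MULL_ACC.  |example_UMLAL| is a name invented for illustration;
   the arguments are assumed to be set up as in the real decode cases. */
static __attribute__((unused))
void example_UMLAL ( UInt dd, Bool is2, UInt size,
                     IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   IRTemp res = IRTemp_INVALID;
   /* isU == True selects the unsigned widening multiply; 'a' requests
      accumulation into vecD at the wide lane size. */
   math_MULL_ACC(&res, is2, True/*isU*/, size, 'a', vecN, vecM, vecD);
   putQReg128(dd, mkexpr(res));
}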


/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.D[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
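

/* The one saturating case in the doubling step above, worked for
   size == X01 (16-bit narrow lanes): with vecN and vecM lanes both
   0x8000 (-32768), the widened product is 0x40000000 and doubling it
   gives 0x80000000, which overflows the signed 32-bit range.  So sat1q
   clamps to 0x7FFFFFFF while sat1n wraps to 0x80000000; the two differ
   and QCFLAG gets set.  No other input pair can saturate this step. */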


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64,   mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64,   mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int    rcShift    = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
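

/* Effect of the rounding constant above, worked for size == X01
   (16-bit lanes) with invented inputs vN = 0x0001, vM = 0x4000:
   the widened product is 0x4000, doubled it is 0x8000.  SQDMULH takes
   the high 16 bits of 0x00008000, giving 0.  SQRDMULH first adds the
   rounding constant 1 << 15 = 0x8000, giving 0x00010000, whose high
   half is 1.  So the R variant rounds to nearest rather than towards
   minus infinity. */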


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATU2U(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATS2S(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                     mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATS2U(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
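

/* Saturation detection above, worked for UQSHL with 8-bit lanes,
   shift == 3 and an invented source byte 0x2A (00101010b).  The
   saturating shift clamps the result to 0xFF, since 0x2A << 3 = 0x150
   does not fit in 8 bits.  The check computes qDiff1 = 0x2A >> (8-3)
   = 00000001b, i.e. the three shifted-out bits; comparing that against
   qDiff2 = 0 shows a difference, so QCFLAG gets set.  For a source
   like 0x1F the shifted-out bits are all zero and nothing is flagged. */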


/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 <= 15);
            assign(res,
                   binop(Iop_OrV128,
                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            assign(res,
                   binop(Iop_ShrV128,
                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
                         mkU8(8 * imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
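

/* A concrete EXT case, with invented register contents: for
   EXT Vd.16b, Vn.16b, Vm.16b, #3 the result is bytes Vn[15:3] followed
   by Vm[2:0] in the top three positions.  The IR above computes exactly
   that as (Vm << 8*(16-3)) | (Vn >> 8*3): the right shift drops Vn
   bytes 0..2 and the left shift parks Vm bytes 0..2 at positions
   13..15. */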


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool   isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1]; /* the dest is the widened size */
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8b(q=0)      or 16b(q=1),     R=W
            xxx10  4h(q=0)      or 8h(q=1),      R=W
            xx100  2s(q=0)      or 4s(q=1),      R=W
            x1000  Invalid(q=0) or 2d(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20   14     9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* FMOV (vector, immediate, single precision) */

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,1xxx,00000 SHR d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      /* Don't generate an out of range IR shift */
      putQReg128(dd, sh == 64
                        ? mkV128(0x0000)
                        : unop(Iop_ZeroHI64ofV128,
                               binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shr d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp res = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong nmask = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp res = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110 SQSHL #imm -------- */
      /* -------- 1,01110 UQSHL #imm -------- */
      /* -------- 1,01100 SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res = IRTemp_INVALID;
      IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN #imm -------- */
      /* -------- 1,10010 UQSHRN #imm -------- */
      /* -------- 0,10011 SQRSHRN #imm -------- */
      /* -------- 1,10011 UQRSHRN #imm -------- */
      /* -------- 1,10000 SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }

   return False;
#  undef INSN
}
7700
sewardjfc83d2c2014-06-12 10:15:46 +00007701
static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     11 9 4
      01 U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL -------- */ // 1
      /* -------- 0,1011  SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
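      /* sat1q/sat1n report saturation from the doubling multiply;
         sat2q/sat2n are valid only for the accumulating forms and
         report saturation from the saturating add/sub that follows. */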
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, %c%d\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
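      /* These QANDxQxSH ops return a V256: the shifted values in the
         lower V128 and the saturation (Q) mask in the upper V128, as
         the unpacking below shows. */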
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
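      /* Re-widening the narrowed result and comparing it against the
         original source catches exactly the lanes that saturated:
         they are the ones that fail to round-trip unchanged. */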
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23   21 20 19 15     11   9 4
      01 U 11111 size L  M  m  opcode H 0  n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(res, mkV128(0x0000));
      } else {
         UInt nudge = 0;
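         /* IR shift ops only accept amounts 0 .. lanebits-1.  For a
            signed shift by exactly lanebits, shifting by lanebits-1
            gives the same result (every bit becomes a copy of the sign
            bit), so back the amount off by one.  The unsigned
            by-lanebits case was handled above: the result is zero. */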
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(res, binop(op, src, mkU8(shift - nudge)));
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isU ? "ushr" : "sshr";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
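         /* The top 'shift' bits of each result lane, vacated by the
            right shift, must come from the existing destination.
            nmask has exactly those bits set in each lane, so ANDing
            it with dd picks them out. */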
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp  tmp   = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
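      /* getLaneInfo_IMMH_IMMB decodes the right-shift form, whose
         amount is 2*lanebits - immh:immb; the left-shift form encodes
         lanebits + amount, so the true amount falls out as
         lanebits - (decoded right-shift amount). */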
      vassert(shift >= 0 && shift < lanebits);
      IROp    op  = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp  tmp   = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110  UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100  SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res = IRTemp_INVALID;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000  SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN{,2} #imm -------- */
      /* -------- 1,10010   UQSHRN{,2} #imm -------- */
      /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";  op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";  op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTempV128();
      IRTemp  zero = newTempV128();
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
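      /* Widening is done by interleaving with zero, which leaves each
         source lane in the top half of its wide lane, i.e. already
         shifted left by the source lane width.  An arithmetic (SSHLL)
         or logical (USHLL) right shift by (width - sh) then produces
         the sign- or zero-extended value shifted left by sh. */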
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
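      /* The rounding variants add half the weight of the discarded
         low half, i.e. 1 << (shift[size]-1), before narrowing. */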
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100  SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100  UMULL{2} -------- */ // 0
      /* -------- 0,1000  SMLAL{2} -------- */ // 1
      /* -------- 1,1000  UMLAL{2} -------- */ // 1
      /* -------- 0,1010  SMLSL{2} -------- */ // 2
      /* -------- 1,1010  UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011  SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110  PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
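      /* Doing the add/sub at double width and shifting right by one
         before narrowing gives the halving behaviour exactly, since
         the widened intermediate can never overflow. */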
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

8886
8887 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
8888 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
8889 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
8890 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
8891 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
8892 if (bitQ == 0 && size == X11) return False; // implied 1d case
8893 Bool isADD = opcode == BITS5(0,0,0,0,1);
8894 Bool isU = bitU == 1;
8895 IROp qop = Iop_INVALID;
8896 IROp nop = Iop_INVALID;
8897 if (isADD) {
8898 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
8899 nop = mkVecADD(size);
8900 } else {
8901 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
8902 nop = mkVecSUB(size);
8903 }
sewardj8e91fd42014-07-11 12:05:47 +00008904 IRTemp argL = newTempV128();
8905 IRTemp argR = newTempV128();
8906 IRTemp qres = newTempV128();
8907 IRTemp nres = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008908 assign(argL, getQReg128(nn));
8909 assign(argR, getQReg128(mm));
8910 assign(qres, math_MAYBE_ZERO_HI64_fromE(
8911 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
8912 assign(nres, math_MAYBE_ZERO_HI64_fromE(
8913 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
8914 putQReg128(dd, mkexpr(qres));
sewardj8e91fd42014-07-11 12:05:47 +00008915 updateQCFLAGwithDifference(qres, nres);
sewardja5a6b752014-06-30 07:33:56 +00008916 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
8917 : (isU ? "uqsub" : "sqsub");
8918 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8919 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8920 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8921 return True;
8922 }
8923
sewardjdf1628c2014-06-10 22:52:05 +00008924 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
8925 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
8926 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
8927 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
8928 /* -------- 0,10,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
sewardjdf9d6d52014-06-27 10:43:22 +00008929 Bool isORx = (size & 2) == 2;
sewardjdf1628c2014-06-10 22:52:05 +00008930 Bool invert = (size & 1) == 1;
sewardj8e91fd42014-07-11 12:05:47 +00008931 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00008932 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
sewardjdf1628c2014-06-10 22:52:05 +00008933 getQReg128(nn),
8934 invert ? unop(Iop_NotV128, getQReg128(mm))
8935 : getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00008936 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00008937 const HChar* names[4] = { "and", "bic", "orr", "orn" };
sewardjdf9d6d52014-06-27 10:43:22 +00008938 const HChar* ar = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00008939 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
8940 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
8941 return True;
8942 }
8943
   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
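      /* BSL, BIT and BIF are all bitwise multiplexes, built from the
         identity  sel(m ? x : y) = y ^ ((x ^ y) & m):
            BSL: d = (n & d)  | (m & ~d)   -- mask is the old d
            BIT: d = (n & m)  | (d & ~m)   -- insert n bits where m is 1
            BIF: d = (n & ~m) | (d & m)    -- insert n bits where m is 0
      */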
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr    = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

8992 if (opcode == BITS5(0,0,1,1,0)) {
8993 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
8994 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
8995 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00008996 Bool isGT = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00008997 IRExpr* argL = getQReg128(nn);
8998 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00008999 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009000 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009001 isGT ? binop(mkVecCMPGTS(size), argL, argR)
9002 : binop(mkVecCMPGTU(size), argL, argR));
sewardjdf9d6d52014-06-27 10:43:22 +00009003 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009004 const HChar* nm = isGT ? "cmgt" : "cmhi";
9005 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9006 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9007 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9008 return True;
9009 }
9010
9011 if (opcode == BITS5(0,0,1,1,1)) {
9012 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
9013 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
9014 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009015 Bool isGE = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00009016 IRExpr* argL = getQReg128(nn);
9017 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009018 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009019 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009020 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
9021 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
sewardjdf9d6d52014-06-27 10:43:22 +00009022 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009023 const HChar* nm = isGE ? "cmge" : "cmhs";
9024 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9025 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9026 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9027 return True;
9028 }
9029
   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                    : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH = newTempV128();
      IRTemp resQ = newTempV128();
      IRTemp zero = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero, mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
                            : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

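   /* The QAND*QSH/QAND*QRSH primops used above produce a V256: going
      by the surrounding code, the low V128 is the shifted result and
      the high V128 is nonzero in any lane that saturated.  Hence the
      low half is written to Vd and the high half is compared against
      zero to update the sticky QC flag. */
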
   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                              : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm = isACC ? (isU ? "uaba" : "saba")
                              : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp res = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

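   /* CMTST has no direct primop: a lane passes the test when
      (argL & argR) != 0, so it is synthesised as
      ~CMPEQ(argL & argR, 0), reusing the lanewise equality op. */
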
   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp opMUL = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                      : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
                              : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

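   /* The pairwise (...P) trick used above, in a 4-lane view: the
      required result is
         [ op(m3,m2), op(m1,m0), op(n3,n2), op(n1,n0) ]
      and since CatEvenLanes(m,n) = [ m2, m0, n2, n0 ] while
      CatOddLanes(m,n) = [ m3, m1, n3, n1 ], a single vertical op on
      those two vectors delivers exactly the pairwise result. */
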
   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

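   /* Re the double-rounding FIXME above: rounding n*m first and then
      rounding d +/- (n*m) again can differ in the last ulp from the
      architected FMLA/FMLS, which behave as if the product were
      computed exactly and only the final sum were rounded. */
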
   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

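   /* The "swapd" comments above flag operand swaps: the vector FP
      compare primops come only in EQ/LE/LT forms, so GE and GT are
      obtained by commuting the arguments (a >= b iff b <= a, and
      a > b iff b < a).  FACGE/FACGT additionally take lanewise
      absolute values first, giving the "absolute" compares. */
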
   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool isH = size == X01;
      IRTemp res = newTempV128();
      IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool isU = bitU == 1;
      Bool isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

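   /* SADDLP and friends are pairwise long adds: for, say, 8h <- 16b,
      result lane i is src[2i+1] + src[2i] with both operands first
      widened to the doubled lane size.  Widening the odd-numbered
      and even-numbered lanes separately and then doing one vertical
      add at size+1 gives exactly that. */
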
   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool isCLZ = bitU == 1;
      IRTemp res = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

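   /* The saturating unary cases compute the result twice: once
      saturating (qres, which is what gets written to Vd) and once
      plain wrapping (nres).  updateQCFLAGwithDifference then, in
      effect, ORs any lanewise difference between the two into the
      sticky QC flag, so QC is set exactly when some lane really
      saturated. */
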
   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      IROp opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isFNEG = bitU == 1;
      IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                       : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool is2 = bitQ == 1;
      IROp opN = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

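   /* For the narrowing {,2} forms, the 64 bits of narrowed result
      either go to the low half of Vd with the upper half zeroed (the
      base form) or to the upper half with the low half preserved (the
      "2" form); putLO64andZUorPutHI64 encapsulates that choice. */
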
   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool is2 = bitQ == 1;
      IROp opN = Iop_INVALID;
      Bool zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2 = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }

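   /* The SHLL trick above: interleaving the source with itself turns
      each narrow lane b into a double-width lane holding
      (b << lanewidth) | b, and the following left shift by the lane
      width pushes the unwanted high copy out, leaving b << lanewidth.
      For bytes, a lane 0x5A becomes 0x5A5A, and 0x5A5A << 8 gives
      0x5A00 in the 16-bit lane, as required. */
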
   if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
      IRTemp rm = mk_get_IR_rounding_mode();
      IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
      IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
      putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
      putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
          nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28      22 21      15     9 4
         0q0 01110 0 sz 1 00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1 00001 110110 n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ = bitQ == 1;
      Bool isU = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt nLanes = 0;
         Bool zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                    isQ, isF64 );
         IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                        : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm = mk_get_IR_rounding_mode();
         UInt i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 19 15     11 9 4
      0  Q  U  01111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt bitU = INSN(29,29);
   UInt size = INSN(23,22);
   UInt bitL = INSN(21,21);
   UInt bitM = INSN(20,20);
   UInt mmLO4 = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH = INSN(11,11);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD = (size & 1) == 1;
      UInt index;
      if (!isD) index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt mm = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      IRTemp res = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

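   /* The by-element forms here all follow one pattern: select lane
      'index' of Vm, broadcast it across a full vector (via
      math_DUP_TO_V128 above, or math_DUP_VEC_ELEM below), and then
      reuse the corresponding vector-by-vector operation. */
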
   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp opMUL = mkVecMUL(size);
      IROp opADD = mkVecADD(size);
      IROp opSUB = mkVecSUB(size);
      HChar ch = size == X01 ? 'h' : 's';
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      IRTemp vecN = newTempV128();
      IRTemp res = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                                : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide = nameArr_Q_SZ(1, size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 13   9 4
      000 11110 ty 1  m  op 1000 n opcode2
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields are: ty,op,opcode2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
      return False;
   }
   UInt ty = INSN(23,22);
   UInt mm = INSN(20,16);
   UInt op = INSN(15,14);
   UInt nn = INSN(9,5);
   UInt opcode2 = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01 && op == X00
       && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
      /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
      /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
      /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
      /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
      /* 31        23   20    15      9 4
         000 11110 01 1 m     00 1000 n 10 000  FCMPE Dn, Dm
         000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
         000 11110 01 1 m     00 1000 n 00 000  FCMP  Dn, Dm
         000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

         000 11110 00 1 m     00 1000 n 10 000  FCMPE Sn, Sm
         000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
         000 11110 00 1 m     00 1000 n 00 000  FCMP  Sn, Sm
         000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

         FCMPE generates Invalid Operation exn if either arg is any kind
         of NaN.  FCMP generates Invalid Operation exn if either arg is a
         signalling NaN.  We ignore this detail here and produce the same
         IR for both.
      */
      Bool isD = (ty & 1) == 1;
      Bool isCMPE = (opcode2 & 16) == 16;
      Bool cmpZero = (opcode2 & 8) == 8;
      IRType ity = isD ? Ity_F64 : Ity_F32;
      Bool valid = True;
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL = newTemp(ity);
         IRTemp argR = newTemp(ity);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ity));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ity));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
             cmpZero ? "#0.0" : nameQRegLO(mm, ity));
         return True;
      }
      return False;
   }

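   /* mk_convert_IRCmpF64Result_to_NZCV leaves the N,Z,C,V quad in
      bits 3:0, and the shift by 28 moves it to bits 31:28, which is
      the position setFlags_COPY expects, matching the architectural
      PSTATE.{N,Z,C,V} layout. */
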
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20     14    9 4
      000 11110 ty 1  opcode 10000 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
      return False;
   }
   UInt ty = INSN(23,22);
   UInt opcode = INSN(20,15);
   UInt nn = INSN(9,5);
   UInt dd = INSN(4,0);

   if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
      /* -------- 0x,000000: FMOV d_d, s_s -------- */
      /* -------- 0x,000001: FABS d_d, s_s -------- */
      /* -------- 0x,000010: FNEG d_d, s_s -------- */
      /* -------- 0x,000011: FSQRT d_d, s_s -------- */
      IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
      IRTemp src = newTemp(ity);
      IRTemp res = newTemp(ity);
      const HChar* nm = "??";
      assign(src, getQRegLO(nn, ity));
      switch (opcode) {
         case BITS6(0,0,0,0,0,0):
            nm = "fmov"; assign(res, mkexpr(src)); break;
         case BITS6(0,0,0,0,0,1):
            nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,0):
            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
         case BITS6(0,0,0,0,1,1):
            nm = "fsqrt";
            assign(res, binop(mkSQRTF(ity),
                              mkexpr(mk_get_IR_rounding_mode()),
                              mkexpr(src))); break;
         default:
            vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

10215
   if (   (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,1)))
       || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
                         || opcode == BITS6(0,0,0,1,0,0)))) {
      /* -------- 11,000100: FCVT s_h -------- */
      /* -------- 11,000101: FCVT d_h -------- */
      /* -------- 00,000111: FCVT h_s -------- */
      /* -------- 00,000101: FCVT d_s -------- */
      /* -------- 01,000111: FCVT h_d -------- */
      /* -------- 01,000100: FCVT s_d -------- */
      /* 31        23 21    16 14    9 4
         000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
         --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
         --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
         --------- 00 ----- 01 ---------   FCVT Dd, Sn
         --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
         --------- 01 ----- 00 ---------   FCVT Sd, Dn
         Rounding, when dst is smaller than src, is per the FPCR.
      */
      UInt b2322 = ty;
      UInt b1615 = opcode & BITS2(1,1);
      if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
         /* Convert S to D */
         IRTemp res = newTemp(Ity_F64);
         assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
         return True;
      }
      if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
         /* Convert D to S */
         IRTemp res = newTemp(Ity_F32);
         assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, Ity_F64)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
         return True;
      }
      /* else unhandled */
      return False;
   }
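
   /* Re the two cases above: widening F32->F64 is exact -- every F32
      value is representable as an F64 -- so Iop_F32toF64 takes no
      rounding mode and is a unop.  Narrowing F64->F32 can lose
      precision, so Iop_F64toF32 is a binop whose first argument is the
      FPCR-derived rounding mode.  E.g. the F64 value closest to 0.1 has
      no exact F32 equivalent, and the mode decides which of the two
      neighbouring F32 values it maps to. */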

   if (ty <= X01
       && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
       && opcode != BITS6(0,0,1,1,0,1)) {
      /* -------- 0x,001000 FRINTN d_d, s_s -------- */
      /* -------- 0x,001001 FRINTP d_d, s_s -------- */
      /* -------- 0x,001010 FRINTM d_d, s_s -------- */
      /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
      /* -------- 0x,001100 FRINTA d_d, s_s -------- */
      /* -------- 0x,001110 FRINTX d_d, s_s -------- */
      /* -------- 0x,001111 FRINTI d_d, s_s -------- */
      /* 31        23 21   17  14    9 4
         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
                      rm
         x==0 => S-registers, x==1 => D-registers
         rm (17:15) encodings:
            111 per FPCR (FRINTI)
            001 +inf (FRINTP)
            010 -inf (FRINTM)
            011 zero (FRINTZ)
            000 tieeven
            100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
            110 per FPCR + "exact = TRUE"
            101 unallocated
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    rm    = opcode & BITS6(0,0,0,1,1,1);
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      UChar   ch    = '?';
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ity);
         IRTemp dst = newTemp(ity);
         assign(src, getQRegLO(nn, ity));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
         return True;
      }
      return False;
   }
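
   /* Re the FRINTA kludge flagged above: Irrm_NEAREST rounds ties to
      even, whereas FRINTA is specified as ties-away-from-zero.  The two
      differ only on exact halfway cases -- e.g. 2.5 rounds to 2.0 under
      ties-to-even but to 3.0 under ties-away -- so FRINTA results are
      wrong for such inputs until a true ties-away mode
      (Irrm_NEAREST_TIE_AWAY_0) is plumbed through. */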

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15     11 9 4
      000 11110 ty 1  m  opcode 10 n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt ty     = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (ty <= X01 && opcode <= BITS4(0,0,1,1)) {
      /* ------- 0x,0000: FMUL d_d, s_s ------- */
      /* ------- 0x,0001: FDIV d_d, s_s ------- */
      /* ------- 0x,0010: FADD d_d, s_s ------- */
      /* ------- 0x,0011: FSUB d_d, s_s ------- */
      IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop = Iop_INVALID;
      const HChar* nm = "???";
      switch (opcode) {
         case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
         case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
         case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
         case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
         default: vassert(0);
      }
      IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ity), getQRegLO(mm, ity));
      IRTemp  res  = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
      /* ------- 0x,1000: FNMUL d_d, s_s ------- */
      IRType ity  = ty == X00 ? Ity_F32 : Ity_F64;
      IROp   iop  = mkMULF(ity);
      IROp   iopn = mkNEGF(ity);
      const HChar* nm = "fnmul";
      IRExpr* resE = unop(iopn,
                          triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                                getQRegLO(nn, ity), getQRegLO(mm, ity)));
      IRTemp  res  = newTemp(ity);
      assign(res, resE);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

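   /* Re FNMUL above: IEEE negation is exact, so wrapping the rounded
      product in unop(iopn, ...) yields exactly "negate the rounded
      n * m".  That matches FNMUL's architected negate-after-multiply
      behaviour and introduces no second rounding step. */
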
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20 15 14 9 4
      000 11111 ty o1 m  o0 a  n d
      The first 3 bits are really "M 0 S", but M and S are always zero.
      Decode fields: ty,o1,o0
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
      return False;
   }
   UInt ty    = INSN(23,22);
   UInt bitO1 = INSN(21,21);
   UInt mm    = INSN(20,16);
   UInt bitO0 = INSN(15,15);
   UInt aa    = INSN(14,10);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);
   vassert(ty < 4);

   if (ty <= X01) {
      /* -------- 0x,0,0 FMADD  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,0,1 FMSUB  d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
      /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
      /* -------------------- F{N}M{ADD,SUB} -------------------- */
      /* 31             22 20 15 14 9 4   ix
         000 11111 0 sz 0  m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
         000 11111 0 sz 0  m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
         000 11111 0 sz 1  m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
         where Fx=Dx when sz=1, Fx=Sx when sz=0

                  -----SPEC------    ----IMPL----
         fmadd       a +  n * m         a + n * m
         fmsub       a + (-n) * m       a - n * m
         fnmadd    (-a) + (-n) * m    -(a + n * m)
         fnmsub    (-a) +   n * m    -(a - n * m)
      */
      Bool    isD   = (ty & 1) == 1;
      UInt    ix    = (bitO1 << 1) | bitO0;
      IRType  ity   = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ity);
      IROp    opSUB = mkSUBF(ity);
      IROp    opMUL = mkMULF(ity);
      IROp    opNEG = mkNEGF(ity);
      IRTemp  res   = newTemp(ity);
      IRExpr* eA    = getQRegLO(aa, ity);
      IRExpr* eN    = getQRegLO(nn, ity);
      IRExpr* eM    = getQRegLO(mm, ity);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
          nameQRegLO(mm, ity), nameQRegLO(aa, ity));
      return True;
   }

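   /* Two caveats on the SPEC vs IMPL table above.  First, the
      architected F{N}M{ADD,SUB} are fused: the product n * m is not
      rounded before the addition, whereas the IMPL column rounds twice
      (once after opMUL, once after opADD/opSUB), so results can differ
      from hardware by an ulp.  Second, rewriting (-a) + (-n)*m as
      -(a + n*m) is exact under round-to-nearest, since negation is exact
      and ties-to-even is sign-symmetric, but under the directed modes
      (towards +/- infinity) negating a rounded sum is not the same as
      rounding the negated sum.  Both are approximations, not a claim of
      bit-exactness. */
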
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  28    23 21 20   12  9    4
      000 11110 ty 1  imm8 100 imm5 d
      The first 3 bits are really "M 0 S", but M and S are always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
       || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
      return False;
   }
   UInt ty   = INSN(23,22);
   UInt imm8 = INSN(20,13);
   UInt imm5 = INSN(9,5);
   UInt dd   = INSN(4,0);

   /* ------- 00,00000: FMOV s_imm ------- */
   /* ------- 01,00000: FMOV d_imm ------- */
   if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
      Bool  isD = (ty & 1) == 1;
      ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
      if (!isD) {
         vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
      }
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
      DIP("fmov %s, #0x%llx\n",
          nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
      return True;
   }

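   /* For reference, VFPExpandImm decompresses the 8-bit immediate into a
      sign bit, a biased exponent built from the inverted-and-replicated
      top immediate bits, and a 4-bit fraction, per the ARM ARM.  As a
      worked example (an inference from that scheme, not something this
      file states): imm8 == 0x70 should expand to 0x3FF0000000000000 in
      the 64-bit case and 0x3F800000 in the 32-bit case, both of which
      are 1.0. */
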
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /* 30          23 21 20 18  15     9 4
      sf 00 11110 0x 1  00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1  00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------   FCVTP-------- (round to +inf)
      ---------------- 10 --------------   FCVTM-------- (round to -inf)
      ---------------- 11 --------------   FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

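   /* Worked example of the case coding above: FCVTZS Wd, Dn has ty=01,
      sf=0, op=000 and rmode=11, so isF64=True, isI64=False, isU=False,
      giving ix==4 and hence Iop_F64toI32S with Irrm_ZERO -- one of the
      combinations on the validated list. */
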
   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf S 28    ty rm op  15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

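   /* The unop/binop split above is deliberate: for ix==1 and ix==5
      (SCVTF/UCVTF Dd, Wn) the I32->F64 conversion is exact, since every
      32-bit integer fits in F64's 53-bit significand, so those two ops
      take no rounding mode.  The remaining combinations can be inexact
      and therefore take the FPCR-derived mode as their first operand. */
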
   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf S       ty rm op  15     9 4
       (1) 0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }
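
   /* All six FMOV (general) cases above are raw bit transfers between
      the integer and vector register files: no conversion, rounding or
      flag setting is involved.  Cases 3 and 6 address the upper 64 bits
      of the vector register (Vd.D[1]), which is why they use
      putQRegHI64/getQRegHI64 and, unlike the others, leave the low half
      of the destination untouched. */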

   return False;
#  undef INSN
}


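/* Dispatch for the SIMD and FP instruction space.  This is a simple
   try-each-decoder scheme: every dis_AdvSIMD_* handler starts by
   checking its own fixed opcode fields and returns False on a mismatch,
   so in principle at most one of them claims a given insn and the order
   below should not be semantically significant.  The UNLIKELY hints
   reflect that, for any particular insn, each individual handler almost
   always fails. */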
static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IRop
            // we're injecting here can change.  In which case the
            // translation has to be redone.  For ease of handling, we
            // simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

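   /* For reference, a complete client-request site therefore looks like
      this in guest code -- the 16-byte preamble plus one marker insn --
      which is why the length used above is 20 rather than 4:

         93CC0D8C   ror x12, x12, #3
         93CC358C   ror x12, x12, #13
         93CCCD8C   ror x12, x12, #51
         93CCF58C   ror x12, x12, #61
         AA0A014A   orr x10, x10, x10   // X3 = client_request ( X4 )
   */
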
   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

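      /* For the curious: the loop above prints the word MSB-first with a
         tick between nibbles and a space between bytes.  E.g. (derived
         by inspecting the loop, not from an actual run) the word
         0x93CC0D8C would render as
            1001'0011 1100'1100 0000'1101 1000'1100                   */
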
      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/