/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, FP comparison "unordered" is implemented as normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence sometimes the least
     significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA and FRINTN are kludged: they just round to nearest, with
     no special handling for the "ties" case.  FRINTX might be dubious
     too.
*/
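
/* To make the double-rounding limitation above concrete: for
   FMADD Dd, Dn, Dm, Da the front end currently generates, in sketch
   form,

      t1 = triop(Iop_MulF64, rm, n, m)     // rounded once
      t2 = triop(Iop_AddF64, rm, t1, a)    // rounded again

   whereas the suggested fix is the fused IROp, rounded only once:

      t  = qop(Iop_MAddF64, rm, n, m, a)

   (Iop_MAddF64 is the IR multiply-add op referred to above; 'qop'
   stands for a 4-argument analogue of the binop/triop helpers defined
   later in this file.) */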

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
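
/* A minimal sketch of what a client-side emitter for one such
   sequence (the client-request variant) would produce.  This helper
   is hypothetical and purely illustrative -- it is not part of this
   file -- but the word values are exactly those decoded below. */
#if 0
static void example_emit_client_request_marker ( UInt* out )
{
   out[0] = 0x93CC0D8C;   /* ror x12, x12, #3  */
   out[1] = 0x93CC358C;   /* ror x12, x12, #13 */
   out[2] = 0x93CCCD8C;   /* ror x12, x12, #51 */
   out[3] = 0x93CCF58C;   /* ror x12, x12, #61 */
   out[4] = 0xAA0A014A;   /* orr x10,x10,x10 : X3 = client_request(X4) */
}
#endif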

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                               ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction,
   so that we don't have to pass them around endlessly.  CONST means
   does not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
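
/* Typical use in the decode functions below (a sketch; 'dd', 'nn'
   and 'imm' are hypothetical decode-time variables):

      DIP("add %s, %s, #%llu\n", nameIReg64orZR(dd),
                                 nameIReg64orZR(nn), imm);

   Nothing is printed unless front-end tracing (VEX_TRACE_FE) is
   enabled. */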


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the         ---*/
/*--- arm insn stream.                                      ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
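
/* For example (a quick illustration, not code from this file):
   sx_to_64(0x1FF, 9) == 0xFFFFFFFFFFFFFFFFULL, since bit 8 is set,
   whereas sx_to_64(0x0FF, 9) == 0xFFULL, since bit 8 is clear. */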

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b10) << 10) \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11) \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
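
/* Sketch of how the decode logic below uses these helpers: fetch the
   instruction word, then compare bit slices against BITSn patterns,
   e.g.

      UInt insn = getUIntLittleEndianly(guest_instr);
      if (SLICE_UInt(insn, 30, 24) == BITS7(0,0,1,0,0,0,1)) {
         // insn[30:24] matched the 7-bit pattern 0010001
      }

   ('guest_instr' here is just an assumed pointer to the current
   instruction's bytes.) */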


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.     ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
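
/* In this family of selectors, 'size' encodes the lane width
   (0: 8-bit lanes .. 3: 64-bit lanes).  So, for instance (a usage
   sketch, not code from this file),

      binop(mkVecADD(2), mkexpr(a), mkexpr(b))

   builds an Iop_Add32x4, i.e. a 4-lane 32-bit vector add of 'a' and
   'b'. */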

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
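
/* For instance, mathROR(Ity_I32, t, 8) generates

      (t << 24) | (t >> 8)

   which is 't' rotated right by 8 bit positions. */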

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
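
/* For instance, mathREPLICATE(Ity_I32, t, 5) generates

      (t << 26) >>s 31    // arithmetic shift right

   which is 0xFFFFFFFF if bit 5 of 't' is set, and 0 otherwise. */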

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.                ---*/
/*------------------------------------------------------------*/

#define OFFB_X0   offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1   offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2   offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3   offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4   offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5   offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6   offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7   offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8   offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9   offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10  offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11  offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12  offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13  offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14  offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15  offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16  offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17  offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18  offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19  offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20  offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21  offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22  offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23  offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24  offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25  offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26  offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27  offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28  offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29  offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30  offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP  offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC   offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR    offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0   offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1   offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2   offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3   offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4   offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5   offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6   offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7   offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8   offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9   offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10  offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11  offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12  offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13  offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14  offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15  offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16  offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17  offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18  offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19  offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20  offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21  offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22  offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23  offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24  offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25  offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26  offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27  offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28  offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29  offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30  offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31  offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR   offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN   offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
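
/* For example, offsetQRegLane(3, Ity_I32, 2) is the state offset of
   lane 2 of q3 viewed as 32-bit lanes, i.e. offsetQReg128(3) + 8. */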

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
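
/* In plain C the conversion above amounts to (a sketch):

      UInt armEncd = fpcr >> 22;
      UInt irrm    = ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);

   e.g. the ARM encoding 01 (round to +infinity) becomes the IR
   encoding 10 (Irrm_PosINF). */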


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns       ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1704

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}
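
/* For reference, the net arithmetic these thunk operands describe, as
   a tiny host-side sketch (illustrative only; not used by the decoder,
   and the name ref_ADC_SBC is made up).  On ARM64, SBC computes
   argL - argR - (1 - carry), i.e. borrow is the inverse of carry. */
#if 0
static ULong ref_ADC_SBC ( Bool isSBC, ULong argL, ULong argR, ULong oldC )
{
   return isSBC ? argL - argR - (1ULL - (oldC & 1))
                : argL + argR + (oldC & 1);
}
#endif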

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64, mkexpr(x), mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64, mkexpr(x), mkU8(sh)),
                       mkexpr(maskT))
           )
         );
   return res;
}

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
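
/* The same mask-and-shift scheme, restated as plain C so the constants
   above can be sanity-checked standalone (illustrative sketch only;
   ref_SWAPHELPER is a made-up name). */
#if 0
static ULong ref_SWAPHELPER ( ULong x, ULong mask, Int sh )
{
   return ((x & mask) >> sh) | ((x << sh) & mask);
}
/* One round with mask 0xFF00FF00FF00FF00 and sh 8, applied to
   0x0123456789ABCDEF, swaps bytes within each 16-bit lane, giving
   0x23016745AB89EFCD.  The 16- and 32-bit rounds then complete the
   full byte swap, 0xEFCDAB8967452301. */
#endif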

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
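
/* Host-side restatement of the doubling trick (illustrative sketch
   only; ref_DUP8_TO_64 is a made-up name).  Each OR with a shifted
   copy doubles the populated width, so an I8 source takes three
   rounds (8->16->32->64); e.g. 0xAB becomes 0xABABABABABABABAB. */
#if 0
static ULong ref_DUP8_TO_64 ( ULong x /* zero except in bits 7:0 */ )
{
   x |= x << 8;   /* bits 15:0 populated */
   x |= x << 16;  /* bits 31:0 populated */
   x |= x << 32;  /* all 64 bits populated */
   return x;
}
#endif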


/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}

/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix    = newTemp(Ity_I64);
   IRTemp termL = newTemp(Ity_I64);
   IRTemp termR = newTemp(Ity_I64);
   IRTemp nzcv  = newTemp(Ity_I64);
   IRTemp irRes = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
      into an almost correct NZCV value (incredibly), except for the
      case of UN, where it produces 0100 instead of the required 0011.

      termR is therefore a correction term, also computed from ix.  It
      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
      the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
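
/* That claim is easy to check outside the IR.  A standalone sketch of
   the superopt formula (illustrative only; it mirrors the test program
   mentioned above and is not part of the decoder). */
#if 0
static void check_IRCmpF64Result_to_NZCV ( void )
{
   const UInt irResVals[4] = { 0x00, 0x01, 0x40, 0x45 }; /* GT LT EQ UN */
   const UInt expected[4]  = { 0x2,  0x8,  0x6,  0x3  };
   UInt i;
   for (i = 0; i < 4; i++) {
      ULong irRes = irResVals[i];
      /* bits 6 and 0 of irRes, side by side */
      ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
      /* the superopt sequence, and its UN correction term */
      ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
      ULong termR = (ix & (ix >> 1)) & 1;
      vassert((termL - termR) == expected[i]);
   }
}
#endif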


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
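
/* Two worked examples of the wmask computation, checkable standalone
   (illustrative sketch only; check_dbm_DecodeBitMasks is a made-up
   name).  (N,immr,imms) = (1,0,0b000000) selects esize 64 with a single
   1 bit, so wmask is 1.  (0,0,0b111100) selects esize 2 with one 1 bit
   per element, which replicates to 0x5555555555555555. */
#if 0
static void check_dbm_DecodeBitMasks ( void )
{
   ULong w = 0;
   vassert(dbm_DecodeBitMasks(&w, NULL, 1, 0x00, 0, True, 64)
           && w == 1ULL);
   vassert(dbm_DecodeBitMasks(&w, NULL, 0, 0x3C, 0, True, 64)
           && w == 0x5555555555555555ULL);
}
#endif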


static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }
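
   /* Worked example of the MOVK slice-insert above (illustrative):
      with Xd = 0x1111222233334444, "movk xd, #0xBEEF, lsl 16" keeps
      everything except bits 31:16 and ORs in the shifted immediate,
      giving 0x11112222BEEF4444. */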

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}
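
/* Host-side restatement of the 64-bit operand computation (illustrative
   sketch only; ref_shifted64 is a made-up name). */
#if 0
static ULong ref_shifted64 ( UInt sh_how, UInt sh_amt, ULong v, Bool invert )
{
   ULong r = 0;
   switch (sh_how) {
      case 0: r = v << sh_amt; break;                        /* LSL */
      case 1: r = v >> sh_amt; break;                        /* LSR */
      case 2: r = (ULong)(((Long)v) >> sh_amt); break;       /* ASR */
      case 3: r = sh_amt == 0
                     ? v
                     : (v >> sh_amt) | (v << (64 - sh_amt)); /* ROR */
              break;
   }
   return invert ? ~r : r;
}
#endif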


static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }
2836
sewardjdee30502014-06-04 13:09:44 +00002837 /* ------------------- ADC/SBC(reg) ------------------- */
2838 /* x==0 => 32 bit op x==1 => 64 bit op
2839
2840 31 30 29 28 23 21 20 15 9 4
2841 | | | | | | | | | |
2842 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2843 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2844 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2845 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2846 */
2847
2848 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2849 UInt bX = INSN(31,31);
2850 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2851 UInt bS = INSN(29,29); /* set flags */
2852 UInt rM = INSN(20,16);
2853 UInt rN = INSN(9,5);
2854 UInt rD = INSN(4,0);
2855
2856 Bool isSUB = bOP == 1;
2857 Bool is64 = bX == 1;
2858 IRType ty = is64 ? Ity_I64 : Ity_I32;
2859
2860 IRTemp oldC = newTemp(ty);
2861 assign(oldC,
2862 is64 ? mk_arm64g_calculate_flag_c()
2863 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2864
2865 IRTemp argL = newTemp(ty);
2866 assign(argL, getIRegOrZR(is64, rN));
2867 IRTemp argR = newTemp(ty);
2868 assign(argR, getIRegOrZR(is64, rM));
2869
2870 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2871 IRTemp res = newTemp(ty);
2872 if (isSUB) {
2873 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2874 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2875 assign(res,
2876 binop(op,
2877 binop(op, mkexpr(argL), mkexpr(argR)),
2878 binop(xorOp, mkexpr(oldC), one)));
2879 } else {
2880 assign(res,
2881 binop(op,
2882 binop(op, mkexpr(argL), mkexpr(argR)),
2883 mkexpr(oldC)));
2884 }
2885
2886 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2887
2888 if (bS) {
2889 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2890 }
2891
2892 DIP("%s%s %s, %s, %s\n",
2893 bOP ? "sbc" : "adc", bS ? "s" : "",
2894 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2895 nameIRegOrZR(is64, rM));
2896 return True;
2897 }
2898

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd   AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd   ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd   EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd   ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /*    31       23 22 20 15     9  4
         10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
         10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /*    31 30           20 15 14 9 4
         sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
         sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /*    31 30 28        20 15   11 9  4
         sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
         sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
         sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
         sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }
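
   /* Illustrative example: "csinc w0, w1, w2, eq" gives
      w0 = eq ? w1 : w2+1.  The CSET alias is CSINC with both sources
      WZR and the condition inverted. */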

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }
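
   /* Illustrative example: "add x0, x1, w2, sxtw #2" computes
      x0 = x1 + (SignExtend64(w2) << 2), the usual scaled-index idiom. */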

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31   29        20   15   11 9    3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31   29        20 15   11 9    3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
                binop(Iop_Xor64,
                      binop(Iop_Shl64, mkexpr(src), one),
                      binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one),
                            one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }
3374
sewardjca95f2d2014-11-25 17:27:32 +00003375 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00003376 /* 30 28 20 15 11 9 4
3377 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3378 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3379 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
sewardjca95f2d2014-11-25 17:27:32 +00003380 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
sewardjbbcf1882014-01-12 12:49:10 +00003381 */
3382 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
sewardjca95f2d2014-11-25 17:27:32 +00003383 && INSN(15,12) == BITS4(0,0,1,0)) {
sewardjbbcf1882014-01-12 12:49:10 +00003384 Bool is64 = INSN(31,31) == 1;
3385 UInt mm = INSN(20,16);
3386 UInt op = INSN(11,10);
3387 UInt nn = INSN(9,5);
3388 UInt dd = INSN(4,0);
3389 IRType ty = is64 ? Ity_I64 : Ity_I32;
3390 IRTemp srcL = newTemp(ty);
sewardjca95f2d2014-11-25 17:27:32 +00003391 IRTemp srcR = newTemp(Ity_I64);
sewardjbbcf1882014-01-12 12:49:10 +00003392 IRTemp res = newTemp(ty);
3393 IROp iop = Iop_INVALID;
3394 assign(srcL, getIRegOrZR(is64, nn));
sewardjca95f2d2014-11-25 17:27:32 +00003395 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3396 mkU64(is64 ? 63 : 31)));
3397 if (op < 3) {
3398 // LSLV, LSRV, ASRV
3399 switch (op) {
3400 case BITS2(0,0): iop = mkSHL(ty); break;
3401 case BITS2(0,1): iop = mkSHR(ty); break;
3402 case BITS2(1,0): iop = mkSAR(ty); break;
3403 default: vassert(0);
3404 }
3405 assign(res, binop(iop, mkexpr(srcL),
3406 unop(Iop_64to8, mkexpr(srcR))));
3407 } else {
3408 // RORV
3409 IROp opSHL = mkSHL(ty);
3410 IROp opSHR = mkSHR(ty);
3411 IROp opOR = mkOR(ty);
3412 IRExpr* width = mkU64(is64 ? 64: 32);
3413 assign(
3414 res,
3415 IRExpr_ITE(
3416 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3417 mkexpr(srcL),
3418 binop(opOR,
3419 binop(opSHL,
3420 mkexpr(srcL),
3421 unop(Iop_64to8, binop(Iop_Sub64, width,
3422 mkexpr(srcR)))),
3423 binop(opSHR,
3424 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3425 ));
sewardjbbcf1882014-01-12 12:49:10 +00003426 }
sewardjbbcf1882014-01-12 12:49:10 +00003427 putIRegOrZR(is64, dd, mkexpr(res));
sewardjca95f2d2014-11-25 17:27:32 +00003428 vassert(op < 4);
3429 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
sewardjbbcf1882014-01-12 12:49:10 +00003430 DIP("%s %s, %s, %s\n",
3431 names[op], nameIRegOrZR(is64,dd),
3432 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3433 return True;
3434 }
3435
3436 /* -------------------- SDIV/UDIV -------------------- */
3437 /* 30 28 20 15 10 9 4
3438 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3439 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3440 */
3441 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3442 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3443 Bool is64 = INSN(31,31) == 1;
3444 UInt mm = INSN(20,16);
3445 Bool isS = INSN(10,10) == 1;
3446 UInt nn = INSN(9,5);
3447 UInt dd = INSN(4,0);
3448 if (isS) {
3449 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3450 getIRegOrZR(is64, nn),
3451 getIRegOrZR(is64, mm)));
3452 } else {
3453 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3454 getIRegOrZR(is64, nn),
3455 getIRegOrZR(is64, mm)));
3456 }
3457 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3458 nameIRegOrZR(is64, dd),
3459 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3460 return True;
3461 }
3462
3463 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3464 /* 31 23 20 15 14 9 4
3465 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3466 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3467 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3468 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3469 with operation
3470 Xd = Xa +/- (Wn *u/s Wm)
3471 */
3472 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3473 Bool isU = INSN(23,23) == 1;
3474 UInt mm = INSN(20,16);
3475 Bool isAdd = INSN(15,15) == 0;
3476 UInt aa = INSN(14,10);
3477 UInt nn = INSN(9,5);
3478 UInt dd = INSN(4,0);
3479 IRTemp wN = newTemp(Ity_I32);
3480 IRTemp wM = newTemp(Ity_I32);
3481 IRTemp xA = newTemp(Ity_I64);
3482 IRTemp muld = newTemp(Ity_I64);
3483 IRTemp res = newTemp(Ity_I64);
3484 assign(wN, getIReg32orZR(nn));
3485 assign(wM, getIReg32orZR(mm));
3486 assign(xA, getIReg64orZR(aa));
3487 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3488 mkexpr(wN), mkexpr(wM)));
3489 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3490 mkexpr(xA), mkexpr(muld)));
3491 putIReg64orZR(dd, mkexpr(res));
3492 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3493 nameIReg64orZR(dd), nameIReg32orZR(nn),
3494 nameIReg32orZR(mm), nameIReg64orZR(aa));
3495 return True;
3496 }
3497 vex_printf("ARM64 front end: data_processing_register\n");
3498 return False;
3499# undef INSN
3500}
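

/* For exposition only: hypothetical scalar models of two of the
   trickier lowerings in the function above.  They are not used by the
   decoder and are not part of the original file.  model_CLS64 shows
   the CLS-via-CLZ trick used in the CLZ/CLS case: XORing each bit
   with its right-hand neighbour turns a run of identical leading bits
   into a run of leading zeroes, so counting leading zeroes of the
   XORed value counts leading sign bits.  model_RORV64 shows the RORV
   lowering, including why the zero-shift case is tested separately
   (a shift by the full register width would be undefined). */
static UInt model_CLS64 ( ULong x )
{
   /* bit i of z is x[i] ^ x[i-1]; bit 0 is zero */
   ULong z = (x << 1) ^ ((x >> 1) << 1);
   if (z == 0) return 63;        /* every bit matches the sign bit */
   UInt  n = 0;
   ULong m = 1ULL << 63;
   while ((z & m) == 0) { n++; m >>= 1; }   /* == Iop_Clz64 on z */
   return n;
}

static ULong model_RORV64 ( ULong x, ULong amt )
{
   amt &= 63;                    /* srcR is masked the same way above */
   if (amt == 0) return x;       /* avoids the undefined shift by 64 */
   return (x << (64 - amt)) | (x >> amt);
}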


/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
                ? (_lo128) \
                : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))


/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
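

/* For exposition only: a hypothetical scalar model of the 8-bit case
   of math_INTERLEAVE2_128 above (not used by the decoder, not part of
   the original file).  Byte 0 is lane 0.  It makes the zip structure
   of the i0/i1 outputs explicit: i0 == B7 A7 .. B0 A0 and
   i1 == Bf Af .. B8 A8, exactly as the comments above describe. */
static void model_INTERLEAVE2_8x16 ( UChar* i0, UChar* i1,
                                     const UChar* u0, const UChar* u1 )
{
   UInt k;
   for (k = 0; k < 8; k++) {
      i0[2*k]   = u0[k];      /* A lanes land in the even positions */
      i0[2*k+1] = u1[k];      /* B lanes land in the odd positions  */
      i1[2*k]   = u0[k+8];
      i1[2*k+1] = u1[k+8];
   }
}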


/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
             ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
             ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
             ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}
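

/* For exposition only: a hypothetical lane-level reference model for
   the 3-way interleave above (not used by the decoder, not part of
   the original file).  Whatever the lane size, the three outputs,
   read from lane 0 upwards, are simply A0 B0 C0 A1 B1 C1 .. -- the
   vector shuffles above compute exactly this sequence, split across
   i0, i1 and i2 (dst[0..n-1], dst[n..2n-1], dst[2n..3n-1] here). */
static void model_INTERLEAVE3 ( UChar* dst /* 3*nLanes*laneSzB bytes */,
                                const UChar* a, const UChar* b,
                                const UChar* c,
                                UInt nLanes, UInt laneSzB )
{
   UInt k, i;
   for (k = 0; k < nLanes; k++) {
      for (i = 0; i < laneSzB; i++) {
         dst[(3*k+0)*laneSzB + i] = a[k*laneSzB + i];
         dst[(3*k+1)*laneSzB + i] = b[k*laneSzB + i];
         dst[(3*k+2)*laneSzB + i] = c[k*laneSzB + i];
      }
   }
}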


/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
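

/* For exposition only: a hypothetical scalar model of the 32x4 case
   of math_DEINTERLEAVE2_128 above (not used by the decoder, not part
   of the original file).  CatEvenLanes32x4(i1,i0) collects the
   even-numbered 32-bit lanes of the i1:i0 pair and CatOddLanes32x4
   the odd-numbered ones, which is exactly what undoes a 2-way
   interleave. */
static void model_DEINTERLEAVE2_32x4 ( UInt* u0, UInt* u1,
                                       const UInt* i0, const UInt* i1 )
{
   /* i0 = {A0,B0,A1,B1}, i1 = {A2,B2,A3,B3}, lane 0 first */
   const UInt* iv[2] = { i0, i1 };
   UInt k;
   for (k = 0; k < 4; k++) {
      u0[k] = iv[k/2][2*(k%2)];     /* even lanes: A0 A1 A2 A3 */
      u1[k] = iv[k/2][2*(k%2)+1];   /* odd lanes:  B0 B1 B2 B3 */
   }
}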


/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
      assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0

      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that.  (A scalar sketch of this
   scheme, for 8-bit lanes, follows math_get_doubler_and_halver
   below.) */

/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}
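
/* For exposition only: a hypothetical scalar model (8-bit lanes) of
   the clone/interleave/halve scheme described above (not used by the
   decoder, not part of the original file).  Only the meaningful lower
   8 bytes of each vector are modelled.  The net effect is the perfect
   2-way zip of u0 and u1, spread across two 64-bit results. */
static void model_INTERLEAVE2_64_8b ( UChar* i0, UChar* i1,
                                      const UChar* u0, const UChar* u1 )
{
   UChar du0[16], du1[16], di0[16], di1[16];
   UInt  k, b;
   /* doubler: InterleaveLO8x16(v,v) clones each lane of the low half */
   for (k = 0; k < 8; k++) {
      du0[2*k] = du0[2*k+1] = u0[k];
      du1[2*k] = du1[2*k+1] = u1[k];
   }
   /* full-width interleave at the doubled (16 bit) lane size */
   for (k = 0; k < 4; k++) {
      for (b = 0; b < 2; b++) {
         di0[4*k+b]   = du0[2*k+b];
         di0[4*k+2+b] = du1[2*k+b];
         di1[4*k+b]   = du0[2*(k+4)+b];
         di1[4*k+2+b] = du1[2*(k+4)+b];
      }
   }
   /* halver: CatEvenLanes8x16 keeps the even-numbered lanes */
   for (k = 0; k < 8; k++) {
      i0[k] = di0[2*k];   /* == u0[0] u1[0] u0[1] u1[1] .. */
      i1[k] = di1[2*k];   /* == u0[4] u1[4] u0[5] u1[5] .. */
   }
}
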

/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}


/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for the sake of sanity checking it takes
   the whole insn.  This appears to depend on insn[15:12], with
   opt=insn[15:13] and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}
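

/* For exposition only: a hypothetical scalar model of the EA forms
   decoded above (not used by the decoder, not part of the original
   file).  opt selects how Rm is extended and S selects scaling by
   the transfer size; other opt values are rejected, just as
   gen_indexed_EA fails them. */
static ULong model_reg_reg_EA ( ULong xn, ULong xm,
                                UInt opt, UInt S, UInt szLg2 )
{
   ULong off;
   switch (opt) {
      case BITS3(0,1,1): case BITS3(1,1,1):   /* Xm, used as-is      */
         off = xm; break;
      case BITS3(0,1,0):                      /* 32Uto64(Wm)         */
         off = (ULong)(UInt)xm; break;
      case BITS3(1,1,0):                      /* 32Sto64(Wm)         */
         off = (ULong)(Long)(Int)(UInt)xm; break;
      default:                                /* invalid encodings   */
         return 0;
   }
   return xn + (S ? (off << szLg2) : off);
}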


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
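

/* For exposition only: a hypothetical model of the invariant the two
   helpers above maintain (not used by the decoder, not part of the
   original file).  A narrowing store of szB bytes followed by a
   zero-widening load of the same size yields the low 8*szB bits of
   the original value, zero extended to 64 bits. */
static ULong model_narrow_then_zwiden ( ULong x, UInt szB )
{
   ULong mask = szB == 8 ? ~0ULL : (1ULL << (8 * szB)) - 1;
   return x & mask;
}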
4615
4616
sewardj18bf5172014-06-14 18:05:30 +00004617/* Generate a "standard 7" name, from bitQ and size. But also
4618 allow ".1d" since that's occasionally useful. */
4619static
4620const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4621{
4622 vassert(bitQ <= 1 && size <= 3);
4623 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00004624 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00004625 UInt ix = (bitQ << 2) | size;
4626 vassert(ix < 8);
4627 return nms[ix];
4628}
4629
4630
sewardjbbcf1882014-01-12 12:49:10 +00004631static
4632Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4633{
4634# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4635
4636 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4637 /* uimm12 is scaled by the transfer size
4638
4639 31 29 26 21 9 4
4640 | | | | | |
4641 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4642 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4643
4644 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4645 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4646
4647 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4648 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4649
4650 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4651 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4652 */
4653 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4654 UInt szLg2 = INSN(31,30);
4655 UInt szB = 1 << szLg2;
4656 Bool isLD = INSN(22,22) == 1;
4657 UInt offs = INSN(21,10) * szB;
4658 UInt nn = INSN(9,5);
4659 UInt tt = INSN(4,0);
4660 IRTemp ta = newTemp(Ity_I64);
4661 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4662 if (nn == 31) { /* FIXME generate stack alignment check */ }
4663 vassert(szLg2 < 4);
4664 if (isLD) {
4665 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4666 } else {
4667 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4668 }
4669 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4670 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4671 DIP("%s %s, [%s, #%u]\n",
4672 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4673 nameIReg64orSP(nn), offs);
4674 return True;
4675 }
4676
4677 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4678 /*
4679 31 29 26 20 11 9 4
4680 | | | | | | |
4681 (at-Rn-then-Rn=EA) | | |
4682 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4683 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4684
4685 (at-EA-then-Rn=EA)
4686 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4687 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4688
4689 (at-EA)
4690 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4691 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4692
4693 simm9 is unscaled.
4694
4695 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4696 load case this is because would create two competing values for
4697 Rt. In the store case the reason is unclear, but the spec
4698 disallows it anyway.
4699
4700 Stores are narrowing, loads are unsigned widening. sz encodes
4701 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4702 */
4703 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4704 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4705 UInt szLg2 = INSN(31,30);
4706 UInt szB = 1 << szLg2;
4707 Bool isLoad = INSN(22,22) == 1;
4708 UInt imm9 = INSN(20,12);
4709 UInt nn = INSN(9,5);
4710 UInt tt = INSN(4,0);
4711 Bool wBack = INSN(10,10) == 1;
4712 UInt how = INSN(11,10);
4713 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4714 /* undecodable; fall through */
4715 } else {
4716 if (nn == 31) { /* FIXME generate stack alignment check */ }
4717
4718 // Compute the transfer address TA and the writeback address WA.
4719 IRTemp tRN = newTemp(Ity_I64);
4720 assign(tRN, getIReg64orSP(nn));
4721 IRTemp tEA = newTemp(Ity_I64);
4722 Long simm9 = (Long)sx_to_64(imm9, 9);
4723 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4724
4725 IRTemp tTA = newTemp(Ity_I64);
4726 IRTemp tWA = newTemp(Ity_I64);
4727 switch (how) {
4728 case BITS2(0,1):
4729 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4730 case BITS2(1,1):
4731 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4732 case BITS2(0,0):
4733 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4734 default:
4735 vassert(0); /* NOTREACHED */
4736 }
4737
sewardje0bff8b2014-03-09 09:40:23 +00004738 /* Normally rN would be updated after the transfer. However, in
4739 the special case typifed by
4740 str x30, [sp,#-16]!
4741 it is necessary to update SP before the transfer, (1)
4742 because Memcheck will otherwise complain about a write
4743 below the stack pointer, and (2) because the segfault
4744 stack extension mechanism will otherwise extend the stack
4745 only down to SP before the instruction, which might not be
4746 far enough, if the -16 bit takes the actual access
4747 address to the next page.
4748 */
4749 Bool earlyWBack
4750 = wBack && simm9 < 0 && szB == 8
4751 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4752
4753 if (wBack && earlyWBack)
4754 putIReg64orSP(nn, mkexpr(tEA));
4755
sewardjbbcf1882014-01-12 12:49:10 +00004756 if (isLoad) {
4757 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4758 } else {
4759 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4760 }
4761
sewardje0bff8b2014-03-09 09:40:23 +00004762 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004763 putIReg64orSP(nn, mkexpr(tEA));
4764
4765 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4766 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4767 const HChar* fmt_str = NULL;
4768 switch (how) {
4769 case BITS2(0,1):
4770 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4771 break;
4772 case BITS2(1,1):
4773 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4774 break;
4775 case BITS2(0,0):
4776 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4777 break;
4778 default:
4779 vassert(0);
4780 }
4781 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4782 nameIRegOrZR(szB == 8, tt),
4783 nameIReg64orSP(nn), simm9);
4784 return True;
4785 }
4786 }
4787
4788 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4789 /* L==1 => mm==LD
4790 L==0 => mm==ST
4791 x==0 => 32 bit transfers, and zero extended loads
4792 x==1 => 64 bit transfers
4793 simm7 is scaled by the (single-register) transfer size
4794
4795 (at-Rn-then-Rn=EA)
4796 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4797
4798 (at-EA-then-Rn=EA)
4799 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4800
4801 (at-EA)
4802 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4803 */
4804
4805 UInt insn_30_23 = INSN(30,23);
4806 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4807 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4808 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4809 UInt bL = INSN(22,22);
4810 UInt bX = INSN(31,31);
4811 UInt bWBack = INSN(23,23);
4812 UInt rT1 = INSN(4,0);
4813 UInt rN = INSN(9,5);
4814 UInt rT2 = INSN(14,10);
4815 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4816 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4817 || (bL && rT1 == rT2)) {
4818 /* undecodable; fall through */
4819 } else {
4820 if (rN == 31) { /* FIXME generate stack alignment check */ }
4821
4822 // Compute the transfer address TA and the writeback address WA.
4823 IRTemp tRN = newTemp(Ity_I64);
4824 assign(tRN, getIReg64orSP(rN));
4825 IRTemp tEA = newTemp(Ity_I64);
4826 simm7 = (bX ? 8 : 4) * simm7;
4827 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4828
4829 IRTemp tTA = newTemp(Ity_I64);
4830 IRTemp tWA = newTemp(Ity_I64);
4831 switch (INSN(24,23)) {
4832 case BITS2(0,1):
4833 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4834 case BITS2(1,1):
4835 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4836 case BITS2(1,0):
4837 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4838 default:
4839 vassert(0); /* NOTREACHED */
4840 }
4841
4842 /* Normally rN would be updated after the transfer. However, in
4843 the special case typifed by
4844 stp x29, x30, [sp,#-112]!
4845 it is necessary to update SP before the transfer, (1)
4846 because Memcheck will otherwise complain about a write
4847 below the stack pointer, and (2) because the segfault
4848 stack extension mechanism will otherwise extend the stack
4849 only down to SP before the instruction, which might not be
4850 far enough, if the -112 bit takes the actual access
4851 address to the next page.
4852 */
4853 Bool earlyWBack
4854 = bWBack && simm7 < 0
4855 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4856
4857 if (bWBack && earlyWBack)
4858 putIReg64orSP(rN, mkexpr(tEA));
4859
4860 /**/ if (bL == 1 && bX == 1) {
4861 // 64 bit load
4862 putIReg64orZR(rT1, loadLE(Ity_I64,
4863 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4864 putIReg64orZR(rT2, loadLE(Ity_I64,
4865 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4866 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00004867 // 32 bit load
4868 putIReg32orZR(rT1, loadLE(Ity_I32,
4869 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4870 putIReg32orZR(rT2, loadLE(Ity_I32,
4871 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4872 } else if (bL == 0 && bX == 1) {
4873 // 64 bit store
4874 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4875 getIReg64orZR(rT1));
4876 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4877 getIReg64orZR(rT2));
4878 } else {
4879 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00004880 // 32 bit store
4881 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4882 getIReg32orZR(rT1));
4883 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4884 getIReg32orZR(rT2));
4885 }
4886
4887 if (bWBack && !earlyWBack)
4888 putIReg64orSP(rN, mkexpr(tEA));
4889
4890 const HChar* fmt_str = NULL;
4891 switch (INSN(24,23)) {
4892 case BITS2(0,1):
4893 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4894 break;
4895 case BITS2(1,1):
4896 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4897 break;
4898 case BITS2(1,0):
4899 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4900 break;
4901 default:
4902 vassert(0);
4903 }
4904 DIP(fmt_str, bL == 0 ? "st" : "ld",
4905 nameIRegOrZR(bX == 1, rT1),
4906 nameIRegOrZR(bX == 1, rT2),
4907 nameIReg64orSP(rN), simm7);
4908 return True;
4909 }
4910 }
4911
4912 /* ---------------- LDR (literal, int reg) ---------------- */
4913 /* 31 29 23 4
4914 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
4915 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
4916 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4917 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
4918 Just handles the first two cases for now.
4919 */
4920 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4921 UInt imm19 = INSN(23,5);
4922 UInt rT = INSN(4,0);
4923 UInt bX = INSN(30,30);
4924 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4925 if (bX) {
4926 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4927 } else {
4928 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4929 }
4930 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4931 return True;
4932 }
4933
4934 /* -------------- {LD,ST}R (integer register) --------------- */
4935 /* 31 29 20 15 12 11 9 4
4936 | | | | | | | |
4937 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
4938 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
4939 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
4940 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
4941
4942 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
4943 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
4944 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
4945 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
4946 */
4947 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
4948 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4949 HChar dis_buf[64];
4950 UInt szLg2 = INSN(31,30);
4951 Bool isLD = INSN(22,22) == 1;
4952 UInt tt = INSN(4,0);
4953 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4954 if (ea != IRTemp_INVALID) {
4955 switch (szLg2) {
4956 case 3: /* 64 bit */
4957 if (isLD) {
4958 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
4959 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
4960 } else {
4961 storeLE(mkexpr(ea), getIReg64orZR(tt));
4962 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
4963 }
4964 break;
4965 case 2: /* 32 bit */
4966 if (isLD) {
4967 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
4968 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
4969 } else {
4970 storeLE(mkexpr(ea), getIReg32orZR(tt));
4971 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
4972 }
4973 break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
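   /* Offset example (our own illustration): imm12 is unsigned and
      scaled by the access size, so for LDRSW with imm12 = 3 the
      effective address is Xn|SP + 12. */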
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
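   /* Scaling example (our own illustration): for a D-register pair,
      szB is 8, so an encoded imm7 of -64 produces a byte offset of
      -512 -- exactly the "stp d0, d1, [sp, #-512]!" case discussed
      below. */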
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
   after_LDR_STR_vector_register:

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S  10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S  10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S  10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S  10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S  10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
   after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }

   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29      23 21 20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0  imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0  imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0  imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0  imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0  imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0  imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0  imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0  imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0  imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0  imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
      return True;
   }

   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23 21 20   11 9 4
      00 111 100 01 0  imm9 00 n t   LDUR Bt, [Xn|SP, #simm]
      01 111 100 01 0  imm9 00 n t   LDUR Ht, [Xn|SP, #simm]
      10 111 100 01 0  imm9 00 n t   LDUR St, [Xn|SP, #simm]
      11 111 100 01 0  imm9 00 n t   LDUR Dt, [Xn|SP, #simm]
      00 111 100 11 0  imm9 00 n t   LDUR Qt, [Xn|SP, #simm]

      00 111 100 00 0  imm9 00 n t   STUR Bt, [Xn|SP, #simm]
      01 111 100 00 0  imm9 00 n t   STUR Ht, [Xn|SP, #simm]
      10 111 100 00 0  imm9 00 n t   STUR St, [Xn|SP, #simm]
      11 111 100 00 0  imm9 00 n t   STUR Dt, [Xn|SP, #simm]
      00 111 100 10 0  imm9 00 n t   STUR Qt, [Xn|SP, #simm]
   */
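   /* Range note (our own gloss): "unscaled" means imm9 is a plain byte
      offset, sign-extended from 9 bits, so the reachable range is
      -256 .. +255 regardless of the transfer size; e.g. imm9 = 0x1FF
      decodes as -1. */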
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }

   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 31 29      23    4
      00 011 100 imm19 t   LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t   LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t   LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
      return True;
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg) ------ */
   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T}, [Xn|SP]
      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
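   /* Worked example (our own, for orientation): "ld4 {v0.16b-v3.16b},
      [x1], #64" has q == 1 and nRegs == 4, so the transfer size is
      16 * 4 = 64 bytes, and m == 11111 selects that constant as the
      post-increment step. */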
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ = INSN(30,30);
      Bool isPX = INSN(23,23) == 1;
      Bool isLD = INSN(22,22) == 1;
      UInt mm   = INSN(20,16);
      UInt opc  = INSN(15,12);
      UInt sz   = INSN(11,10);
      UInt nn   = INSN(9,5);
      UInt tt   = INSN(4,0);
      Bool isQ  = bitQ == 1;
      Bool is1d = sz == BITS2(1,1) && !isQ;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,0,0): nRegs = 4; break;
         case BITS4(0,1,0,0): nRegs = 3; break;
         case BITS4(1,0,0,0): nRegs = 2; break;
         case BITS4(0,1,1,1): nRegs = 1; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs == 1                             /* .1d is allowed */
          || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
         u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
            case 1: u0 = newTempV128(); i0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
               case 1: assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
                          (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
                       break;
               case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
                          (&i0, &i1, &i2, sz, u0, u1, u2);
                       break;
               case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
                          (&i0, &i1, sz, u0, u1);
                       break;
               case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
                          (&i0, sz, u0);
                       break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i1)) );
                       /* fallthru */
               case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(i3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(i2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(i1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  /* fallthru */
               case 1:
                  assign(i0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
                          (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
                       break;
               case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
                          (&u0, &u1, &u2, sz, i0, i1, i2);
                       break;
               case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
                          (&u0, &u1, sz, i0, i1);
                       break;
               case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
                          (&u0, sz, i0);
                       break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       /* fallthru */
               case 1: putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ = INSN(30,30);
      Bool isPX = INSN(23,23) == 1;
      Bool isLD = INSN(22,22) == 1;
      UInt mm   = INSN(20,16);
      UInt opc  = INSN(15,12);
      UInt sz   = INSN(11,10);
      UInt nn   = INSN(9,5);
      UInt tt   = INSN(4,0);
      Bool isQ  = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u1)) );
                       storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
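   /* Decode note (our own): nRegs below is ((insn[13] << 1) | insn[21])
      + 1, so for LD3R, which has insn[13] == 1 and insn[21] == 0, that
      gives (2 | 0) + 1 = 3 registers. */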
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15  11 9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
   */
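   /* Worked decode (our own example): xx:q:S:sz = 10:1:0:00 packs to
      xx_q_S_sz == 0x28 below, giving laneSzB = 4 and ix = q:S = 2,
      i.e. a 32-bit transfer to or from lane 2. */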
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt xx    = INSN(15,14);
      UInt bitS  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               }
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20    14      9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
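         /* Hence the Xor with 1 below: a successful store gives
            res == 1, and 1Uto64(res) xor 1 == 0, the architectural
            success code for Ws; a failed store gives res == 0 and
            hence Ws == 1. */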
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                            mkU64(1)));
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "l" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20    14      9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
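   /* Barrier note (our own summary): below, a load-acquire is modelled
      as the load followed by a fence, and a store-release as a fence
      followed by the store -- a conservative rendering of the ARMv8
      acquire/release semantics in terms of IR fences. */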
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }

   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
   */
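   /* Note (our own gloss): the 12-bit immediate is scaled by 8, the
      access size of this 64-bit prefetch encoding, and the Rt field
      holds the prefetch operation (prfop) rather than a register
      number. */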
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
6543 UInt b5 = INSN(31,31);
6544 Bool bIfZ = INSN(24,24) == 0;
6545 UInt b40 = INSN(23,19);
6546 UInt imm14 = INSN(18,5);
6547 UInt tt = INSN(4,0);
6548 UInt bitNo = (b5 << 5) | b40;
6549 ULong uimm64 = imm14 << 2;
6550 Long simm64 = sx_to_64(uimm64, 16);
6551 IRExpr* cond
6552 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6553 binop(Iop_And64,
6554 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6555 mkU64(1)),
6556 mkU64(0));
6557 stmt( IRStmt_Exit(cond,
6558 Ijk_Boring,
6559 IRConst_U64(guest_PC_curr_instr + simm64),
6560 OFFB_PC) );
6561 putPC(mkU64(guest_PC_curr_instr + 4));
6562 dres->whatNext = Dis_StopHere;
6563 dres->jk_StopHere = Ijk_Boring;
6564 DIP("tb%sz %s, #%u, 0x%llx\n",
6565 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6566 guest_PC_curr_instr + simm64);
6567 return True;
6568 }
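
   /* Worked example for the bit-number computation above
      (illustrative): "tbnz x3, #33, <target>" encodes b5 == 1 and
      b40 == 0b00001, giving bitNo == (1 << 5) | 1 == 33; the IR then
      shifts Xt right by 33 and tests bit 0 of the result. */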
6569
6570 /* -------------------- SVC -------------------- */
6571 /* 11010100 000 imm16 000 01
6572 Don't bother with anything except the imm16==0 case.
6573 */
6574 if (INSN(31,0) == 0xD4000001) {
6575 putPC(mkU64(guest_PC_curr_instr + 4));
6576 dres->whatNext = Dis_StopHere;
6577 dres->jk_StopHere = Ijk_Sys_syscall;
6578 DIP("svc #0\n");
6579 return True;
6580 }
6581
6582 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00006583 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00006584 0xD51BD0 010 Rt MSR tpidr_el0, rT
6585 0xD53BD0 010 Rt MRS rT, tpidr_el0
6586 */
6587 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6588 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6589 Bool toSys = INSN(21,21) == 0;
6590 UInt tt = INSN(4,0);
6591 if (toSys) {
6592 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6593 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6594 } else {
6595 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6596 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6597 }
6598 return True;
6599 }
sewardj6eb5ef82014-07-14 20:39:23 +00006600 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00006601 0xD51B44 000 Rt MSR fpcr, rT
6602      0xD53B44 000 Rt  MRS rT, fpcr
6603 */
6604 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6605 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6606 Bool toSys = INSN(21,21) == 0;
6607 UInt tt = INSN(4,0);
6608 if (toSys) {
6609 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6610 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6611 } else {
6612 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6613 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6614 }
6615 return True;
6616 }
sewardj6eb5ef82014-07-14 20:39:23 +00006617 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00006618 0xD51B44 001 Rt MSR fpsr, rT
6619      0xD53B44 001 Rt  MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00006620 The only part of this we model is FPSR.QC. All other bits
6621 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00006622 */
6623 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6624 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6625 Bool toSys = INSN(21,21) == 0;
6626 UInt tt = INSN(4,0);
6627 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00006628 /* Just deal with FPSR.QC. Make up a V128 value which is
6629 zero if Xt[27] is zero and any other value if Xt[27] is
6630 nonzero. */
6631 IRTemp qc64 = newTemp(Ity_I64);
6632 assign(qc64, binop(Iop_And64,
6633 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6634 mkU64(1)));
6635 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6636 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00006637 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6638 } else {
sewardja0645d52014-06-28 22:11:16 +00006639 /* Generate a value which is all zeroes except for bit 27,
6640 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00006641 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00006642 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6643 IRTemp qc64 = newTemp(Ity_I64);
6644 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6645 unop(Iop_V128to64, mkexpr(qcV128))));
6646 IRExpr* res = binop(Iop_Shl64,
6647 unop(Iop_1Uto64,
6648 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6649 mkU8(27));
6650 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00006651 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6652 }
6653 return True;
6654 }
sewardj6eb5ef82014-07-14 20:39:23 +00006655 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00006656 D51B42 000 Rt MSR nzcv, rT
6657 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00006658 The only parts of NZCV that actually exist are bits 31:28, which
6659 are the N Z C and V bits themselves. Hence the flags thunk provides
6660 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00006661 */
6662 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6663 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6664 Bool toSys = INSN(21,21) == 0;
6665 UInt tt = INSN(4,0);
6666 if (toSys) {
6667 IRTemp t = newTemp(Ity_I64);
6668 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6669 setFlags_COPY(t);
6670         DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
6671 } else {
6672 IRTemp res = newTemp(Ity_I64);
6673 assign(res, mk_arm64g_calculate_flags_nzcv());
6674 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6675 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6676 }
6677 return True;
6678 }
sewardj6eb5ef82014-07-14 20:39:23 +00006679 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00006680 Don't support arbitrary reads and writes to this register. Just
6681 return the value 16, which indicates that the DC ZVA instruction
6682 is not permitted, so we don't have to emulate it.
6683 D5 3B 00 111 Rt MRS rT, dczid_el0
6684 */
6685 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6686 UInt tt = INSN(4,0);
6687 putIReg64orZR(tt, mkU64(1<<4));
6688 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6689 return True;
6690 }
sewardj6eb5ef82014-07-14 20:39:23 +00006691 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00006692 We just handle reads, and make up a value from the D and I line
6693 sizes in the VexArchInfo we are given, and patch in the following
6694 fields that the Foundation model gives ("natively"):
6695 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6696      D5 3B 00 001 Rt  MRS rT, ctr_el0
6697 */
6698 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6699 UInt tt = INSN(4,0);
6700 /* Need to generate a value from dMinLine_lg2_szB and
6701         iMinLine_lg2_szB.  The value in the register is in 32-bit
6702 units, so need to subtract 2 from the values in the
6703 VexArchInfo. We can assume that the values here are valid --
6704 disInstr_ARM64 checks them -- so there's no need to deal with
6705 out-of-range cases. */
6706 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6707 && archinfo->arm64_dMinLine_lg2_szB <= 17
6708 && archinfo->arm64_iMinLine_lg2_szB >= 2
6709 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6710 UInt val
6711 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6712 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6713 putIReg64orZR(tt, mkU64(val));
6714 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6715 return True;
6716 }
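
   /* Worked example (illustrative): with 64-byte D and I cache
      lines, both *MinLine_lg2_szB values are 6, so each 4-bit field
      gets 6 - 2 == 4, and
      val == 0x8440c000 | (4 << 16) | (4 << 0) == 0x8444c004. */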
sewardj6eb5ef82014-07-14 20:39:23 +00006717 /* ---- Cases for CNTVCT_EL0 ----
6718 This is a timestamp counter of some sort. Support reads of it only
6719 by passing through to the host.
6720 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6721 */
6722 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6723 UInt tt = INSN(4,0);
6724 IRTemp val = newTemp(Ity_I64);
6725 IRExpr** args = mkIRExprVec_0();
6726 IRDirty* d = unsafeIRDirty_1_N (
6727 val,
6728 0/*regparms*/,
6729 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6730 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6731 args
6732 );
6733 /* execute the dirty call, dumping the result in val. */
6734 stmt( IRStmt_Dirty(d) );
6735 putIReg64orZR(tt, mkexpr(val));
6736 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6737 return True;
6738 }
sewardjbbcf1882014-01-12 12:49:10 +00006739
sewardj65902992014-05-03 21:20:56 +00006740 /* ------------------ IC_IVAU ------------------ */
6741 /* D5 0B 75 001 Rt ic ivau, rT
6742 */
6743 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6744 /* We will always be provided with a valid iMinLine value. */
6745 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6746 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6747 /* Round the requested address, in rT, down to the start of the
6748 containing block. */
6749 UInt tt = INSN(4,0);
6750 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6751 IRTemp addr = newTemp(Ity_I64);
6752 assign( addr, binop( Iop_And64,
6753 getIReg64orZR(tt),
6754 mkU64(~(lineszB - 1))) );
6755 /* Set the invalidation range, request exit-and-invalidate, with
6756 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006757 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6758 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006759 /* be paranoid ... */
6760 stmt( IRStmt_MBE(Imbe_Fence) );
6761 putPC(mkU64( guest_PC_curr_instr + 4 ));
6762 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00006763 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00006764 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6765 return True;
6766 }
6767
6768 /* ------------------ DC_CVAU ------------------ */
6769 /* D5 0B 7B 001 Rt dc cvau, rT
6770 */
6771 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6772 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00006773 dMinLine size, and request an Ijk_FlushDCache instead of
6774 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00006775 /* We will always be provided with a valid dMinLine value. */
6776 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6777 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6778 /* Round the requested address, in rT, down to the start of the
6779 containing block. */
6780 UInt tt = INSN(4,0);
6781 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6782 IRTemp addr = newTemp(Ity_I64);
6783 assign( addr, binop( Iop_And64,
6784 getIReg64orZR(tt),
6785 mkU64(~(lineszB - 1))) );
6786 /* Set the flush range, request exit-and-flush, with
6787 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006788 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6789 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006790 /* be paranoid ... */
6791 stmt( IRStmt_MBE(Imbe_Fence) );
6792 putPC(mkU64( guest_PC_curr_instr + 4 ));
6793 dres->whatNext = Dis_StopHere;
6794 dres->jk_StopHere = Ijk_FlushDCache;
6795 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6796 return True;
6797 }
6798
6799 /* ------------------ ISB, DMB, DSB ------------------ */
sewardj25842552014-10-31 10:25:19 +00006800 /* 31 21 11 7 6 4
6801 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6802 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6803 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6804 */
6805 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6806 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6807 && INSN(7,7) == 1
6808 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6809 UInt opc = INSN(6,5);
6810 UInt CRm = INSN(11,8);
6811 vassert(opc <= 2 && CRm <= 15);
sewardjd512d102014-02-21 14:49:44 +00006812 stmt(IRStmt_MBE(Imbe_Fence));
sewardj25842552014-10-31 10:25:19 +00006813 const HChar* opNames[3]
6814 = { "dsb", "dmb", "isb" };
6815 const HChar* howNames[16]
6816 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6817 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6818 DIP("%s %s\n", opNames[opc], howNames[CRm]);
sewardj65902992014-05-03 21:20:56 +00006819 return True;
6820 }
sewardjbbcf1882014-01-12 12:49:10 +00006821
sewardjdc9259c2014-02-27 11:10:19 +00006822 /* -------------------- NOP -------------------- */
6823 if (INSN(31,0) == 0xD503201F) {
6824 DIP("nop\n");
6825 return True;
6826 }
6827
sewardj39b51682014-11-25 12:17:53 +00006828 /* -------------------- BRK -------------------- */
6829 /* 31 23 20 4
6830 1101 0100 001 imm16 00000 BRK #imm16
6831 */
6832 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6833 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6834 UInt imm16 = INSN(20,5);
6835 /* Request SIGTRAP and then restart of this insn. */
6836 putPC(mkU64(guest_PC_curr_instr + 0));
6837 dres->whatNext = Dis_StopHere;
6838 dres->jk_StopHere = Ijk_SigTRAP;
6839 DIP("brk #%u\n", imm16);
6840 return True;
6841 }
6842
sewardjbbcf1882014-01-12 12:49:10 +00006843 //fail:
6844 vex_printf("ARM64 front end: branch_etc\n");
6845 return False;
6846# undef INSN
6847}
6848
6849
6850/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00006851/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00006852/*------------------------------------------------------------*/
6853
sewardjd96daf62014-06-15 08:17:35 +00006854/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00006855
sewardjd96daf62014-06-15 08:17:35 +00006856static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6857 // returns a0 b0
6858 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6859}
sewardje520bb32014-02-17 11:00:53 +00006860
sewardjd96daf62014-06-15 08:17:35 +00006861static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6862 // returns a1 b1
6863 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6864}
sewardje520bb32014-02-17 11:00:53 +00006865
sewardjd96daf62014-06-15 08:17:35 +00006866static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6867 // returns a2 a0 b2 b0
6868 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6869}
6870
6871static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6872 // returns a3 a1 b3 b1
6873 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6874}
6875
6876static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6877 // returns a1 b1 a0 b0
6878 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6879}
6880
6881static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6882 // returns a3 b3 a2 b2
6883 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6884}
6885
6886static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6887 // returns a6 a4 a2 a0 b6 b4 b2 b0
6888 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6889}
6890
6891static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6892 // returns a7 a5 a3 a1 b7 b5 b3 b1
6893 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6894}
6895
6896static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6897 // returns a3 b3 a2 b2 a1 b1 a0 b0
6898 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6899}
6900
6901static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6902 // returns a7 b7 a6 b6 a5 b5 a4 b4
6903 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6904}
6905
6906static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6907 IRTemp bFEDCBA9876543210 ) {
6908 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6909 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6910 mkexpr(bFEDCBA9876543210));
6911}
6912
6913static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6914 IRTemp bFEDCBA9876543210 ) {
6915 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6916 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6917 mkexpr(bFEDCBA9876543210));
6918}
6919
6920static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6921 IRTemp bFEDCBA9876543210 ) {
6922 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6923 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6924 mkexpr(bFEDCBA9876543210));
6925}
6926
6927static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6928 IRTemp bFEDCBA9876543210 ) {
6929 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6930 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6931 mkexpr(bFEDCBA9876543210));
6932}
sewardjecde6972014-02-05 11:01:19 +00006933
sewardjbbcf1882014-01-12 12:49:10 +00006934/* Generate N copies of |bit| in the bottom of a ULong. */
6935static ULong Replicate ( ULong bit, Int N )
6936{
sewardj606c4ba2014-01-26 19:11:14 +00006937 vassert(bit <= 1 && N >= 1 && N < 64);
6938 if (bit == 0) {
6939 return 0;
6940 } else {
6941 /* Careful. This won't work for N == 64. */
6942 return (1ULL << N) - 1;
6943 }
sewardjbbcf1882014-01-12 12:49:10 +00006944}
6945
sewardjfab09142014-02-10 10:28:13 +00006946static ULong Replicate32x2 ( ULong bits32 )
6947{
6948 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
6949 return (bits32 << 32) | bits32;
6950}
6951
6952static ULong Replicate16x4 ( ULong bits16 )
6953{
6954 vassert(0 == (bits16 & ~0xFFFFULL));
6955 return Replicate32x2((bits16 << 16) | bits16);
6956}
6957
6958static ULong Replicate8x8 ( ULong bits8 )
6959{
6960 vassert(0 == (bits8 & ~0xFFULL));
6961 return Replicate16x4((bits8 << 8) | bits8);
6962}
6963
6964/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
6965 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
6966 is 64. In the former case, the upper 32 bits of the returned value
6967 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00006968static ULong VFPExpandImm ( ULong imm8, Int N )
6969{
sewardj606c4ba2014-01-26 19:11:14 +00006970 vassert(imm8 <= 0xFF);
6971 vassert(N == 32 || N == 64);
6972 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
6973 Int F = N - E - 1;
6974 ULong imm8_6 = (imm8 >> 6) & 1;
6975 /* sign: 1 bit */
6976 /* exp: E bits */
6977 /* frac: F bits */
6978 ULong sign = (imm8 >> 7) & 1;
6979 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
6980 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
6981 vassert(sign < (1ULL << 1));
6982 vassert(exp < (1ULL << E));
6983 vassert(frac < (1ULL << F));
6984 vassert(1 + E + F == N);
6985 ULong res = (sign << (E+F)) | (exp << F) | frac;
6986 return res;
sewardjbbcf1882014-01-12 12:49:10 +00006987}
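
/* A minimal self-check sketch for VFPExpandImm, not called by the
   decoder and given purely as illustration; the function name is made
   up, and the unused attribute just silences warnings about an
   uncalled static function.  imm8 == 0x70 is the VFP encoding of 1.0
   at both widths. */
static __attribute__((unused)) void example_VFPExpandImm ( void )
{
   /* 0x70 == 0b01110000: sign 0, imm8<6> == 1, fraction bits 110000 */
   vassert(VFPExpandImm(0x70, 64) == 0x3FF0000000000000ULL); /* 1.0  */
   vassert(VFPExpandImm(0x70, 32) == 0x3F800000ULL);         /* 1.0f */
}
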
6988
sewardjfab09142014-02-10 10:28:13 +00006989/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
6990 This might fail, as indicated by the returned Bool. Page 2530 of
6991 the manual. */
6992static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
6993 UInt op, UInt cmode, UInt imm8 )
6994{
6995 vassert(op <= 1);
6996 vassert(cmode <= 15);
6997 vassert(imm8 <= 255);
6998
6999 *res = 0; /* will overwrite iff returning True */
7000
7001 ULong imm64 = 0;
7002 Bool testimm8 = False;
7003
7004 switch (cmode >> 1) {
7005 case 0:
7006 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7007 case 1:
7008 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7009 case 2:
7010 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7011 case 3:
7012 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7013 case 4:
7014 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7015 case 5:
7016 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7017 case 6:
7018 testimm8 = True;
7019 if ((cmode & 1) == 0)
7020 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7021 else
7022 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7023 break;
7024 case 7:
7025 testimm8 = False;
7026 if ((cmode & 1) == 0 && op == 0)
7027 imm64 = Replicate8x8(imm8);
7028 if ((cmode & 1) == 0 && op == 1) {
7029 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7030 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7031 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7032 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7033 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7034 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7035 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7036 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7037 }
7038 if ((cmode & 1) == 1 && op == 0) {
7039 ULong imm8_7 = (imm8 >> 7) & 1;
7040 ULong imm8_6 = (imm8 >> 6) & 1;
7041 ULong imm8_50 = imm8 & 63;
7042 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7043 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7044 | (Replicate(imm8_6, 5) << (6 + 19))
7045 | (imm8_50 << 19);
7046 imm64 = Replicate32x2(imm32);
7047 }
7048 if ((cmode & 1) == 1 && op == 1) {
7049 // imm64 = imm8<7>:NOT(imm8<6>)
7050 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7051 ULong imm8_7 = (imm8 >> 7) & 1;
7052 ULong imm8_6 = (imm8 >> 6) & 1;
7053 ULong imm8_50 = imm8 & 63;
7054 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7055 | (Replicate(imm8_6, 8) << 54)
7056 | (imm8_50 << 48);
7057 }
7058 break;
7059 default:
7060 vassert(0);
7061 }
7062
7063 if (testimm8 && imm8 == 0)
7064 return False;
7065
7066 *res = imm64;
7067 return True;
7068}
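
/* A minimal self-check sketch for AdvSIMDExpandImm, not called by the
   decoder; the function name is made up.  cmode == 14 with op == 1
   widens each bit of imm8 to a whole byte, and the cmode forms that
   shift imm8 reject a zero imm8. */
static __attribute__((unused)) void example_AdvSIMDExpandImm ( void )
{
   ULong v = 0;
   /* Bits 7 and 0 of 0x81 become bytes 7 and 0 of the result. */
   vassert(AdvSIMDExpandImm(&v, 1/*op*/, 14/*cmode*/, 0x81));
   vassert(v == 0xFF000000000000FFULL);
   /* imm8 == 0 is invalid for cmode == 2 (a "shifted imm8" form). */
   vassert(! AdvSIMDExpandImm(&v, 0/*op*/, 2/*cmode*/, 0x00));
}
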
7069
sewardj606c4ba2014-01-26 19:11:14 +00007070/* Help a bit for decoding laneage for vector operations that can be
7071 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7072 and SZ bits, typically for vector floating point. */
7073static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7074 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7075 /*OUT*/const HChar** arrSpec,
7076 Bool bitQ, Bool bitSZ )
7077{
7078 vassert(bitQ == True || bitQ == False);
7079 vassert(bitSZ == True || bitSZ == False);
7080 if (bitQ && bitSZ) { // 2x64
7081 if (tyI) *tyI = Ity_I64;
7082 if (tyF) *tyF = Ity_F64;
7083 if (nLanes) *nLanes = 2;
7084 if (zeroUpper) *zeroUpper = False;
7085 if (arrSpec) *arrSpec = "2d";
7086 return True;
7087 }
7088 if (bitQ && !bitSZ) { // 4x32
7089 if (tyI) *tyI = Ity_I32;
7090 if (tyF) *tyF = Ity_F32;
7091 if (nLanes) *nLanes = 4;
7092 if (zeroUpper) *zeroUpper = False;
7093 if (arrSpec) *arrSpec = "4s";
7094 return True;
7095 }
7096 if (!bitQ && !bitSZ) { // 2x32
7097 if (tyI) *tyI = Ity_I32;
7098 if (tyF) *tyF = Ity_F32;
7099 if (nLanes) *nLanes = 2;
7100 if (zeroUpper) *zeroUpper = True;
7101 if (arrSpec) *arrSpec = "2s";
7102 return True;
7103 }
7104 // Else impliedly 1x64, which isn't allowed.
7105 return False;
7106}
7107
sewardje520bb32014-02-17 11:00:53 +00007108/* Helper for decoding laneage for shift-style vector operations
7109 that involve an immediate shift amount. */
7110static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7111 UInt immh, UInt immb )
7112{
7113 vassert(immh < (1<<4));
7114 vassert(immb < (1<<3));
7115 UInt immhb = (immh << 3) | immb;
7116 if (immh & 8) {
7117 if (shift) *shift = 128 - immhb;
7118 if (szBlg2) *szBlg2 = 3;
7119 return True;
7120 }
7121 if (immh & 4) {
7122 if (shift) *shift = 64 - immhb;
7123 if (szBlg2) *szBlg2 = 2;
7124 return True;
7125 }
7126 if (immh & 2) {
7127 if (shift) *shift = 32 - immhb;
7128 if (szBlg2) *szBlg2 = 1;
7129 return True;
7130 }
7131 if (immh & 1) {
7132 if (shift) *shift = 16 - immhb;
7133 if (szBlg2) *szBlg2 = 0;
7134 return True;
7135 }
7136 return False;
7137}
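
/* A minimal self-check sketch for getLaneInfo_IMMH_IMMB, not called
   by the decoder; the function name is made up.  immh:immb ==
   0b0010:0b101 gives immhb == 21, which selects 16-bit lanes
   (szBlg2 == 1) with shift == 32 - 21 == 11. */
static __attribute__((unused)) void example_getLaneInfo_IMMH_IMMB ( void )
{
   UInt shift = 0, szBlg2 = 0;
   vassert(getLaneInfo_IMMH_IMMB(&shift, &szBlg2, 2/*immh*/, 5/*immb*/));
   vassert(shift == 11 && szBlg2 == 1);
}
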
7138
sewardjecde6972014-02-05 11:01:19 +00007139/* Generate IR to fold all lanes of the V128 value in 'src' as
7140 characterised by the operator 'op', and return the result in the
7141 bottom bits of a V128, with all other bits set to zero. */
sewardjdf9d6d52014-06-27 10:43:22 +00007142static IRTemp math_FOLDV ( IRTemp src, IROp op )
sewardjecde6972014-02-05 11:01:19 +00007143{
7144 /* The basic idea is to use repeated applications of Iop_CatEven*
7145 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7146 a complete vector. Then fold all those vectors with 'op' and
7147 zero out all but the least significant lane. */
7148 switch (op) {
7149 case Iop_Min8Sx16: case Iop_Min8Ux16:
sewardjb9aff1e2014-06-15 21:55:33 +00007150 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
sewardjfab09142014-02-10 10:28:13 +00007151 /* NB: temp naming here is misleading -- the naming is for 8
7152 lanes of 16 bit, whereas what is being operated on is 16
7153 lanes of 8 bits. */
7154 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007155 IRTemp x76547654 = newTempV128();
7156 IRTemp x32103210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007157 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7158 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007159 IRTemp x76767676 = newTempV128();
7160 IRTemp x54545454 = newTempV128();
7161 IRTemp x32323232 = newTempV128();
7162 IRTemp x10101010 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007163 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7164 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7165 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7166 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007167 IRTemp x77777777 = newTempV128();
7168 IRTemp x66666666 = newTempV128();
7169 IRTemp x55555555 = newTempV128();
7170 IRTemp x44444444 = newTempV128();
7171 IRTemp x33333333 = newTempV128();
7172 IRTemp x22222222 = newTempV128();
7173 IRTemp x11111111 = newTempV128();
7174 IRTemp x00000000 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007175 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7176 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7177 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7178 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7179 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7180 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7181 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7182 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7183 /* Naming not misleading after here. */
sewardj8e91fd42014-07-11 12:05:47 +00007184 IRTemp xAllF = newTempV128();
7185 IRTemp xAllE = newTempV128();
7186 IRTemp xAllD = newTempV128();
7187 IRTemp xAllC = newTempV128();
7188 IRTemp xAllB = newTempV128();
7189 IRTemp xAllA = newTempV128();
7190 IRTemp xAll9 = newTempV128();
7191 IRTemp xAll8 = newTempV128();
7192 IRTemp xAll7 = newTempV128();
7193 IRTemp xAll6 = newTempV128();
7194 IRTemp xAll5 = newTempV128();
7195 IRTemp xAll4 = newTempV128();
7196 IRTemp xAll3 = newTempV128();
7197 IRTemp xAll2 = newTempV128();
7198 IRTemp xAll1 = newTempV128();
7199 IRTemp xAll0 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007200 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7201 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7202 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7203 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7204 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7205 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7206 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7207 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7208 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7209 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7210 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7211 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7212 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7213 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7214 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7215 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
sewardj8e91fd42014-07-11 12:05:47 +00007216 IRTemp maxFE = newTempV128();
7217 IRTemp maxDC = newTempV128();
7218 IRTemp maxBA = newTempV128();
7219 IRTemp max98 = newTempV128();
7220 IRTemp max76 = newTempV128();
7221 IRTemp max54 = newTempV128();
7222 IRTemp max32 = newTempV128();
7223 IRTemp max10 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007224 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7225 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7226 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7227 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7228 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7229 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7230 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7231 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
sewardj8e91fd42014-07-11 12:05:47 +00007232 IRTemp maxFEDC = newTempV128();
7233 IRTemp maxBA98 = newTempV128();
7234 IRTemp max7654 = newTempV128();
7235 IRTemp max3210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007236 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7237 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7238 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7239 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007240 IRTemp maxFEDCBA98 = newTempV128();
7241 IRTemp max76543210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007242 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7243 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007244 IRTemp maxAllLanes = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007245 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7246 mkexpr(max76543210)));
sewardj8e91fd42014-07-11 12:05:47 +00007247 IRTemp res = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007248 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7249 return res;
sewardjecde6972014-02-05 11:01:19 +00007250 }
7251 case Iop_Min16Sx8: case Iop_Min16Ux8:
sewardjb9aff1e2014-06-15 21:55:33 +00007252 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
sewardjecde6972014-02-05 11:01:19 +00007253 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007254 IRTemp x76547654 = newTempV128();
7255 IRTemp x32103210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007256 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7257 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007258 IRTemp x76767676 = newTempV128();
7259 IRTemp x54545454 = newTempV128();
7260 IRTemp x32323232 = newTempV128();
7261 IRTemp x10101010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007262 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7263 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7264 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7265 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007266 IRTemp x77777777 = newTempV128();
7267 IRTemp x66666666 = newTempV128();
7268 IRTemp x55555555 = newTempV128();
7269 IRTemp x44444444 = newTempV128();
7270 IRTemp x33333333 = newTempV128();
7271 IRTemp x22222222 = newTempV128();
7272 IRTemp x11111111 = newTempV128();
7273 IRTemp x00000000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007274 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7275 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7276 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7277 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7278 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7279 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7280 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7281 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
sewardj8e91fd42014-07-11 12:05:47 +00007282 IRTemp max76 = newTempV128();
7283 IRTemp max54 = newTempV128();
7284 IRTemp max32 = newTempV128();
7285 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007286 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7287 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7288 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7289 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
sewardj8e91fd42014-07-11 12:05:47 +00007290 IRTemp max7654 = newTempV128();
7291 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007292 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7293 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007294 IRTemp max76543210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007295 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007296 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007297 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7298 return res;
7299 }
sewardj5cb53e72015-02-08 12:08:56 +00007300 case Iop_Max32Fx4: case Iop_Min32Fx4:
sewardjecde6972014-02-05 11:01:19 +00007301 case Iop_Min32Sx4: case Iop_Min32Ux4:
sewardjb9aff1e2014-06-15 21:55:33 +00007302 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
sewardjecde6972014-02-05 11:01:19 +00007303 IRTemp x3210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007304 IRTemp x3232 = newTempV128();
7305 IRTemp x1010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007306 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7307 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
sewardj8e91fd42014-07-11 12:05:47 +00007308 IRTemp x3333 = newTempV128();
7309 IRTemp x2222 = newTempV128();
7310 IRTemp x1111 = newTempV128();
7311 IRTemp x0000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007312 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7313 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7314 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7315 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
sewardj8e91fd42014-07-11 12:05:47 +00007316 IRTemp max32 = newTempV128();
7317 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007318 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7319 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007320 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007321 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007322 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007323 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7324 return res;
7325 }
sewardja5a6b752014-06-30 07:33:56 +00007326 case Iop_Add64x2: {
7327 IRTemp x10 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007328 IRTemp x00 = newTempV128();
7329 IRTemp x11 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007330 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7331 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
sewardj8e91fd42014-07-11 12:05:47 +00007332 IRTemp max10 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007333 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
sewardj8e91fd42014-07-11 12:05:47 +00007334 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007335 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7336 return res;
7337 }
sewardjecde6972014-02-05 11:01:19 +00007338 default:
7339 vassert(0);
7340 }
7341}
7342
7343
sewardj92d0ae32014-04-03 13:48:54 +00007344/* Generate IR for TBL and TBX. This deals with the 128 bit case
7345 only. */
7346static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7347 IRTemp oor_values )
7348{
7349 vassert(len >= 0 && len <= 3);
7350
7351 /* Generate some useful constants as concisely as possible. */
7352 IRTemp half15 = newTemp(Ity_I64);
7353 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7354 IRTemp half16 = newTemp(Ity_I64);
7355 assign(half16, mkU64(0x1010101010101010ULL));
7356
7357 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00007358 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007359 assign(allZero, mkV128(0x0000));
7360 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007361 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007362 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7363 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007364 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007365 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7366 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007367 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007368 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7369 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007370 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007371 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7372 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007373 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007374 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7375
7376 /* Group the 16/32/48/64 vectors so as to be indexable. */
7377 IRTemp allXX[4] = { all16, all32, all48, all64 };
7378
7379 /* Compute the result for each table vector, with zeroes in places
7380 where the index values are out of range, and OR them into the
7381 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00007382 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007383 assign(running_result, mkV128(0));
7384
7385 UInt tabent;
7386 for (tabent = 0; tabent <= len; tabent++) {
7387 vassert(tabent >= 0 && tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00007388 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007389 assign(bias,
7390 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00007391 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007392 assign(biased_indices,
7393 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00007394 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007395 assign(valid_mask,
7396 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007397 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007398 assign(safe_biased_indices,
7399 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00007400 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007401 assign(results_or_junk,
7402 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7403 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007404 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007405 assign(results_or_zero,
7406 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7407 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00007408 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007409 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7410 mkexpr(running_result)));
7411 running_result = tmp;
7412 }
7413
7414 /* So now running_result holds the overall result where the indices
7415 are in range, and zero in out-of-range lanes. Now we need to
7416 compute an overall validity mask and use this to copy in the
7417 lanes in the oor_values for out of range indices. This is
7418 unnecessary for TBL but will get folded out by iropt, so we lean
7419 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00007420 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007421 assign(overall_valid_mask,
7422 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00007423 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007424 assign(result,
7425 binop(Iop_OrV128,
7426 mkexpr(running_result),
7427 binop(Iop_AndV128,
7428 mkexpr(oor_values),
7429 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7430 return result;
7431}
7432
7433
sewardj31b5a952014-06-26 07:41:14 +00007434/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7435 an op which takes two I64s and produces a V128. That is, a widening
7436 operator. Generate IR which applies |opI64x2toV128| to either the
7437 lower (if |is2| is False) or upper (if |is2| is True) halves of
7438 |argL| and |argR|, and return the value in a new IRTemp.
7439*/
7440static
7441IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7442 IRExpr* argL, IRExpr* argR )
7443{
sewardj8e91fd42014-07-11 12:05:47 +00007444 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00007445 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7446 assign(res, binop(opI64x2toV128, unop(slice, argL),
7447 unop(slice, argR)));
7448 return res;
7449}
7450
7451
sewardjdf9d6d52014-06-27 10:43:22 +00007452/* Generate signed/unsigned absolute difference vector IR. */
7453static
7454IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7455{
sewardj6f312d02014-06-28 12:21:37 +00007456 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00007457 IRTemp argL = newTempV128();
7458 IRTemp argR = newTempV128();
7459 IRTemp msk = newTempV128();
7460 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00007461 assign(argL, argLE);
7462 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00007463 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00007464 mkexpr(argL), mkexpr(argR)));
7465 assign(res,
7466 binop(Iop_OrV128,
7467 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007468 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00007469 mkexpr(msk)),
7470 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007471 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00007472 unop(Iop_NotV128, mkexpr(msk)))));
7473 return res;
7474}
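
/* A scalar sketch (illustrative only, not used elsewhere) of the
   branch-free scheme math_ABD applies per lane: compute both a-b and
   b-a, then use a comparison-derived all-ones/all-zeroes mask to keep
   whichever difference is non-negative. */
static __attribute__((unused)) ULong example_scalar_abd64 ( Bool isU,
                                                            ULong a,
                                                            ULong b )
{
   ULong msk = (isU ? (a > b) : ((Long)a > (Long)b)) ? ~0ULL : 0ULL;
   return ((a - b) & msk) | ((b - a) & ~msk);
}
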
7475
7476
sewardj6f312d02014-06-28 12:21:37 +00007477/* Generate IR that takes a V128 and sign- or zero-widens
7478 either the lower or upper set of lanes to twice-as-wide,
7479 resulting in a new V128 value. */
7480static
sewardja5a6b752014-06-30 07:33:56 +00007481IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7482 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00007483{
sewardj8e91fd42014-07-11 12:05:47 +00007484 IRTemp src = newTempV128();
7485 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00007486 assign(src, srcE);
7487 switch (sizeNarrow) {
7488 case X10:
7489 assign(res,
7490 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7491 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7492 : Iop_InterleaveLO32x4,
7493 mkexpr(src),
7494 mkexpr(src)),
7495 mkU8(32)));
7496 break;
7497 case X01:
7498 assign(res,
7499 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7500 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7501 : Iop_InterleaveLO16x8,
7502 mkexpr(src),
7503 mkexpr(src)),
7504 mkU8(16)));
7505 break;
7506 case X00:
7507 assign(res,
7508 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7509 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7510 : Iop_InterleaveLO8x16,
7511 mkexpr(src),
7512 mkexpr(src)),
7513 mkU8(8)));
7514 break;
7515 default:
7516 vassert(0);
7517 }
7518 return res;
7519}
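
/* For instance, math_WIDEN_LO_OR_HI_LANES at X10 with fromUpperHalf
   == False: if src is [s3 s2 s1 s0] then InterleaveLO32x4(src, src)
   is [s1 s1 s0 s0], and shifting each 64-bit lane right by 32 --
   arithmetically when sign-widening, logically when zero-widening --
   leaves the widened [s1 s0]. */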
7520
7521
sewardja5a6b752014-06-30 07:33:56 +00007522/* Generate IR that takes a V128 and sign- or zero-widens
7523 either the even or odd lanes to twice-as-wide,
7524 resulting in a new V128 value. */
7525static
7526IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7527 UInt sizeNarrow, IRExpr* srcE )
7528{
sewardj8e91fd42014-07-11 12:05:47 +00007529 IRTemp src = newTempV128();
7530 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007531 IROp opSAR = mkVecSARN(sizeNarrow+1);
7532 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7533 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7534 IROp opSxR = zWiden ? opSHR : opSAR;
7535 UInt amt = 0;
7536 switch (sizeNarrow) {
7537 case X10: amt = 32; break;
7538 case X01: amt = 16; break;
7539 case X00: amt = 8; break;
7540 default: vassert(0);
7541 }
7542 assign(src, srcE);
7543 if (fromOdd) {
7544 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7545 } else {
7546 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7547 mkU8(amt)));
7548 }
7549 return res;
7550}
7551
7552
7553/* Generate IR that takes two V128s and narrows (takes lower half)
7554 of each lane, producing a single V128 value. */
7555static
7556IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7557{
sewardj8e91fd42014-07-11 12:05:47 +00007558 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007559 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7560 mkexpr(argHi), mkexpr(argLo)));
7561 return res;
7562}
7563
7564
sewardj487559e2014-07-10 14:22:45 +00007565/* Return a temp which holds the vector dup of the lane of width
7566   (1 << size) bytes obtained from src[laneNo]. */
7567static
7568IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7569{
7570 vassert(size <= 3);
7571 /* Normalise |laneNo| so it is of the form
7572 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7573 This puts the bits we want to inspect at constant offsets
7574 regardless of the value of |size|.
7575 */
7576 UInt ix = laneNo << size;
7577 vassert(ix <= 15);
7578 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7579 switch (size) {
7580 case 0: /* B */
7581 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7582 /* fallthrough */
7583 case 1: /* H */
7584 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7585 /* fallthrough */
7586 case 2: /* S */
7587 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7588 /* fallthrough */
7589 case 3: /* D */
7590 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7591 break;
7592 default:
7593 vassert(0);
7594 }
sewardj8e91fd42014-07-11 12:05:47 +00007595 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007596 assign(res, src);
7597 Int i;
7598 for (i = 3; i >= 0; i--) {
7599 if (ops[i] == Iop_INVALID)
7600 break;
sewardj8e91fd42014-07-11 12:05:47 +00007601 IRTemp tmp = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007602 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7603 res = tmp;
7604 }
7605 return res;
7606}
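
/* Worked trace for math_DUP_VEC_ELEM (illustrative): size == 2 (S)
   and laneNo == 3 give ix == 12, so ops[3] == Iop_InterleaveHI64x2
   and ops[2] == Iop_CatOddLanes32x4.  Starting from [s3 s2 s1 s0],
   the first step yields [s3 s2 s3 s2] and the second [s3 s3 s3 s3]. */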
7607
7608
7609/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7610 selector encoded as shown below. Return a new V128 holding the
7611 selected lane from |srcV| dup'd out to V128, and also return the
7612 lane number, log2 of the lane size in bytes, and width-character via
7613 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7614 is an invalid selector, in which case return
7615 IRTemp_INVALID, 0, 0 and '?' respectively.
7616
7617 imm5 = xxxx1 signifies .b[xxxx]
7618 = xxx10 .h[xxx]
7619 = xx100 .s[xx]
7620 = x1000 .d[x]
7621 otherwise invalid
7622*/
7623static
7624IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7625 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7626 IRExpr* srcV, UInt imm5 )
7627{
7628 *laneNo = 0;
7629 *laneSzLg2 = 0;
7630 *laneCh = '?';
7631
7632 if (imm5 & 1) {
7633 *laneNo = (imm5 >> 1) & 15;
7634 *laneSzLg2 = 0;
7635 *laneCh = 'b';
7636 }
7637 else if (imm5 & 2) {
7638 *laneNo = (imm5 >> 2) & 7;
7639 *laneSzLg2 = 1;
7640 *laneCh = 'h';
7641 }
7642 else if (imm5 & 4) {
7643 *laneNo = (imm5 >> 3) & 3;
7644 *laneSzLg2 = 2;
7645 *laneCh = 's';
7646 }
7647 else if (imm5 & 8) {
7648 *laneNo = (imm5 >> 4) & 1;
7649 *laneSzLg2 = 3;
7650 *laneCh = 'd';
7651 }
7652 else {
7653 /* invalid */
7654 return IRTemp_INVALID;
7655 }
7656
7657 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7658}
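
/* For example (illustrative): imm5 == 0b01010 has bit 0 clear and
   bit 1 set, so it selects .h[2]: laneNo == 2, laneSzLg2 == 1 and
   laneCh == 'h'. */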
7659
7660
7661/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
7662static
7663IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7664{
7665 IRType ty = Ity_INVALID;
7666 IRTemp rcS = IRTemp_INVALID;
7667 switch (size) {
7668 case X01:
7669 vassert(imm <= 0xFFFFULL);
7670 ty = Ity_I16;
7671 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7672 break;
7673 case X10:
7674 vassert(imm <= 0xFFFFFFFFULL);
7675 ty = Ity_I32;
7676 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7677 break;
7678 case X11:
7679 ty = Ity_I64;
7680 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7681 default:
7682 vassert(0);
7683 }
7684 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7685 return rcV;
7686}
7687
7688
sewardj25523c42014-06-15 19:36:29 +00007689/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7690 and the upper can contain any value -- it is ignored. If |is2| is False,
7691 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7692 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7693 half of vector reg |dd| and leave the lower half unchanged. This
7694 simulates the behaviour of the "foo/foo2" instructions in which the
7695 destination is half the width of sources, for example addhn/addhn2.
7696*/
7697static
7698void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7699{
7700 if (is2) {
7701 /* Get the old contents of Vdd, zero the upper half, and replace
7702 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00007703 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007704 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00007705 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007706 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7707 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007708 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007709 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7710 mkexpr(t_newHI_zero)));
7711 putQReg128(dd, mkexpr(res));
7712 } else {
7713 /* This is simple. */
7714 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7715 }
7716}
7717
7718
sewardj8e91fd42014-07-11 12:05:47 +00007719/* Compute vector SQABS at lane size |size| for |srcE|, returning
7720 the q result in |*qabs| and the normal result in |*nabs|. */
7721static
7722void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7723 IRExpr* srcE, UInt size )
7724{
7725 IRTemp src, mask, maskn, nsub, qsub;
7726 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7727 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7728 assign(src, srcE);
7729 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7730 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7731 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7732 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7733 assign(*nabs, binop(Iop_OrV128,
7734 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7735 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7736 assign(*qabs, binop(Iop_OrV128,
7737 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7738 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7739}
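
/* The q/n pair lets callers detect saturation: with 8-bit lanes an
   input of -128 gives a *nabs lane of -128 (plain negation wraps)
   but a *qabs lane of 127; any difference between the two is what
   sets FPSR.QC. */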
7740
7741
sewardj51d012a2014-07-21 09:19:50 +00007742/* Compute vector SQNEG at lane size |size| for |srcE|, returning
7743 the q result in |*qneg| and the normal result in |*nneg|. */
7744static
7745void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7746 IRExpr* srcE, UInt size )
7747{
7748 IRTemp src = IRTemp_INVALID;
7749 newTempsV128_3(&src, nneg, qneg);
7750 assign(src, srcE);
7751 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7752 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7753}
7754
7755
sewardjecedd982014-08-11 14:02:47 +00007756/* Zero all except the least significant lane of |srcE|, where |size|
7757 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00007758static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00007759{
7760 vassert(size < 4);
7761 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00007762 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00007763 return t;
7764}
7765
7766
sewardj51d012a2014-07-21 09:19:50 +00007767/* Generate IR to compute vector widening MULL from either the lower
7768 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7769 widening multiplies are unsigned when isU==True and signed when
7770 isU==False. |size| is the narrow lane size indication. Optionally,
7771 the product may be added to or subtracted from vecD, at the wide lane
7772 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7773 is 'm' (only multiply) then the accumulate part does not happen, and
7774 |vecD| is expected to == IRTemp_INVALID.
7775
7776 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7777 are allowed. The result is returned in a new IRTemp, which is
7778 returned in *res. */
7779static
7780void math_MULL_ACC ( /*OUT*/IRTemp* res,
7781 Bool is2, Bool isU, UInt size, HChar mas,
7782 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7783{
7784 vassert(res && *res == IRTemp_INVALID);
7785 vassert(size <= 2);
7786 vassert(mas == 'm' || mas == 'a' || mas == 's');
7787 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7788 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7789 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7790 : (mas == 's' ? mkVecSUB(size+1)
7791 : Iop_INVALID);
7792 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7793 mkexpr(vecN), mkexpr(vecM));
7794 *res = newTempV128();
7795 assign(*res, mas == 'm' ? mkexpr(mul)
7796 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7797}
7798
7799
7800/* Same as math_MULL_ACC, except the multiply is signed widening,
7801 the multiplied value is then doubled, before being added to or
7802 subtracted from the accumulated value. And everything is
7803 saturated. In all cases, saturation residuals are returned
7804 via (sat1q, sat1n), and in the accumulate cases,
7805 via (sat2q, sat2n) too. All results are returned in new temporaries.
7806 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
7807 so the caller can tell this has happened. */
7808static
7809void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
7810 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7811 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
7812 Bool is2, UInt size, HChar mas,
7813 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7814{
7815 vassert(size <= 2);
7816 vassert(mas == 'm' || mas == 'a' || mas == 's');
7817 /* Compute
7818 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
7819 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
7820 IOW take either the low or high halves of vecN and vecM, signed widen,
7821 multiply, double that, and signedly saturate. Also compute the same
7822 but without saturation.
7823 */
7824 vassert(sat2q && *sat2q == IRTemp_INVALID);
7825 vassert(sat2n && *sat2n == IRTemp_INVALID);
7826 newTempsV128_3(sat1q, sat1n, res);
7827 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
7828 mkexpr(vecN), mkexpr(vecM));
7829 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
7830 mkexpr(vecN), mkexpr(vecM));
7831 assign(*sat1q, mkexpr(tq));
7832 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
7833
7834 /* If there is no accumulation, the final result is sat1q,
7835 and there's no assignment to sat2q or sat2n. */
7836 if (mas == 'm') {
7837 assign(*res, mkexpr(*sat1q));
7838 return;
7839 }
7840
7841 /* Compute
7842 sat2q = vecD +sq/-sq sat1q
7843 sat2n = vecD +/- sat1n
7844 result = sat2q
7845 */
7846 newTempsV128_2(sat2q, sat2n);
7847 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
7848 mkexpr(vecD), mkexpr(*sat1q)));
7849 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
7850 mkexpr(vecD), mkexpr(*sat1n)));
7851 assign(*res, mkexpr(*sat2q));
7852}


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}
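
/* Illustration (added comment, not in the original source): for
   sizeNarrow == X01, each 64-bit half of argL/argR holds four 16-bit
   lanes, so *resHI and *resLO each end up holding the four 32-bit
   products of the corresponding lane pairs. */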


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int rcShift = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
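
/* Illustration (added comment, not in the original source): for 'h'
   lanes the rounded variant computes (2*a*b + 0x8000) >> 16 per lane;
   the CatOddLanes step picks the odd 16-bit lanes of the widened
   32-bit sums, i.e. their high halves, which is exactly that >> 16. */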


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
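
/* Illustration (added comment, not in the original source): UQSHL on
   8-bit lanes with shift 3 and source lane 0x25: the wrapped result
   would be 0x28, and qDiff1 = 0x25 >> 5 = 0x01.  Since qDiff1 differs
   from qDiff2 (= 0), the shifted-out bits were nonzero, the result
   lane saturates to 0xFF, and QC gets set. */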


/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))
                                  ),
                                  mkexpr(vecOne)
                            ),
                            mkexpr(scaOne)
                      )
                )
          )
   );
   return res;
}
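
/* Illustration (added comment, not in the original source): for
   unsigned bytes A=254, B=255: (254>>1) + (255>>1) + ((0+1+1)>>1)
   = 127 + 127 + 1 = 255, matching the rounded halving add
   (254+255+1) >> 1 without needing a 9-bit intermediate. */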


/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}
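
/* (Added note, not in the original source: the XOR yields a nonzero
   vector exactly when qres and nres differ under opZHI, and OR-ing
   that into QCFLAG makes the flag sticky, modelling the cumulative
   FPSR.QC bit.) */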


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}


/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}
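
/* Illustration (added comment, not in the original source):
   two_to_the_minus(5) recurses as two_to_the_minus(2) *
   two_to_the_minus(3) = 0.25 * 0.125 = 0.03125; every intermediate
   is an exact power of two, so no rounding error accumulates. */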


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }
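
   /* (Added note, not in the original source: Iop_SliceV128 hi:lo #n
      yields bytes n .. n+15 of the 32-byte concatenation hi:lo, which
      is exactly EXT's byte-window extraction.) */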

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      IROp op     = isUZP1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      IROp op1    = isTRN1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IROp op2    = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op     = isZIP1 ? mkVecINTERLEAVELO(size)
                           : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src   = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, v%u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),  R=W
            xxx10  4H(q=0)      or 8H(q=1),   R=W
            xx100  2S(q=0)      or 4S(q=1),   R=W
            x1000  Invalid(q=0) or 2D(q=1),   R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res    = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20   14     9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
      case BITS5(0,1,1,1,1): // 0:1111
         ok = True; isFMOV = True; break;

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 FADDP s_2s -------- */
      /* -------- 1,01,01101 FADDP d_2d -------- */
      Bool isD   = sz == X01;
      IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src  = newTempV128();
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD   = (sz & 1) == 1;
      Bool isMIN = (sz & 2) == 2;
      Bool isNM  = opcode == BITS5(0,1,1,0,0);
      IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp src  = newTempV128();
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
      HChar c = isD ? 'd' : 's';
      DIP("%s%sp %c%u, v%u.2%c\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (sh == 64 && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
      }
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op   = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-sh)));
      IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
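
   /* (Added note, not in the original source: for SRI, nmask is an
      arithmetic right shift of 0x8000000000000000, so it keeps the top
      sh bits of the destination; e.g. sh == 8 gives nmask ==
      0xFF00000000000000, and the right-shifted bits of nn fill the
      remainder.  SLI below uses the complementary low-bits mask.) */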

   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }
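
   /* (Added note, not in the original source: the QANDq...NARROW ops
      used above deliver a pair in one V128 -- the narrowed result in
      the lower 64 bits and the saturation residue in the upper 64 --
      so duplicating the upper half and comparing it against zero is
      what drives the QC update.) */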
9342
sewardjdf1628c2014-06-10 22:52:05 +00009343# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9344 return False;
9345# undef INSN
9346}
9347
sewardjfc83d2c2014-06-12 10:15:46 +00009348
sewardjdf1628c2014-06-10 22:52:05 +00009349static
9350Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9351{
sewardj54ffa1d2014-07-22 09:27:49 +00009352 /* 31 29 28 23 21 20 15 11 9 4
9353 01 U 11110 size 1 m opcode 00 n d
9354 Decode fields: u,opcode
9355 */
sewardjdf1628c2014-06-10 22:52:05 +00009356# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00009357 if (INSN(31,30) != BITS2(0,1)
9358 || INSN(28,24) != BITS5(1,1,1,1,0)
9359 || INSN(21,21) != 1
9360 || INSN(11,10) != BITS2(0,0)) {
9361 return False;
9362 }
9363 UInt bitU = INSN(29,29);
9364 UInt size = INSN(23,22);
9365 UInt mm = INSN(20,16);
9366 UInt opcode = INSN(15,12);
9367 UInt nn = INSN(9,5);
9368 UInt dd = INSN(4,0);
9369 vassert(size < 4);
9370
9371 if (bitU == 0
9372 && (opcode == BITS4(1,1,0,1)
9373 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9374 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9375 /* -------- 0,1001 SQDMLAL -------- */ // 1
9376 /* -------- 0,1011 SQDMLSL -------- */ // 2
9377 /* Widens, and size refers to the narrowed lanes. */
9378 UInt ks = 3;
9379 switch (opcode) {
9380 case BITS4(1,1,0,1): ks = 0; break;
9381 case BITS4(1,0,0,1): ks = 1; break;
9382 case BITS4(1,0,1,1): ks = 2; break;
9383 default: vassert(0);
9384 }
9385 vassert(ks >= 0 && ks <= 2);
9386 if (size == X00 || size == X11) return False;
9387 vassert(size <= 2);
9388 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9389 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9390 newTempsV128_3(&vecN, &vecM, &vecD);
9391 assign(vecN, getQReg128(nn));
9392 assign(vecM, getQReg128(mm));
9393 assign(vecD, getQReg128(dd));
9394 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9395 False/*!is2*/, size, "mas"[ks],
9396 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
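      /* sat1q/sat1n record saturation from the doubling multiply itself;
         sat2q/sat2n are produced only for the accumulating variants
         (SQDMLAL/SQDMLSL), where the subsequent add or subtract can
         saturate a second time.  Any difference within a pair causes
         QCFLAG to be set below. */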
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, %c%d\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
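
   /* Example of the QCFLAG scheme above: for the B-sized variant,
      SQADD 0x7F, 0x01 produces the saturated result 0x7F, whereas the
      plain ADD produces 0x80.  The two differ, so QCFLAG gets set.  When
      no saturation occurs the two results agree and QCFLAG is left
      unchanged. */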

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
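      /* There is no 64x2 >= comparison at the IR level, so a >= b is
         computed as NOT(b > a). */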
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL  d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL  d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isR ? (isU ? "urshl" : "srshl")
                            : (isU ? "ushl"  : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRType ity = size == X01 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, triop(mkMULF(ity),
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQRegLO(nn,ity), getQRegLO(mm,ity)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fmulx %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
      /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
      Bool   isD   = size == X01;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp res   = newTempV128();
      assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                       : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
      Bool   isD   = size == X11;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", "fcmgt",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
      /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
      Bool   isD   = (size & 1) == 1;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGT  = (size & 2) == 2;
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
                               unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
          c, dd, c, nn, c, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
      Bool   isD     = size == X11;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));

      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp opN     = Iop_INVALID;
      Bool zWiden  = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
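      /* Example (sqxtn, H -> B): a source lane holding 0x0123 narrows to
         the saturated value 0x7F; widening that back gives 0x007F, which
         differs from 0x0123, so QCFLAG gets set.  An in-range value such
         as 0x0042 round-trips unchanged and leaves QCFLAG alone. */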
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
      Bool   isD = (size & 1) == 1;
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
9952 if (bitU == 1) {
sewardjbc0b7222015-03-30 18:49:38 +00009953 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
sewardj400d6b92015-03-30 09:01:51 +00009954 } else {
sewardjbc0b7222015-03-30 18:49:38 +00009955 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
sewardj400d6b92015-03-30 09:01:51 +00009956 }
9957 IRTemp src = newTemp(tyF);
9958 IRTemp res = newTemp(tyI);
9959 assign(src, getQRegLane(nn, 0, tyF));
9960 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
9961 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
sewardjbc0b7222015-03-30 18:49:38 +00009962 if (!isD) {
sewardj400d6b92015-03-30 09:01:51 +00009963 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9964 }
9965 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
sewardjbc0b7222015-03-30 18:49:38 +00009966 HChar sOrD = isD ? 'd' : 's';
sewardj400d6b92015-03-30 09:01:51 +00009967 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
9968 sOrD, dd, sOrD, nn);
9969 return True;
9970 }
9971
sewardj89cefe42015-02-24 12:21:01 +00009972 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
9973 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
9974 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
9975 Bool isSQRT = bitU == 1;
9976 Bool isD = (size & 1) == 1;
9977 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
9978 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
9979 IRTemp resV = newTempV128();
9980 assign(resV, unop(op, getQReg128(nn)));
9981 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9982 mkexpr(resV))));
9983 HChar c = isD ? 'd' : 's';
9984 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
9985 return True;
9986 }
9987
9988 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
9989 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
9990 Bool isD = (size & 1) == 1;
9991 IRType ty = isD ? Ity_F64 : Ity_F32;
9992 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
9993 IRTemp res = newTemp(ty);
9994 IRTemp rm = mk_get_IR_rounding_mode();
9995 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
9996 putQReg128(dd, mkV128(0x0000));
9997 putQRegLane(dd, 0, mkexpr(res));
9998 HChar c = isD ? 'd' : 's';
9999 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10000 return True;
10001 }
10002
sewardjdf1628c2014-06-10 22:52:05 +000010003 return False;
10004# undef INSN
10005}
10006
sewardjfc83d2c2014-06-12 10:15:46 +000010007
sewardjdf1628c2014-06-10 22:52:05 +000010008static
10009Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10010{
sewardj54ffa1d2014-07-22 09:27:49 +000010011 /* 31 28 23 21 20 19 15 11 9 4
10012 01 U 11111 size L M m opcode H 0 n d
10013 Decode fields are: u,size,opcode
10014 M is really part of the mm register number. Individual
10015 cases need to inspect L and H though.
10016 */
sewardjdf1628c2014-06-10 22:52:05 +000010017# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +000010018 if (INSN(31,30) != BITS2(0,1)
10019 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
10020 return False;
10021 }
10022 UInt bitU = INSN(29,29);
10023 UInt size = INSN(23,22);
10024 UInt bitL = INSN(21,21);
10025 UInt bitM = INSN(20,20);
10026 UInt mmLO4 = INSN(19,16);
10027 UInt opcode = INSN(15,12);
10028 UInt bitH = INSN(11,11);
10029 UInt nn = INSN(9,5);
10030 UInt dd = INSN(4,0);
10031 vassert(size < 4);
10032 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10033
sewardjee3db332015-02-08 18:24:38 +000010034 if (bitU == 0 && size >= X10
10035 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10036 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10037 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10038 Bool isD = (size & 1) == 1;
10039 Bool isSUB = opcode == BITS4(0,1,0,1);
10040 UInt index;
10041 if (!isD) index = (bitH << 1) | bitL;
10042 else if (isD && bitL == 0) index = bitH;
10043 else return False; // sz:L == x11 => unallocated encoding
10044 vassert(index < (isD ? 2 : 4));
10045 IRType ity = isD ? Ity_F64 : Ity_F32;
10046 IRTemp elem = newTemp(ity);
10047 UInt mm = (bitM << 4) | mmLO4;
10048 assign(elem, getQRegLane(mm, index, ity));
10049 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10050 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10051 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10052 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10053 IRTemp rm = mk_get_IR_rounding_mode();
10054 IRTemp t1 = newTempV128();
10055 IRTemp t2 = newTempV128();
10056 // FIXME: double rounding; use FMA primops instead
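      // (Presumably by computing each lane with the fused Iop_MAddF32/
      // Iop_MAddF64 ops, negating the product for the FMLS case, so that
      // the multiply and add round only once.)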
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t2))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t1))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf  = newTempV128();
      IRTemp  res  = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
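         /* SRI leaves the top 'shift' bits of each Vd lane unchanged and
            inserts the shifted Vn bits below them.  nmask selects the
            preserved bits: for example, for B lanes with shift == 3,
            nmask holds 0xE0 in each lane, and each result lane is
            (Vn.lane >> 3) | (Vd.lane & 0xE0). */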
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp  tmp   = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
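      /* That is: getLaneInfo_IMMH_IMMB returns the right-shift reading,
         2*lanebits - immh:immb, whereas left shifts are encoded as
         immh:immb - lanebits, so computing lanebits - shift recovers the
         left shift amount.  For example, for S lanes with immh:immb =
         0100:101 (37), the helper returns 64-37 = 27, and the actual left
         shift is 32-27 = 5. */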
sewardj8e91fd42014-07-11 12:05:47 +000010363 UInt lanebits = 8 << size;
10364 shift = lanebits - shift;
10365 vassert(shift >= 0 && shift < lanebits);
10366 IROp op = mkVecSHLN(size);
10367 IRExpr* src = getQReg128(nn);
10368 IRTemp res = newTempV128();
10369 if (shift == 0) {
10370 assign(res, src);
10371 } else {
sewardjdf9d6d52014-06-27 10:43:22 +000010372 assign(res, binop(op, src, mkU8(shift)));
sewardj8e91fd42014-07-11 12:05:47 +000010373 if (isSLI) {
10374 IRExpr* nmask = binop(mkVecSHRN(size),
10375 mkV128(0xFFFF), mkU8(lanebits - shift));
10376 IRTemp tmp = newTempV128();
10377 assign(tmp, binop(Iop_OrV128,
10378 mkexpr(res),
10379 binop(Iop_AndV128, getQReg128(dd), nmask)));
10380 res = tmp;
10381 }
sewardjdf1628c2014-06-10 22:52:05 +000010382 }
sewardj8e91fd42014-07-11 12:05:47 +000010383 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10384 HChar laneCh = "bhsd"[size];
10385 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10386 const HChar* nm = isSLI ? "sli" : "shl";
10387 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10388 nameQReg128(dd), nLanes, laneCh,
10389 nameQReg128(nn), nLanes, laneCh, shift);
10390 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010391 }
10392
sewardja97dddf2014-08-14 22:26:52 +000010393 if (opcode == BITS5(0,1,1,1,0)
10394 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10395 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10396 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10397 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10398 UInt size = 0;
10399 UInt shift = 0;
10400 Bool isQ = bitQ == 1;
10401 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10402 if (!ok || (bitQ == 0 && size == X11)) return False;
10403 vassert(size >= 0 && size <= 3);
10404 /* The shift encoding has opposite sign for the leftwards case.
10405 Adjust shift to compensate. */
10406 UInt lanebits = 8 << size;
10407 shift = lanebits - shift;
10408 vassert(shift >= 0 && shift < lanebits);
10409 const HChar* nm = NULL;
10410 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10411 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10412 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10413 else vassert(0);
10414 IRTemp qDiff1 = IRTemp_INVALID;
10415 IRTemp qDiff2 = IRTemp_INVALID;
10416 IRTemp res = IRTemp_INVALID;
10417 IRTemp src = newTempV128();
10418 assign(src, getQReg128(nn));
10419 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10420 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10421 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
sewardjacc29642014-08-15 05:35:35 +000010422 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
sewardja97dddf2014-08-14 22:26:52 +000010423 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10424 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10425 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10426 return True;
10427 }
10428
sewardj487559e2014-07-10 14:22:45 +000010429 if (bitU == 0
10430 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10431 /* -------- 0,10000 SHRN{,2} #imm -------- */
10432 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10433 /* Narrows, and size is the narrow size. */
10434 UInt size = 0;
10435 UInt shift = 0;
10436 Bool is2 = bitQ == 1;
10437 Bool isR = opcode == BITS5(1,0,0,0,1);
10438 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10439 if (!ok || size == X11) return False;
10440 vassert(shift >= 1);
sewardj8e91fd42014-07-11 12:05:47 +000010441 IRTemp t1 = newTempV128();
10442 IRTemp t2 = newTempV128();
10443 IRTemp t3 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010444 assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN{,2} #imm -------- */
      /* -------- 1,10010 UQSHRN{,2} #imm -------- */
      /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";  op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";  op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
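      /* Worked example: immh:immb = 0101:010 (42) selects the 01xx row,
         so Ta = 2d and sh = 42-32 = 10.  The interleave-with-zero below
         expands each source element into the top half of a 64-bit lane,
         and the subsequent shift right by 32-sh leaves the sign- or
         zero-extended element shifted left by sh. */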
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTempV128();
      IRTemp  zero = newTempV128();
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isR ? "raddhn" : "addhn")
                                     : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100 UMULL{2} -------- */ // 0
      /* -------- 0,1000 SMLAL{2} -------- */ // 1
      /* -------- 1,1000 UMLAL{2} -------- */ // 1
      /* -------- 0,1010 SMLSL{2} -------- */ // 2
      /* -------- 1,1010 UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
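      /* The 'm'/'a'/'s' selected by "mas"[ks] tells math_MULL_ACC
         whether to produce a plain multiply, a multiply-accumulate
         or a multiply-subtract. */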
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110 PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}

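/* Purely illustrative sketch, not used by the decoder: a scalar,
   per-lane reference model of the ADDHN/RADDHN computation handled
   above, for the size == X00 case (16-bit wide lanes narrowing to
   8 bits).  The helper name is hypothetical. */
static inline UChar ref_RADDHN_lane ( UShort a, UShort b, Bool isR )
{
   UInt wide = (UInt)a + (UInt)b;   /* sum in the wide lane */
   if (isR) wide += 1U << (8-1);    /* rounding constant, as above */
   return (UChar)(wide >> 8);       /* keep only the high half */
}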

static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
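      /* (Sketch: for UHADD on 8-bit lanes this computes (a + b) >> 1
         in 16-bit lanes, so the carry out of bit 7 is not lost; the
         signed variants use an arithmetic shift right instead.) */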
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
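      /* QC is set iff some lane saturated; that is detected by also
         computing the non-saturating result and comparing. */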
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
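      /* The three non-EOR cases are bitwise selects, all built from
         the identity sel(C,X,Y) = Y ^ ((Y ^ X) & C), which yields X
         where C is 1 and Y where C is 0:
            BSL selects by D between N and M,
            BIT selects by M between N and D,
            BIF selects by ~M between N and D.
         This form needs three ops rather than the four of
         (X & C) | (Y & ~C). */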
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGE = bitU == 0;
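      /* There are no vector >= primops, so compute NOT(argR > argL)
         instead. */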
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
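      /* E.g. for 4s lanes: CatEven(M,N) = [m2 m0 n2 n0] and
         CatOdd(M,N) = [m3 m1 n3 n1], so a lane-wise max/min of the
         two gives [op(m3,m2) op(m1,m0) op(n3,n2) op(n1,n0)], which is
         the required pairwise result. */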
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
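      // (the separate multiply and add each round to the destination
      // precision, so the combination can differ from a true fused
      // multiply-add in the last mantissa bit)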
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNMP, FMINNMP: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

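/* Purely illustrative sketch, not used by the decoder: a scalar model
   of the QC-flag scheme that the saturating cases above rely on --
   compute both the saturating and the non-saturating result, and
   record saturation iff they differ.  Shown for signed 8-bit add; the
   helper name is hypothetical. */
static inline UChar ref_SQADD8 ( Char a, Char b, /*OUT*/Bool* anyQ )
{
   Int wide = (Int)a + (Int)b;     /* exact; cannot overflow an Int */
   Int sat  = wide;
   if (sat > 127)  sat = 127;
   if (sat < -128) sat = -128;
   if (sat != wide) *anyQ = True;  /* QC is sticky, so only ever set */
   return (UChar)sat;
}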

static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
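      /* Pairwise widening sum: widen the odd-numbered and the
         even-numbered lanes separately, then add, so that each wide
         lane ends up holding the sum of one adjacent narrow pair. */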
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

sewardj25523c42014-06-15 19:36:29 +000011808 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
11809 /* -------- 0,xx,01011: ABS std7_std7 -------- */
11810 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000011811 IRTemp res = newTempV128();
11812 assign(res, unop(mkVecABS(size), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011813 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj25523c42014-06-15 19:36:29 +000011814 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11815 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
11816 return True;
11817 }
11818
sewardjdf1628c2014-06-10 22:52:05 +000011819 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
11820 /* -------- 1,xx,01011: NEG std7_std7 -------- */
11821 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000011822 IRTemp res = newTempV128();
11823 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011824 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000011825 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11826 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
11827 return True;
11828 }
11829
sewardj13830dc2015-02-07 21:09:47 +000011830 UInt ix = 0; /*INVALID*/
11831 if (size >= X10) {
11832 switch (opcode) {
11833 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
11834 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
11835 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
11836 default: break;
11837 }
11838 }
11839 if (ix > 0) {
11840 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
11841 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
11842 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
11843 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
11844 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
11845 if (bitQ == 0 && size == X11) return False; // implied 1d case
11846 Bool isD = size == X11;
11847 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
11848 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
11849 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11850 IROp opCmp = Iop_INVALID;
11851 Bool swap = False;
11852 const HChar* nm = "??";
11853 switch (ix) {
11854 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
11855 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
11856 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
11857 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
11858 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
11859 default: vassert(0);
11860 }
11861 IRExpr* zero = mkV128(0x0000);
11862 IRTemp res = newTempV128();
11863 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
11864 : binop(opCmp, getQReg128(nn), zero));
11865 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11866 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
11867 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
11868 nameQReg128(dd), arr, nameQReg128(nn), arr);
11869 return True;
11870 }
11871
sewardjdf1628c2014-06-10 22:52:05 +000011872 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
11873 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
11874 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
11875 if (bitQ == 0 && size == X11) return False; // implied 1d case
11876 Bool isFNEG = bitU == 1;
11877 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
11878 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
sewardj8e91fd42014-07-11 12:05:47 +000011879 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +000011880 assign(res, unop(op, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011881 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000011882 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
11883 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
11884 nameQReg128(dd), arr, nameQReg128(nn), arr);
11885 return True;
11886 }
11887
11888 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
11889 /* -------- 0,xx,10010: XTN{,2} -------- */
sewardjecedd982014-08-11 14:02:47 +000011890 if (size == X11) return False;
11891 vassert(size < 3);
11892 Bool is2 = bitQ == 1;
11893 IROp opN = mkVecNARROWUN(size);
11894 IRTemp resN = newTempV128();
11895 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
11896 putLO64andZUorPutHI64(is2, dd, resN);
11897 const HChar* nm = "xtn";
11898 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11899 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11900 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
11901 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11902 return True;
11903 }
11904
11905 if (opcode == BITS5(1,0,1,0,0)
11906 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
11907 /* -------- 0,xx,10100: SQXTN{,2} -------- */
11908 /* -------- 1,xx,10100: UQXTN{,2} -------- */
11909 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
11910 if (size == X11) return False;
11911 vassert(size < 3);
11912 Bool is2 = bitQ == 1;
11913 IROp opN = Iop_INVALID;
11914 Bool zWiden = True;
11915 const HChar* nm = "??";
11916 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
11917 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
sewardjdf1628c2014-06-10 22:52:05 +000011918 }
sewardjecedd982014-08-11 14:02:47 +000011919 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
11920 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
sewardjdf1628c2014-06-10 22:52:05 +000011921 }
sewardjecedd982014-08-11 14:02:47 +000011922 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11923 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
11924 }
11925 else vassert(0);
11926 IRTemp src = newTempV128();
11927 assign(src, getQReg128(nn));
11928 IRTemp resN = newTempV128();
11929 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
11930 putLO64andZUorPutHI64(is2, dd, resN);
11931 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
11932 size, mkexpr(resN));
11933 updateQCFLAGwithDifference(src, resW);
11934 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11935 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11936 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
11937 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11938 return True;
sewardjdf1628c2014-06-10 22:52:05 +000011939 }
11940
sewardj487559e2014-07-10 14:22:45 +000011941 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
11942 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
11943 /* Widens, and size is the narrow size. */
11944 if (size == X11) return False;
11945 Bool is2 = bitQ == 1;
11946 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
11947 IROp opSHL = mkVecSHLN(size+1);
sewardj8e91fd42014-07-11 12:05:47 +000011948 IRTemp src = newTempV128();
11949 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000011950 assign(src, getQReg128(nn));
11951 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
11952 mkU8(8 << size)));
11953 putQReg128(dd, mkexpr(res));
11954 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11955 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11956 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
11957 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
11958 return True;
11959 }
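      /* Note on the SHLL implementation above: interleaving the narrow
         vector with itself makes each wide lane hold (x << w) | x, where
         w is the narrow lane width; the subsequent wide shift left by w
         then leaves exactly x << w, since the upper copy is shifted out.
         Worked 8-bit example: x = 0xAB interleaves to the 16-bit lane
         0xABAB, and the shift yields 0xAB00 == 0xAB << 8, which is
         SHLL's zero-extend-then-shift result. */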
11960
sewardj400d6b92015-03-30 09:01:51 +000011961 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
11962 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
11963 UInt nLanes = size == X00 ? 4 : 2;
11964 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
11965 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
11966 IRTemp rm = mk_get_IR_rounding_mode();
11967 IRTemp src[nLanes];
11968 for (UInt i = 0; i < nLanes; i++) {
11969 src[i] = newTemp(srcTy);
11970 assign(src[i], getQRegLane(nn, i, srcTy));
11971 }
11972 for (UInt i = 0; i < nLanes; i++) {
11973 putQRegLane(dd, nLanes * bitQ + i,
11974 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
11975 }
sewardjdf1628c2014-06-10 22:52:05 +000011976 if (bitQ == 0) {
11977 putQRegLane(dd, 1, mkU64(0));
11978 }
sewardj400d6b92015-03-30 09:01:51 +000011979 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
11980 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
11981 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
11982 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11983 return True;
11984 }
11985
11986 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
11987 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
11988 UInt nLanes = size == X00 ? 4 : 2;
11989 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
11990 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
11991 IRTemp src[nLanes];
11992 for (UInt i = 0; i < nLanes; i++) {
11993 src[i] = newTemp(srcTy);
11994 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
11995 }
11996 for (UInt i = 0; i < nLanes; i++) {
11997 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
11998 }
11999 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12000 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12001 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12002 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
sewardjdf1628c2014-06-10 22:52:05 +000012003 return True;
12004 }
12005
sewardj6a785df2015-02-09 09:07:47 +000012006 ix = 0;
12007 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12008 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12009 // = 1 + bitU[0]:size[1]:opcode[0]
12010 vassert(ix >= 1 && ix <= 8);
12011 if (ix == 7) ix = 0;
12012 }
12013 if (ix > 0) {
12014 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12015 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12016 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12017 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12018 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12019 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12020 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12021 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12022 /* rm plan:
12023 FRINTN: tieeven -- !! FIXME KLUDGED !!
12024 FRINTM: -inf
12025 FRINTP: +inf
12026 FRINTZ: zero
12027 FRINTA: tieaway -- !! FIXME KLUDGED !!
12028 FRINTX: per FPCR + "exact = TRUE"
12029 FRINTI: per FPCR
12030 */
12031 Bool isD = (size & 1) == 1;
12032 if (bitQ == 0 && isD) return False; // implied 1d case
12033
12034 IRTemp irrmRM = mk_get_IR_rounding_mode();
12035
12036 UChar ch = '?';
12037 IRTemp irrm = newTemp(Ity_I32);
12038 switch (ix) {
12039 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12040 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12041 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12042 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12043 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12044 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12045 // I am unsure about the following, due to the "integral exact"
12046 // description in the manual. What does it mean? (frintx, that is)
12047 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12048 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12049 default: vassert(0);
12050 }
12051
sewardj6a785df2015-02-09 09:07:47 +000012052 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12053 if (isD) {
12054 for (UInt i = 0; i < 2; i++) {
12055 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12056 getQRegLane(nn, i, Ity_F64)));
12057 }
12058 } else {
12059 UInt n = bitQ==1 ? 4 : 2;
12060 for (UInt i = 0; i < n; i++) {
12061 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12062 getQRegLane(nn, i, Ity_F32)));
12063 }
12064 if (bitQ == 0)
12065 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12066 }
12067 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12068 DIP("frint%c %s.%s, %s.%s\n", ch,
12069 nameQReg128(dd), arr, nameQReg128(nn), arr);
12070 return True;
12071 }
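      /* Rounding sanity examples for the table above (architectural
         values): frintm(1.7) = 1.0, frintp(1.7) = 2.0, frintz(-1.7)
         = -1.0, frintn(2.5) = 2.0 (ties to even). Architecturally
         frinta(2.5) = 3.0 (ties away from zero); the kludge above
         instead yields 2.0, which is where this implementation can
         differ from real hardware. */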
12072
sewardjbc0b7222015-03-30 18:49:38 +000012073 ix = 0; /*INVALID*/
12074 switch (opcode) {
12075 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12076 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12077 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12078 default: break;
12079 }
12080 if (ix > 0) {
12081 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12082 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12083 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12084 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12085 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12086 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12087 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12088 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12089 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12090 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12091 Bool isD = (size & 1) == 1;
12092 if (bitQ == 0 && isD) return False; // implied 1d case
12093
12094 IRRoundingMode irrm = 8; /*impossible*/
12095 HChar ch = '?';
12096 switch (ix) {
12097 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12098 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12099 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12100 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12101 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12102 default: vassert(0);
12103 }
12104 IROp cvt = Iop_INVALID;
12105 if (bitU == 1) {
12106 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12107 } else {
12108 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12109 }
12110 if (isD) {
12111 for (UInt i = 0; i < 2; i++) {
12112 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12113 getQRegLane(nn, i, Ity_F64)));
12114 }
12115 } else {
12116 UInt n = bitQ==1 ? 4 : 2;
12117 for (UInt i = 0; i < n; i++) {
12118 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12119 getQRegLane(nn, i, Ity_F32)));
12120 }
12121 if (bitQ == 0)
12122 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12123 }
12124 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12125 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12126 nameQReg128(dd), arr, nameQReg128(nn), arr);
12127 return True;
12128 }
12129
sewardjfc261d92014-08-24 20:36:14 +000012130 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12131 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12132 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12133 Bool isREC = bitU == 0;
12134 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12135 IRTemp res = newTempV128();
12136 assign(res, unop(op, getQReg128(nn)));
12137 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12138 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12139 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12140 DIP("%s %s.%s, %s.%s\n", nm,
12141 nameQReg128(dd), arr, nameQReg128(nn), arr);
12142 return True;
12143 }
12144
sewardj5747c4a2014-06-11 20:57:23 +000012145 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12146 /* -------- 0,0x,11101: SCVTF -------- */
12147 /* -------- 1,0x,11101: UCVTF -------- */
12148 /* 31 28 22 21 15 9 4
12149 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12150 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12151 with laneage:
12152 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12153 */
12154 Bool isQ = bitQ == 1;
12155 Bool isU = bitU == 1;
12156 Bool isF64 = (size & 1) == 1;
12157 if (isQ || !isF64) {
12158 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12159 UInt nLanes = 0;
12160 Bool zeroHI = False;
12161 const HChar* arrSpec = NULL;
12162 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12163 isQ, isF64 );
12164 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12165 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12166 IRTemp rm = mk_get_IR_rounding_mode();
12167 UInt i;
12168 vassert(ok); /* the 'if' above should ensure this */
12169 for (i = 0; i < nLanes; i++) {
12170 putQRegLane(dd, i,
12171 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12172 }
12173 if (zeroHI) {
12174 putQRegLane(dd, 1, mkU64(0));
12175 }
12176 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12177 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12178 return True;
12179 }
12180 /* else fall through */
12181 }
12182
sewardj89cefe42015-02-24 12:21:01 +000012183 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12184 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12185 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12186 Bool isSQRT = bitU == 1;
12187 Bool isD = (size & 1) == 1;
12188 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12189 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12190 if (bitQ == 0 && isD) return False; // implied 1d case
12191 IRTemp resV = newTempV128();
12192 assign(resV, unop(op, getQReg128(nn)));
12193 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12194 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12195 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12196 nameQReg128(dd), arr, nameQReg128(nn), arr);
12197 return True;
12198 }
12199
sewardjdf1628c2014-06-10 22:52:05 +000012200 return False;
12201# undef INSN
12202}
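
/* A scalar sketch (illustrative only; not used by the decoder) of the
   saturation tracking in the SQABS/SQNEG and SQXTN cases above: both
   the saturated and the wrapping result are computed, and FPSR.QC must
   be set when they differ, which is what updateQCFLAGwithDifference
   establishes for the vector versions. */
static inline Long sketch_sqneg64 ( Long x, /*MOD*/Bool* qc )
{
   Long minI  = (Long)0x8000000000000000ULL;   /* most negative value */
   Long maxI  = (Long)0x7FFFFFFFFFFFFFFFULL;   /* most positive value */
   Long plain = (Long)(0ULL - (ULong)x);       /* wrapping negation */
   Long sat   = (x == minI) ? maxI : plain;    /* saturating negation */
   if (sat != plain) *qc = True;               /* QC is sticky */
   return sat;
}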
12203
sewardjfc83d2c2014-06-12 10:15:46 +000012204
sewardjdf1628c2014-06-10 22:52:05 +000012205static
12206Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12207{
sewardj85fbb022014-06-12 13:16:01 +000012208 /* 31 28 23 21 20 19 15 11 9 4
12209 0 Q U 01111 size L M m opcode H 0 n d
12210 Decode fields are: u,size,opcode
sewardj787a67f2014-06-23 09:09:41 +000012211 M is really part of the mm register number. Individual
12212 cases need to inspect L and H though.
sewardj85fbb022014-06-12 13:16:01 +000012213 */
sewardjdf1628c2014-06-10 22:52:05 +000012214# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj85fbb022014-06-12 13:16:01 +000012215 if (INSN(31,31) != 0
sewardj8e91fd42014-07-11 12:05:47 +000012216 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
sewardj85fbb022014-06-12 13:16:01 +000012217 return False;
12218 }
12219 UInt bitQ = INSN(30,30);
12220 UInt bitU = INSN(29,29);
12221 UInt size = INSN(23,22);
12222 UInt bitL = INSN(21,21);
12223 UInt bitM = INSN(20,20);
12224 UInt mmLO4 = INSN(19,16);
12225 UInt opcode = INSN(15,12);
12226 UInt bitH = INSN(11,11);
12227 UInt nn = INSN(9,5);
12228 UInt dd = INSN(4,0);
sewardj85fbb022014-06-12 13:16:01 +000012229 vassert(size < 4);
sewardj787a67f2014-06-23 09:09:41 +000012230 vassert(bitH < 2 && bitM < 2 && bitL < 2);
sewardj85fbb022014-06-12 13:16:01 +000012231
sewardjd0e5e532014-10-30 16:36:53 +000012232 if (bitU == 0 && size >= X10
12233 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12234 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12235 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12236 if (bitQ == 0 && size == X11) return False; // implied 1d case
12237 Bool isD = (size & 1) == 1;
12238 Bool isSUB = opcode == BITS4(0,1,0,1);
12239 UInt index;
12240 if (!isD) index = (bitH << 1) | bitL;
12241 else if (isD && bitL == 0) index = bitH;
12242 else return False; // sz:L == x11 => unallocated encoding
12243 vassert(index < (isD ? 2 : 4));
12244 IRType ity = isD ? Ity_F64 : Ity_F32;
12245 IRTemp elem = newTemp(ity);
12246 UInt mm = (bitM << 4) | mmLO4;
12247 assign(elem, getQRegLane(mm, index, ity));
12248 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12249 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12250 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12251 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12252 IRTemp rm = mk_get_IR_rounding_mode();
12253 IRTemp t1 = newTempV128();
12254 IRTemp t2 = newTempV128();
12255 // FIXME: double rounding; use FMA primops instead
12256 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12257 assign(t2, triop(isSUB ? opSUB : opADD,
12258 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12259 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12260 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12261 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12262 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12263 isD ? 'd' : 's', index);
12264 return True;
12265 }
12266
sewardjee3db332015-02-08 18:24:38 +000012267 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12268 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12269 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
sewardj85fbb022014-06-12 13:16:01 +000012270 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjee3db332015-02-08 18:24:38 +000012271 Bool isD = (size & 1) == 1;
12272 Bool isMULX = bitU == 1;
sewardj85fbb022014-06-12 13:16:01 +000012273 UInt index;
12274 if (!isD) index = (bitH << 1) | bitL;
12275 else if (isD && bitL == 0) index = bitH;
12276 else return False; // sz:L == x11 => unallocated encoding
12277 vassert(index < (isD ? 2 : 4));
12278 IRType ity = isD ? Ity_F64 : Ity_F32;
12279 IRTemp elem = newTemp(ity);
sewardj787a67f2014-06-23 09:09:41 +000012280 UInt mm = (bitM << 4) | mmLO4;
sewardj85fbb022014-06-12 13:16:01 +000012281 assign(elem, getQRegLane(mm, index, ity));
12282 IRTemp dupd = math_DUP_TO_V128(elem, ity);
sewardjee3db332015-02-08 18:24:38 +000012283 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
sewardj8e91fd42014-07-11 12:05:47 +000012284 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +000012285 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12286 mkexpr(mk_get_IR_rounding_mode()),
12287 getQReg128(nn), mkexpr(dupd)));
sewardjdf9d6d52014-06-27 10:43:22 +000012288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj85fbb022014-06-12 13:16:01 +000012289 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
sewardjee3db332015-02-08 18:24:38 +000012290 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12291 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
sewardj85fbb022014-06-12 13:16:01 +000012292 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12293 return True;
12294 }
12295
sewardj787a67f2014-06-23 09:09:41 +000012296 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12297 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12298 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12299 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12300 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12301 Bool isMLA = opcode == BITS4(0,0,0,0);
12302 Bool isMLS = opcode == BITS4(0,1,0,0);
12303 UInt mm = 32; // invalid
12304 UInt ix = 16; // invalid
12305 switch (size) {
12306 case X00:
12307 return False; // b case is not allowed
12308 case X01:
12309 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12310 case X10:
12311 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12312 case X11:
12313 return False; // d case is not allowed
12314 default:
12315 vassert(0);
12316 }
12317 vassert(mm < 32 && ix < 16);
sewardj487559e2014-07-10 14:22:45 +000012318 IROp opMUL = mkVecMUL(size);
12319 IROp opADD = mkVecADD(size);
12320 IROp opSUB = mkVecSUB(size);
sewardj787a67f2014-06-23 09:09:41 +000012321 HChar ch = size == X01 ? 'h' : 's';
sewardj487559e2014-07-10 14:22:45 +000012322 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000012323 IRTemp vecD = newTempV128();
12324 IRTemp vecN = newTempV128();
12325 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +000012326 assign(vecD, getQReg128(dd));
12327 assign(vecN, getQReg128(nn));
12328 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12329 if (isMLA || isMLS) {
12330 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12331 } else {
12332 assign(res, prod);
12333 }
sewardjdf9d6d52014-06-27 10:43:22 +000012334 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj787a67f2014-06-23 09:09:41 +000012335 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12336 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12337 : (isMLS ? "mls" : "mul"),
12338 nameQReg128(dd), arr,
12339 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12340 return True;
12341 }
12342
sewardj487559e2014-07-10 14:22:45 +000012343 if (opcode == BITS4(1,0,1,0)
12344 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12345 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12346 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12347 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12348 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12349 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12350 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12351 /* Widens, and size refers to the narrowed lanes. */
12352 UInt ks = 3;
12353 switch (opcode) {
12354 case BITS4(1,0,1,0): ks = 0; break;
12355 case BITS4(0,0,1,0): ks = 1; break;
12356 case BITS4(0,1,1,0): ks = 2; break;
12357 default: vassert(0);
12358 }
12359 vassert(ks >= 0 && ks <= 2);
12360 Bool isU = bitU == 1;
12361 Bool is2 = bitQ == 1;
12362 UInt mm = 32; // invalid
12363 UInt ix = 16; // invalid
12364 switch (size) {
12365 case X00:
12366 return False; // h_b_b[] case is not allowed
12367 case X01:
12368 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12369 case X10:
12370 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12371 case X11:
12372 return False; // q_d_d[] case is not allowed
12373 default:
12374 vassert(0);
12375 }
12376 vassert(mm < 32 && ix < 16);
sewardj51d012a2014-07-21 09:19:50 +000012377 IRTemp vecN = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000012378 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000012379 IRTemp vecD = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000012380 assign(vecN, getQReg128(nn));
sewardj51d012a2014-07-21 09:19:50 +000012381 assign(vecD, getQReg128(dd));
12382 IRTemp res = IRTemp_INVALID;
12383 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12384 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj487559e2014-07-10 14:22:45 +000012385 putQReg128(dd, mkexpr(res));
12386 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12387 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12388 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12389 HChar ch = size == X01 ? 'h' : 's';
12390 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12391 isU ? 'u' : 's', nm, is2 ? "2" : "",
12392 nameQReg128(dd), arrWide,
12393 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12394 return True;
12395 }
12396
sewardj51d012a2014-07-21 09:19:50 +000012397 if (bitU == 0
12398 && (opcode == BITS4(1,0,1,1)
12399 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12400 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12401 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12402 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12403 /* Widens, and size refers to the narrowed lanes. */
12404 UInt ks = 3;
12405 switch (opcode) {
12406 case BITS4(1,0,1,1): ks = 0; break;
12407 case BITS4(0,0,1,1): ks = 1; break;
12408 case BITS4(0,1,1,1): ks = 2; break;
12409 default: vassert(0);
12410 }
12411 vassert(ks >= 0 && ks <= 2);
12412 Bool is2 = bitQ == 1;
12413 UInt mm = 32; // invalid
12414 UInt ix = 16; // invalid
12415 switch (size) {
12416 case X00:
12417 return False; // h_b_b[] case is not allowed
12418 case X01:
12419 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12420 case X10:
12421 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12422 case X11:
12423 return False; // q_d_d[] case is not allowed
12424 default:
12425 vassert(0);
12426 }
12427 vassert(mm < 32 && ix < 16);
12428 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12429 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12430 newTempsV128_2(&vecN, &vecD);
12431 assign(vecN, getQReg128(nn));
12432 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12433 assign(vecD, getQReg128(dd));
12434 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12435 is2, size, "mas"[ks],
12436 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12437 putQReg128(dd, mkexpr(res));
12438 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12439 updateQCFLAGwithDifference(sat1q, sat1n);
12440 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12441 updateQCFLAGwithDifference(sat2q, sat2n);
12442 }
sewardj54ffa1d2014-07-22 09:27:49 +000012443 const HChar* nm = ks == 0 ? "sqdmull"
sewardj51d012a2014-07-21 09:19:50 +000012444 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12445 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12446 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12447 HChar ch = size == X01 ? 'h' : 's';
12448 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12449 nm, is2 ? "2" : "",
12450 nameQReg128(dd), arrWide,
12451 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12452 return True;
12453 }
12454
sewardj257e99f2014-08-03 12:45:19 +000012455 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12456 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12457 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12458 UInt mm = 32; // invalid
12459 UInt ix = 16; // invalid
12460 switch (size) {
12461 case X00:
12462 return False; // b case is not allowed
12463 case X01:
12464 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12465 case X10:
12466 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12467 case X11:
12468 return False; // q case is not allowed
12469 default:
12470 vassert(0);
12471 }
12472 vassert(mm < 32 && ix < 16);
12473 Bool isR = opcode == BITS4(1,1,0,1);
12474 IRTemp res, sat1q, sat1n, vN, vM;
12475 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12476 vN = newTempV128();
12477 assign(vN, getQReg128(nn));
12478 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12479 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12480 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12481 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12482 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12483 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12484 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12485 HChar ch = size == X01 ? 'h' : 's';
12486 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
12487 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12488 return True;
12489 }
12490
sewardjdf1628c2014-06-10 22:52:05 +000012491 return False;
12492# undef INSN
12493}
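
/* Sketch (illustrative only) of how the integer by-element cases above
   assemble the m register number and lane index from the M/H/L bits:
   s-sized lanes use a 5-bit register field with index H:L, while
   h-sized lanes only have a 4-bit register field and use index H:L:M. */
static inline void sketch_index_fields ( UInt size, UInt bitH, UInt bitL,
                                         UInt bitM, UInt mmLO4,
                                         /*OUT*/UInt* mm, /*OUT*/UInt* ix )
{
   if (size == X01) {                   /* h: 16-bit lanes */
      *mm = mmLO4;
      *ix = (bitH << 2) | (bitL << 1) | bitM;
   } else {                             /* s: 32-bit lanes (size == X10) */
      *mm = (bitM << 4) | mmLO4;
      *ix = (bitH << 1) | bitL;
   }
}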
12494
sewardjfc83d2c2014-06-12 10:15:46 +000012495
sewardjdf1628c2014-06-10 22:52:05 +000012496static
12497Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12498{
12499# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12500 return False;
12501# undef INSN
12502}
12503
sewardjfc83d2c2014-06-12 10:15:46 +000012504
sewardjdf1628c2014-06-10 22:52:05 +000012505static
12506Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12507{
12508# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12509 return False;
12510# undef INSN
12511}
12512
sewardjfc83d2c2014-06-12 10:15:46 +000012513
sewardjdf1628c2014-06-10 22:52:05 +000012514static
12515Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12516{
12517# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12518 return False;
12519# undef INSN
12520}
12521
sewardj5747c4a2014-06-11 20:57:23 +000012522
sewardjdf1628c2014-06-10 22:52:05 +000012523static
12524Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12525{
sewardj5747c4a2014-06-11 20:57:23 +000012526 /* 31 28 23 21 20 15 13 9 4
12527 000 11110 ty 1 m op 1000 n opcode2
12528 The first 3 bits are really "M 0 S", but M and S are always zero.
12529 Decode fields are: ty,op,opcode2
12530 */
sewardjdf1628c2014-06-10 22:52:05 +000012531# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012532 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12533 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12534 return False;
12535 }
12536 UInt ty = INSN(23,22);
12537 UInt mm = INSN(20,16);
12538 UInt op = INSN(15,14);
12539 UInt nn = INSN(9,5);
12540 UInt opcode2 = INSN(4,0);
12541 vassert(ty < 4);
12542
12543 if (ty <= X01 && op == X00
12544 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12545 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
12546 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
12547 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
12548 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12549 /* 31 23 20 15 9 4
12550 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
12551 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
12552 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
12553 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
12554
12555 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
12556 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
12557 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
12558 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
12559
12560 FCMPE generates Invalid Operation exn if either arg is any kind
12561 of NaN. FCMP generates Invalid Operation exn if either arg is a
12562 signalling NaN. We ignore this detail here and produce the same
12563 IR for both.
12564 */
12565 Bool isD = (ty & 1) == 1;
12566 Bool isCMPE = (opcode2 & 16) == 16;
12567 Bool cmpZero = (opcode2 & 8) == 8;
12568 IRType ity = isD ? Ity_F64 : Ity_F32;
12569 Bool valid = True;
12570 if (cmpZero && mm != 0) valid = False;
12571 if (valid) {
12572 IRTemp argL = newTemp(ity);
12573 IRTemp argR = newTemp(ity);
12574 IRTemp irRes = newTemp(Ity_I32);
12575 assign(argL, getQRegLO(nn, ity));
12576 assign(argR,
12577 cmpZero
12578 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12579 : getQRegLO(mm, ity));
12580 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12581 mkexpr(argL), mkexpr(argR)));
12582 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12583 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12584 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12585 setFlags_COPY(nzcv_28x0);
12586 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12587 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12588 return True;
12589 }
12590 return False;
12591 }
12592
sewardjdf1628c2014-06-10 22:52:05 +000012593 return False;
12594# undef INSN
12595}
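
/* Sketch (illustrative only) of the flag placement used above: the
   4-bit NZCV result of an FP compare is parked in bits 31:28 of a
   64-bit value before being copied into the guest flags.
   Architecturally FCMP yields N=1 (0x8) for less-than, Z=1,C=1 (0x6)
   for equal, C=1 (0x2) for greater-than, and C=1,V=1 (0x3) for
   unordered. */
static inline ULong sketch_nzcv_28x0 ( UInt nzcv )
{
   return ((ULong)(nzcv & 0xF)) << 28;   /* e.g. "equal" -> 0x60000000 */
}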
12596
sewardj5747c4a2014-06-11 20:57:23 +000012597
sewardjdf1628c2014-06-10 22:52:05 +000012598static
12599Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12600{
sewardj13830dc2015-02-07 21:09:47 +000012601 /* 31 28 23 21 20 15 11 9 4 3
12602 000 11110 ty 1 m cond 01 n op nzcv
12603 The first 3 bits are really "M 0 S", but M and S are always zero.
12604 Decode fields are: ty,op
12605 */
sewardjdf1628c2014-06-10 22:52:05 +000012606# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj13830dc2015-02-07 21:09:47 +000012607 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12608 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12609 return False;
12610 }
12611 UInt ty = INSN(23,22);
12612 UInt mm = INSN(20,16);
12613 UInt cond = INSN(15,12);
12614 UInt nn = INSN(9,5);
12615 UInt op = INSN(4,4);
12616 UInt nzcv = INSN(3,0);
12617 vassert(ty < 4 && op <= 1);
12618
12619 if (ty <= BITS2(0,1)) {
12620 /* -------- 00,0 FCCMP s_s -------- */
12621 /* -------- 00,1 FCCMPE s_s -------- */
12622 /* -------- 01,0 FCCMP d_d -------- */
12623 /* -------- 01,1 FCCMPE d_d -------- */
12624
12625 /* FCCMPE generates Invalid Operation exn if either arg is any kind
12626 of NaN. FCCMP generates Invalid Operation exn if either arg is a
12627 signalling NaN. We ignore this detail here and produce the same
12628 IR for both.
12629 */
12630 Bool isD = (ty & 1) == 1;
12631 Bool isCMPE = op == 1;
12632 IRType ity = isD ? Ity_F64 : Ity_F32;
12633 IRTemp argL = newTemp(ity);
12634 IRTemp argR = newTemp(ity);
12635 IRTemp irRes = newTemp(Ity_I32);
12636 assign(argL, getQRegLO(nn, ity));
12637 assign(argR, getQRegLO(mm, ity));
12638 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12639 mkexpr(argL), mkexpr(argR)));
12640 IRTemp condT = newTemp(Ity_I1);
12641 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12642 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12643
12644 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12645 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12646
12647 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12648
12649 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12650 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12651 mkexpr(nzcvT_28x0), nzcvF_28x0));
12652 setFlags_COPY(nzcv_28x0);
12653 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12654 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12655 return True;
12656 }
12657
sewardjdf1628c2014-06-10 22:52:05 +000012658 return False;
12659# undef INSN
12660}
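
/* Behavioural sketch (illustrative only) of FCCMP/FCCMPE as implemented
   above: if the condition holds, the flags come from the comparison,
   otherwise the immediate nzcv field is used verbatim. */
static inline UInt sketch_fccmp_nzcv ( Bool condHolds, UInt nzcvFromCmp,
                                       UInt nzcvImm )
{
   return condHolds ? nzcvFromCmp : nzcvImm;
}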
12661
sewardjfc83d2c2014-06-12 10:15:46 +000012662
sewardjdf1628c2014-06-10 22:52:05 +000012663static
12664Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12665{
sewardje23ec112014-11-15 16:07:14 +000012666 /* 31 23 21 20 15 11 9 5
12667 000 11110 ty 1 m cond 11 n d
12668 The first 3 bits are really "M 0 S", but M and S are always zero.
12669 Decode fields: ty
12670 */
sewardjdf1628c2014-06-10 22:52:05 +000012671# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardje23ec112014-11-15 16:07:14 +000012672 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12673 || INSN(11,10) != BITS2(1,1)) {
12674 return False;
12675 }
12676 UInt ty = INSN(23,22);
12677 UInt mm = INSN(20,16);
12678 UInt cond = INSN(15,12);
12679 UInt nn = INSN(9,5);
12680 UInt dd = INSN(4,0);
12681 if (ty <= X01) {
12682 /* -------- 00: FCSEL s_s -------- */
12683 /* -------- 01: FCSEL d_d -------- */
12684 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12685 IRTemp srcT = newTemp(ity);
12686 IRTemp srcF = newTemp(ity);
12687 IRTemp res = newTemp(ity);
12688 assign(srcT, getQRegLO(nn, ity));
12689 assign(srcF, getQRegLO(mm, ity));
12690 assign(res, IRExpr_ITE(
12691 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12692 mkexpr(srcT), mkexpr(srcF)));
12693 putQReg128(dd, mkV128(0x0000));
12694 putQRegLO(dd, mkexpr(res));
12695 DIP("fcsel %s, %s, %s, %s\n",
12696 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12697 nameCC(cond));
12698 return True;
12699 }
sewardjdf1628c2014-06-10 22:52:05 +000012700 return False;
12701# undef INSN
12702}
12703
sewardj5747c4a2014-06-11 20:57:23 +000012704
sewardjdf1628c2014-06-10 22:52:05 +000012705static
12706Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12707{
12708 /* 31 28 23 21 20 14 9 4
12709 000 11110 ty 1 opcode 10000 n d
12710 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000012711 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000012712 */
12713# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12714 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12715 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12716 return False;
12717 }
12718 UInt ty = INSN(23,22);
12719 UInt opcode = INSN(20,15);
12720 UInt nn = INSN(9,5);
12721 UInt dd = INSN(4,0);
12722
12723 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12724 /* -------- 0x,000000: FMOV d_d, s_s -------- */
12725 /* -------- 0x,000001: FABS d_d, s_s -------- */
12726 /* -------- 0x,000010: FNEG d_d, s_s -------- */
12727 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12728 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12729 IRTemp src = newTemp(ity);
12730 IRTemp res = newTemp(ity);
12731 const HChar* nm = "??";
12732 assign(src, getQRegLO(nn, ity));
12733 switch (opcode) {
12734 case BITS6(0,0,0,0,0,0):
12735 nm = "fmov"; assign(res, mkexpr(src)); break;
12736 case BITS6(0,0,0,0,0,1):
12737 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12738 case BITS6(0,0,0,0,1,0):
12739 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12740 case BITS6(0,0,0,0,1,1):
12741 nm = "fsqrt";
12742 assign(res, binop(mkSQRTF(ity),
12743 mkexpr(mk_get_IR_rounding_mode()),
12744 mkexpr(src))); break;
12745 default:
12746 vassert(0);
12747 }
12748 putQReg128(dd, mkV128(0x0000));
12749 putQRegLO(dd, mkexpr(res));
12750 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12751 return True;
12752 }
12753
sewardj5747c4a2014-06-11 20:57:23 +000012754 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
12755 || opcode == BITS6(0,0,0,1,0,1)))
12756 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
12757 || opcode == BITS6(0,0,0,1,0,1)))
12758 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
12759 || opcode == BITS6(0,0,0,1,0,0)))) {
12760 /* -------- 11,000100: FCVT s_h -------- */
12761 /* -------- 11,000101: FCVT d_h -------- */
12762 /* -------- 00,000111: FCVT h_s -------- */
12763 /* -------- 00,000101: FCVT d_s -------- */
12764 /* -------- 01,000111: FCVT h_d -------- */
12765 /* -------- 01,000100: FCVT s_d -------- */
12766 /* 31 23 21 16 14 9 4
sewardj400d6b92015-03-30 09:01:51 +000012767 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
12768 --------- 11 ----- 01 --------- FCVT Dd, Hn
12769 --------- 00 ----- 11 --------- FCVT Hd, Sn
sewardj5747c4a2014-06-11 20:57:23 +000012770 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardj400d6b92015-03-30 09:01:51 +000012771 --------- 01 ----- 11 --------- FCVT Hd, Dn
sewardj5747c4a2014-06-11 20:57:23 +000012772 --------- 01 ----- 00 --------- FCVT Sd, Dn
12773 Rounding, when dst is smaller than src, is per the FPCR.
12774 */
12775 UInt b2322 = ty;
12776 UInt b1615 = opcode & BITS2(1,1);
sewardj400d6b92015-03-30 09:01:51 +000012777 switch ((b2322 << 2) | b1615) {
12778 case BITS4(0,0,0,1): // S -> D
12779 case BITS4(1,1,0,1): { // H -> D
12780 Bool srcIsH = b2322 == BITS2(1,1);
12781 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
12782 IRTemp res = newTemp(Ity_F64);
12783 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
12784 getQRegLO(nn, srcTy)));
12785 putQReg128(dd, mkV128(0x0000));
12786 putQRegLO(dd, mkexpr(res));
12787 DIP("fcvt %s, %s\n",
12788 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
12789 return True;
12790 }
12791 case BITS4(0,1,0,0): // D -> S
12792 case BITS4(0,1,1,1): { // D -> H
12793 Bool dstIsH = b1615 == BITS2(1,1);
12794 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
12795 IRTemp res = newTemp(dstTy);
12796 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
12797 mkexpr(mk_get_IR_rounding_mode()),
12798 getQRegLO(nn, Ity_F64)));
12799 putQReg128(dd, mkV128(0x0000));
12800 putQRegLO(dd, mkexpr(res));
12801 DIP("fcvt %s, %s\n",
12802 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
12803 return True;
12804 }
12805 case BITS4(0,0,1,1): // S -> H
12806 case BITS4(1,1,0,0): { // H -> S
12807 Bool toH = b1615 == BITS2(1,1);
12808 IRType srcTy = toH ? Ity_F32 : Ity_F16;
12809 IRType dstTy = toH ? Ity_F16 : Ity_F32;
12810 IRTemp res = newTemp(dstTy);
12811 if (toH) {
12812 assign(res, binop(Iop_F32toF16,
12813 mkexpr(mk_get_IR_rounding_mode()),
12814 getQRegLO(nn, srcTy)));
12815
12816 } else {
12817 assign(res, unop(Iop_F16toF32,
12818 getQRegLO(nn, srcTy)));
12819 }
12820 putQReg128(dd, mkV128(0x0000));
12821 putQRegLO(dd, mkexpr(res));
12822 DIP("fcvt %s, %s\n",
12823 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
12824 return True;
12825 }
12826 default:
12827 break;
sewardj5747c4a2014-06-11 20:57:23 +000012828 }
12829 /* else unhandled */
12830 return False;
12831 }
12832
12833 if (ty <= X01
12834 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
12835 && opcode != BITS6(0,0,1,1,0,1)) {
12836 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
12837 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
12838 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
12839 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
12840 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
12841 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
12842 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
12843 /* 31 23 21 17 14 9 4
12844 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
12845 rm
12846 x==0 => S-registers, x==1 => D-registers
12847 rm (17:15) encodings:
12848 111 per FPCR (FRINTI)
12849 001 +inf (FRINTP)
12850 010 -inf (FRINTM)
12851 011 zero (FRINTZ)
sewardj6a785df2015-02-09 09:07:47 +000012852 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
sewardj5747c4a2014-06-11 20:57:23 +000012853 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjd8ad76a2014-10-30 15:37:16 +000012854 110 per FPCR + "exact = TRUE" (FRINTX)
sewardj5747c4a2014-06-11 20:57:23 +000012855 101 unallocated
12856 */
12857 Bool isD = (ty & 1) == 1;
12858 UInt rm = opcode & BITS6(0,0,0,1,1,1);
12859 IRType ity = isD ? Ity_F64 : Ity_F32;
12860 IRExpr* irrmE = NULL;
12861 UChar ch = '?';
12862 switch (rm) {
12863 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
12864 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
12865 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
12866 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12867 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjd8ad76a2014-10-30 15:37:16 +000012868 // I am unsure about the following, due to the "integral exact"
sewardj9e1c2b02014-11-25 17:42:52 +000012869 // description in the manual. What does it mean? (frintx, that is)
sewardjd8ad76a2014-10-30 15:37:16 +000012870 case BITS3(1,1,0):
12871 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj9e1c2b02014-11-25 17:42:52 +000012872 case BITS3(1,1,1):
12873 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj6a785df2015-02-09 09:07:47 +000012874 // The following is a kludge. There's no Irrm_ value to represent
12875 // this ("to nearest, with ties to even")
12876 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
sewardj5747c4a2014-06-11 20:57:23 +000012877 default: break;
12878 }
12879 if (irrmE) {
12880 IRTemp src = newTemp(ity);
12881 IRTemp dst = newTemp(ity);
12882 assign(src, getQRegLO(nn, ity));
12883 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12884 irrmE, mkexpr(src)));
12885 putQReg128(dd, mkV128(0x0000));
12886 putQRegLO(dd, mkexpr(dst));
12887 DIP("frint%c %s, %s\n",
12888 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12889 return True;
12890 }
12891 return False;
12892 }
12893
sewardjdf1628c2014-06-10 22:52:05 +000012894 return False;
12895# undef INSN
12896}
12897
12898
12899static
12900Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
12901{
12902 /* 31 28 23 21 20 15 11 9 4
12903 000 11110 ty 1 m opcode 10 n d
12904 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj76927e62014-11-17 11:21:21 +000012905 Decode fields: ty, opcode
sewardjdf1628c2014-06-10 22:52:05 +000012906 */
12907# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12908 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12909 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
12910 return False;
12911 }
12912 UInt ty = INSN(23,22);
12913 UInt mm = INSN(20,16);
12914 UInt opcode = INSN(15,12);
12915 UInt nn = INSN(9,5);
12916 UInt dd = INSN(4,0);
12917
sewardj76927e62014-11-17 11:21:21 +000012918 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
sewardjdf1628c2014-06-10 22:52:05 +000012919 /* ------- 0x,0000: FMUL d_d, s_s ------- */
12920 /* ------- 0x,0001: FDIV d_d, s_s ------- */
12921 /* ------- 0x,0010: FADD d_d, s_s ------- */
12922 /* ------- 0x,0011: FSUB d_d, s_s ------- */
sewardj76927e62014-11-17 11:21:21 +000012923 /* ------- 0x,0100: FMAX d_d, s_s ------- */
12924 /* ------- 0x,0101: FMIN d_d, s_s ------- */
12925 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
12926 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
sewardjdf1628c2014-06-10 22:52:05 +000012927 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12928 IROp iop = Iop_INVALID;
12929 const HChar* nm = "???";
12930 switch (opcode) {
sewardj76927e62014-11-17 11:21:21 +000012931 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
12932 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
12933 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
12934 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
12935 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
12936 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
12937 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
12938 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
sewardjdf1628c2014-06-10 22:52:05 +000012939 default: vassert(0);
12940 }
sewardj76927e62014-11-17 11:21:21 +000012941 if (opcode <= BITS4(0,0,1,1)) {
12942 // This is really not good code. TODO: avoid width-changing
sewardjb963eef2014-11-17 14:16:56 +000012943 IRTemp res = newTemp(ity);
12944 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12945 getQRegLO(nn, ity), getQRegLO(mm, ity)));
sewardj76927e62014-11-17 11:21:21 +000012946 putQReg128(dd, mkV128(0));
sewardjb963eef2014-11-17 14:16:56 +000012947 putQRegLO(dd, mkexpr(res));
sewardj76927e62014-11-17 11:21:21 +000012948 } else {
12949 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
12950 binop(iop, getQReg128(nn), getQReg128(mm))));
12951 }
sewardjdf1628c2014-06-10 22:52:05 +000012952 DIP("%s %s, %s, %s\n",
12953 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12954 return True;
12955 }
12956
12957 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
12958 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
12959 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12960 IROp iop = mkMULF(ity);
12961 IROp iopn = mkNEGF(ity);
12962 const HChar* nm = "fnmul";
12963 IRExpr* resE = unop(iopn,
12964 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12965 getQRegLO(nn, ity), getQRegLO(mm, ity)));
12966 IRTemp res = newTemp(ity);
12967 assign(res, resE);
12968 putQReg128(dd, mkV128(0));
12969 putQRegLO(dd, mkexpr(res));
12970 DIP("%s %s, %s, %s\n",
12971 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12972 return True;
12973 }
12974
sewardjdf1628c2014-06-10 22:52:05 +000012975 return False;
12976# undef INSN
12977}
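
/* The FMAXNM/FMINNM cases above are kludged to plain FMAX/FMIN. Per the
   IEEE 754 maxNum behaviour the ARM ARM specifies, a quiet NaN operand
   is dropped in favour of the numeric one, whereas FMAX propagates the
   NaN. A hedged scalar sketch of the intended semantics, using the
   self-comparison NaN test: */
static inline Double sketch_fmaxnm ( Double x, Double y )
{
   Bool xNaN = x != x;                /* true iff x is a NaN */
   Bool yNaN = y != y;
   if (xNaN && !yNaN) return y;       /* drop the NaN operand */
   if (yNaN && !xNaN) return x;
   return x > y ? x : y;              /* NaN if both args are NaNs */
}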
12978
12979
12980static
12981Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
12982{
sewardj5747c4a2014-06-11 20:57:23 +000012983 /* 31 28 23 21 20 15 14 9 4
12984 000 11111 ty o1 m o0 a n d
12985 The first 3 bits are really "M 0 S", but M and S are always zero.
12986 Decode fields: ty,o1,o0
12987 */
sewardjdf1628c2014-06-10 22:52:05 +000012988# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012989 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
12990 return False;
12991 }
12992 UInt ty = INSN(23,22);
12993 UInt bitO1 = INSN(21,21);
12994 UInt mm = INSN(20,16);
12995 UInt bitO0 = INSN(15,15);
12996 UInt aa = INSN(14,10);
12997 UInt nn = INSN(9,5);
12998 UInt dd = INSN(4,0);
12999 vassert(ty < 4);
13000
13001 if (ty <= X01) {
13002 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13003 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13004 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13005 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13006 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13007 /* 31 22 20 15 14 9 4 ix
13008 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13009 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13010 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13011 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13012 where Fx=Dx when sz=1, Fx=Sx when sz=0
13013
13014 -----SPEC------ ----IMPL----
13015 fmadd a + n * m a + n * m
13016 fmsub a + (-n) * m a - n * m
13017 fnmadd (-a) + (-n) * m -(a + n * m)
13018 fnmsub (-a) + n * m -(a - n * m)
13019 */
13020 Bool isD = (ty & 1) == 1;
13021 UInt ix = (bitO1 << 1) | bitO0;
13022 IRType ity = isD ? Ity_F64 : Ity_F32;
13023 IROp opADD = mkADDF(ity);
13024 IROp opSUB = mkSUBF(ity);
13025 IROp opMUL = mkMULF(ity);
13026 IROp opNEG = mkNEGF(ity);
13027 IRTemp res = newTemp(ity);
13028 IRExpr* eA = getQRegLO(aa, ity);
13029 IRExpr* eN = getQRegLO(nn, ity);
13030 IRExpr* eM = getQRegLO(mm, ity);
13031 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13032 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13033 switch (ix) {
13034 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13035 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13036 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13037 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13038 default: vassert(0);
13039 }
13040 putQReg128(dd, mkV128(0x0000));
13041 putQRegLO(dd, mkexpr(res));
13042 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13043 DIP("%s %s, %s, %s, %s\n",
13044 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13045 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13046 return True;
13047 }
13048
sewardjdf1628c2014-06-10 22:52:05 +000013049 return False;
13050# undef INSN
13051}
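
/* Scalar restatement (illustrative only) of the SPEC vs IMPL table
   above: negation commutes with round-to-nearest, so -(a + n*m) matches
   (-a) + (-n)*m there, but under directed rounding the two can differ
   in the last ulp -- and the separate multiply-then-add already suffers
   double rounding relative to a true fused multiply-add. */
static inline Double sketch_fnmadd ( Double n, Double m, Double a )
{
   return -(a + n * m);   /* IMPL column; SPEC is (-a) + (-n) * m */
}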
13052
13053
13054static
13055Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13056{
13057 /* 31 28 23 21 20 12 9 4
13058 000 11110 ty 1 imm8 100 imm5 d
13059 The first 3 bits are really "M 0 S", but M and S are always zero.
13060 */
13061# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13062 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13063 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13064 return False;
13065 }
13066 UInt ty = INSN(23,22);
13067 UInt imm8 = INSN(20,13);
13068 UInt imm5 = INSN(9,5);
13069 UInt dd = INSN(4,0);
13070
13071 /* ------- 00,00000: FMOV s_imm ------- */
13072 /* ------- 01,00000: FMOV d_imm ------- */
13073 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13074 Bool isD = (ty & 1) == 1;
13075 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
13076 if (!isD) {
13077 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13078 }
13079 putQReg128(dd, mkV128(0));
13080 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13081 DIP("fmov %s, #0x%llx\n",
13082 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13083 return True;
13084 }
13085
13086 return False;
13087# undef INSN
13088}
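
/* Hedged sketch of the 64-bit case of VFPExpandImm as used above,
   assuming the helper follows the architected expansion
   a:NOT(b):Replicate(b,8):cdefgh:Zeros(48) for imm8 = abcdefgh.
   Illustrative only; e.g. imm8 == 0x70 expands to 0x3FF0000000000000,
   which is 1.0. */
static inline ULong sketch_vfp_expand_imm8_f64 ( UInt imm8 )
{
   ULong a      = (imm8 >> 7) & 1;                 /* sign */
   ULong b      = (imm8 >> 6) & 1;
   ULong cdefgh = imm8 & 0x3F;
   ULong exp11  = ((b ^ 1) << 10) | ((b ? 0xFFULL : 0ULL) << 2)
                  | (cdefgh >> 4);                 /* NOT(b):bbbbbbbb:cd */
   ULong frac52 = (cdefgh & 0xF) << 48;            /* efgh:Zeros(48) */
   return (a << 63) | (exp11 << 52) | frac52;
}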
13089
13090
13091static
sewardj1aff76b2014-11-20 10:14:06 +000013092Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000013093{
sewardj1aff76b2014-11-20 10:14:06 +000013095 /* 31 30 29 28 23 21 20 18 15 9 4
13096 sf 0 0 11110 type 0 rmode opcode scale n d
13097 The first 3 bits are really "sf 0 S", but S is always zero.
13098 Decode fields: sf,type,rmode,opcode
13099 */
13100# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13101 if (INSN(30,29) != BITS2(0,0)
13102 || INSN(28,24) != BITS5(1,1,1,1,0)
13103 || INSN(21,21) != 0) {
13104 return False;
13105 }
13106 UInt bitSF = INSN(31,31);
13107 UInt ty = INSN(23,22); // type
13108 UInt rm = INSN(20,19); // rmode
13109 UInt op = INSN(18,16); // opcode
13110 UInt sc = INSN(15,10); // scale
13111 UInt nn = INSN(9,5);
13112 UInt dd = INSN(4,0);
13113
13114 // op = 010, 011
13115 /* -------------- {S,U}CVTF (scalar, fixedpt) -------------- */
13116 /* (ix) sf S 28 ty rm op 15 9 4
13117 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
13118 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
13119 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
13120 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
13121
13122 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
13123 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
13124 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
13125 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
13126
13127 These are signed/unsigned conversion from integer registers to
13128 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13129 scaled per |scale|.
13130 */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
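      /* I32->F64 is the one conversion here that is always exact
         (every 32-bit int fits in F64's 53-bit mantissa), so those
         two ops are unops with no rounding mode argument; the others
         can round and so take the FPCR-derived mode. */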
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
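   /* As a concrete illustration of the rounding modes: with Sn =
      -1.5, FCVTMS Wd,Sn yields -2 (towards -inf), FCVTZS yields -1
      (towards zero), and FCVTNS yields -2, since -1.5 ties to the
      even candidate. */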
   if (ty <= X01
       && (    ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src   = newTemp(srcTy);
      IRTemp dst   = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm op  15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, in all four 32/64-bit combinations, rounded per
      FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S   ty   rm  op  15     9 4
       (1) 0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
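   /* Note that cases (3) and (6) touch only the upper 64 bits of the
      vector register: "fmov vd.d[1], xn" leaves the lower half of Vd
      unchanged, unlike the writes to Sd/Dd above, which zero out the
      rest of the register. */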
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IR
            // we're injecting here can change, in which case the
            // translation would have to be redone.  For ease of
            // handling, we simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
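   /* A worked example (hand-encoded): "add x0, x1, #1" assembles to
      0x91000420, for which insn[28:25] == BITS4(1,0,0,0), sending it
      to the data-processing-immediate case below. */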
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15  ===  x >= 2 && x <= 17, since for x < 2
      the unsigned subtraction wraps around to a huge value. */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
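         /* e.g. insn == 0x91000420 renders as
            "1001'0001 0000'0000 0000'0100 0010'0000". */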
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/