/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                    guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2015 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM, FMINNM are implemented the same as FMAX/FMIN.

     Also, FP comparisons that should honour the "unordered" case are
     implemented as normal FP comparisons.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence the least
     significant mantissa bit is sometimes incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA, FRINTN are kludged: they just round to nearest, with no
     special handling for the "ties" case.  FRINTX might be dubious too.

   * Ditto FCVTXN.  Its "round to odd" mode is not implemented; this
     implementation just rounds to nearest.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
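
/* Note that the four rotate amounts in the preamble sum to 128, which
   is 0 mod 64, so executing the preamble leaves x12 unchanged; each
   "orr Xn,Xn,Xn" marker is likewise a no-op.  As a minimal sketch of
   how a client might emit a request (an illustration only; the
   authoritative macros live in valgrind.h):

      __asm__ __volatile__(
         "ror x12, x12, #3  \n\t"   // 93CC0D8C
         "ror x12, x12, #13 \n\t"   // 93CC358C
         "ror x12, x12, #51 \n\t"   // 93CCCD8C
         "ror x12, x12, #61 \n\t"   // 93CCF58C
         "orr x10, x10, x10 \n\t"   // AA0A014A: X3 = client_request(X4)
         : : : "cc", "memory" );
*/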

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   x <<= (64-n);
   Long r = (Long)x;
   r >>= (64-n);
   return (ULong)r;
}
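
/* Worked example: sx_to_64(0x2C, 6) copies bit 5 (which is 1) into
   bits 63:6, giving 0xFFFFFFFFFFFFFFEC, whereas sx_to_64(0x2C, 7)
   returns 0x2C unchanged, since bit 6 is 0.  The arithmetic right
   shift on the signed Long is what propagates the copied bit. */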

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)                                            \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                                        \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)                                    \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                    \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)                         \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)                                \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)                            \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)                        \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                \
   (((_b8) << 8)                                                  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)           \
   (((_b9) << 9) | ((_b8) << 8)                                   \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b10) << 10)                                                \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)                                                \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)                             \
   (( ((UInt)(_uint)) >> (_bMin))                                 \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
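
/* Worked example: with insn == 0xD503201F (the encoding of NOP),
   SLICE_UInt(insn,31,24) == 0xD5 and SLICE_UInt(insn,4,0) == 0x1F.
   The 1ULL arithmetic keeps the mask computation from overflowing
   when (_bMax) - (_bMin) + 1 == 32. */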


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
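
/* In these mkVec* selectors (here and below), 'size' encodes the lane
   width: 0 selects 8-bit lanes, 1 selects 16-bit, 2 selects 32-bit and
   3 selects 64-bit, so for example mkVecADD(2) == Iop_Add32x4.
   Entries given as Iop_INVALID mark lane widths for which no such
   operation exists. */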

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128,  Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
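
/* For example, mathROR(Ity_I32, t, 8) binds the returned temp to
   (t << 24) | (t >>u 8), i.e. t rotated right by 8 bit positions. */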

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
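
/* For example, mathREPLICATE(Ity_I64, t, 31) computes (t << 32) >>s 63:
   the result is all ones if bit 31 of t is set, and all zeroes
   otherwise. */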

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                laneSzB = 1;  break;
      case Ity_F16: case Ity_I16: laneSzB = 2;  break;
      case Ity_F32: case Ity_I32: laneSzB = 4;  break;
      case Ity_F64: case Ity_I64: laneSzB = 8;  break;
      case Ity_V128:              laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
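
/* For example, offsetQRegLane(7, Ity_I32, 2) is offsetQReg128(7) + 8:
   the 32-bit lanes are 4 bytes wide, so on a little-endian host lane 2
   starts 8 bytes above the base of Q7. */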

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
          ));
   return swapped;
}
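
/* Worked example: if FPCR[23:22] == 01 (round to +infinity), 'armEncd'
   holds 1 in its low two bits, and the swap produces 2, which is
   Irrm_PosINF in the IR encoding. */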
1675
1676
1677/*------------------------------------------------------------*/
1678/*--- Helpers for flag handling and conditional insns ---*/
1679/*------------------------------------------------------------*/
1680
1681static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1682{
1683 switch (cond) {
1684 case ARM64CondEQ: return "eq";
1685 case ARM64CondNE: return "ne";
1686 case ARM64CondCS: return "cs"; // or 'hs'
1687 case ARM64CondCC: return "cc"; // or 'lo'
1688 case ARM64CondMI: return "mi";
1689 case ARM64CondPL: return "pl";
1690 case ARM64CondVS: return "vs";
1691 case ARM64CondVC: return "vc";
1692 case ARM64CondHI: return "hi";
1693 case ARM64CondLS: return "ls";
1694 case ARM64CondGE: return "ge";
1695 case ARM64CondLT: return "lt";
1696 case ARM64CondGT: return "gt";
1697 case ARM64CondLE: return "le";
1698 case ARM64CondAL: return "al";
1699 case ARM64CondNV: return "nv";
1700      default: vpanic("nameARM64Condcode");
1701 }
1702}
1703
1704/* and a handy shorthand for it */
1705static const HChar* nameCC ( ARM64Condcode cond ) {
1706 return nameARM64Condcode(cond);
1707}
1708
1709
1710/* Build IR to calculate some particular condition from stored
1711 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1712 Ity_I64, suitable for narrowing. Although the return type is
1713 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1714 :: Ity_I64 and must denote the condition to compute in
1715 bits 7:4, and be zero everywhere else.
1716*/
1717static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1718{
1719 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1720 /* And 'cond' had better produce a value in which only bits 7:4 are
1721 nonzero. However, obviously we can't assert for that. */
1722
1723 /* So what we're constructing for the first argument is
1724 "(cond << 4) | stored-operation".
1725 However, as per comments above, 'cond' must be supplied
1726 pre-shifted to this function.
1727
1728 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1729 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1730 8 bits of the first argument. */
1731 IRExpr** args
1732 = mkIRExprVec_4(
1733 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1734 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1735 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1736 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1737 );
1738 IRExpr* call
1739 = mkIRExprCCall(
1740 Ity_I64,
1741 0/*regparm*/,
1742 "arm64g_calculate_condition", &arm64g_calculate_condition,
1743 args
1744 );
1745
1746 /* Exclude the requested condition, OP and NDEP from definedness
1747 checking. We're only interested in DEP1 and DEP2. */
1748 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1749 return call;
1750}
1751
1752
1753/* Build IR to calculate some particular condition from stored
1754 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1755 Ity_I64, suitable for narrowing. Although the return type is
1756 Ity_I64, the returned value is either 0 or 1.
1757*/
1758static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1759{
1760 /* First arg is "(cond << 4) | condition". This requires that the
1761 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1762 (COND, OP) pair in the lowest 8 bits of the first argument. */
1763 vassert(cond >= 0 && cond <= 15);
1764 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1765}
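/* For example, a request for ARM64CondNE (0001) following a 64-bit ADDS
   leaves the helper's first argument as
      (ARM64CondNE << 4) | ARM64G_CC_OP_ADD64
   with the condition in bits 7:4 and the operation in bits 3:0, which
   is why the scheme requires all ARM64G_CC_OP_ values to fit in 4 bits. */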
1766
1767
sewardjdee30502014-06-04 13:09:44 +00001768/* Build IR to calculate just the carry flag from stored
1769 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1770 Ity_I64. */
1771static IRExpr* mk_arm64g_calculate_flag_c ( void )
1772{
1773 IRExpr** args
1774 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1775 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1776 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1777 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1778 IRExpr* call
1779 = mkIRExprCCall(
1780 Ity_I64,
1781 0/*regparm*/,
1782 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1783 args
1784 );
1785 /* Exclude OP and NDEP from definedness checking. We're only
1786 interested in DEP1 and DEP2. */
1787 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1788 return call;
1789}
1790
1791
sewardjbbcf1882014-01-12 12:49:10 +00001792//ZZ /* Build IR to calculate just the overflow flag from stored
1793//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1794//ZZ Ity_I32. */
1795//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1796//ZZ {
1797//ZZ IRExpr** args
1798//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1799//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1800//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1801//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1802//ZZ IRExpr* call
1803//ZZ = mkIRExprCCall(
1804//ZZ Ity_I32,
1805//ZZ 0/*regparm*/,
1806//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1807//ZZ args
1808//ZZ );
1809//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1810//ZZ interested in DEP1 and DEP2. */
1811//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1812//ZZ return call;
1813//ZZ }
1814
1815
1816/* Build IR to calculate N Z C V in bits 31:28 of the
1817 returned word. */
1818static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1819{
1820 IRExpr** args
1821 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1822 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1823 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1824 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1825 IRExpr* call
1826 = mkIRExprCCall(
1827 Ity_I64,
1828 0/*regparm*/,
1829 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1830 args
1831 );
1832 /* Exclude OP and NDEP from definedness checking. We're only
1833 interested in DEP1 and DEP2. */
1834 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1835 return call;
1836}
1837
1838
1839/* Build IR to set the flags thunk, in the most general case. */
1840static
1841void setFlags_D1_D2_ND ( UInt cc_op,
1842 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1843{
1844   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1845   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1846   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1847 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1848 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1849 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1850 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1851 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1852}
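/* As a concrete instance of the thunk scheme: after "adds x0, x1, x2"
   the four fields hold (ARM64G_CC_OP_ADD64, x1value, x2value, 0), and
   the helpers can recompute any flag from them on demand.  A simplified
   sketch (not the actual helper) of how C falls out for ADD64, as an
   unsigned-overflow test:

      static ULong flag_c_for_ADD64 ( ULong cc_dep1, ULong cc_dep2 ) {
         ULong res = cc_dep1 + cc_dep2;
         return res < cc_dep1 ? 1 : 0;   // carry out of bit 63
      }
*/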
1853
1854/* Build IR to set the flags thunk after ADD or SUB. */
1855static
1856void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1857{
1858 IRTemp argL64 = IRTemp_INVALID;
1859 IRTemp argR64 = IRTemp_INVALID;
1860 IRTemp z64 = newTemp(Ity_I64);
1861 if (is64) {
1862 argL64 = argL;
1863 argR64 = argR;
1864 } else {
1865 argL64 = newTemp(Ity_I64);
1866 argR64 = newTemp(Ity_I64);
1867 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1868 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1869 }
1870 assign(z64, mkU64(0));
1871 UInt cc_op = ARM64G_CC_OP_NUMBER;
1872 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1873 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1874 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1875 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1876 else { vassert(0); }
1877 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1878}
1879
sewardjdee30502014-06-04 13:09:44 +00001880/* Build IR to set the flags thunk after ADC or SBC. */
1881static
1882void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1883 IRTemp argL, IRTemp argR, IRTemp oldC )
1884{
1885 IRTemp argL64 = IRTemp_INVALID;
1886 IRTemp argR64 = IRTemp_INVALID;
1887 IRTemp oldC64 = IRTemp_INVALID;
1888 if (is64) {
1889 argL64 = argL;
1890 argR64 = argR;
1891 oldC64 = oldC;
1892 } else {
1893 argL64 = newTemp(Ity_I64);
1894 argR64 = newTemp(Ity_I64);
1895 oldC64 = newTemp(Ity_I64);
1896 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1897 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1898 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1899 }
1900 UInt cc_op = ARM64G_CC_OP_NUMBER;
1901 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1902 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1903 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1904 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1905 else { vassert(0); }
1906 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1907}
1908
sewardjbbcf1882014-01-12 12:49:10 +00001909/* Build IR to set the flags thunk after ADD or SUB, if the given
1910 condition evaluates to True at run time. If not, the flags are set
1911 to the specified NZCV value. */
1912static
1913void setFlags_ADD_SUB_conditionally (
1914 Bool is64, Bool isSUB,
1915 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1916 )
1917{
1918 /* Generate IR as follows:
1919 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1920 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1921 CC_DEP2 = ITE(cond, argR64, 0)
1922 CC_NDEP = 0
1923 */
1924
1925 IRTemp z64 = newTemp(Ity_I64);
1926 assign(z64, mkU64(0));
1927
1928 /* Establish the operation and operands for the True case. */
1929 IRTemp t_dep1 = IRTemp_INVALID;
1930 IRTemp t_dep2 = IRTemp_INVALID;
1931 UInt t_op = ARM64G_CC_OP_NUMBER;
1932 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1933 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1934 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1935 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1936 else { vassert(0); }
1937 /* */
1938 if (is64) {
1939 t_dep1 = argL;
1940 t_dep2 = argR;
1941 } else {
1942 t_dep1 = newTemp(Ity_I64);
1943 t_dep2 = newTemp(Ity_I64);
1944 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1945 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1946 }
1947
1948 /* Establish the operation and operands for the False case. */
1949 IRTemp f_dep1 = newTemp(Ity_I64);
1950 IRTemp f_dep2 = z64;
1951 UInt f_op = ARM64G_CC_OP_COPY;
1952 assign(f_dep1, mkU64(nzcv << 28));
1953
1954 /* Final thunk values */
1955 IRTemp dep1 = newTemp(Ity_I64);
1956 IRTemp dep2 = newTemp(Ity_I64);
1957 IRTemp op = newTemp(Ity_I64);
1958
1959 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1960 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1961 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1962
1963 /* finally .. */
1964 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1965 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1966 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1967 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1968}
1969
1970/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1971static
1972void setFlags_LOGIC ( Bool is64, IRTemp res )
1973{
1974 IRTemp res64 = IRTemp_INVALID;
1975 IRTemp z64 = newTemp(Ity_I64);
1976 UInt cc_op = ARM64G_CC_OP_NUMBER;
1977 if (is64) {
1978 res64 = res;
1979 cc_op = ARM64G_CC_OP_LOGIC64;
1980 } else {
1981 res64 = newTemp(Ity_I64);
1982 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1983 cc_op = ARM64G_CC_OP_LOGIC32;
1984 }
1985 assign(z64, mkU64(0));
1986 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1987}
1988
1989/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1990 located in bits 31:28 of the supplied value. */
1991static
1992void setFlags_COPY ( IRTemp nzcv_28x0 )
1993{
1994 IRTemp z64 = newTemp(Ity_I64);
1995 assign(z64, mkU64(0));
1996 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1997}
1998
1999
2000//ZZ /* Minor variant of the above that sets NDEP to zero (if it
2001//ZZ sets it at all) */
2002//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2003//ZZ IRTemp t_dep2,
2004//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2005//ZZ {
2006//ZZ IRTemp z32 = newTemp(Ity_I32);
2007//ZZ assign( z32, mkU32(0) );
2008//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2009//ZZ }
2010//ZZ
2011//ZZ
2012//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2013//ZZ sets it at all) */
2014//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2015//ZZ IRTemp t_ndep,
2016//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2017//ZZ {
2018//ZZ IRTemp z32 = newTemp(Ity_I32);
2019//ZZ assign( z32, mkU32(0) );
2020//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2021//ZZ }
2022//ZZ
2023//ZZ
2024//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2025//ZZ sets them at all) */
2026//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2027//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2028//ZZ {
2029//ZZ IRTemp z32 = newTemp(Ity_I32);
2030//ZZ assign( z32, mkU32(0) );
2031//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2032//ZZ }
2033
2034
2035/*------------------------------------------------------------*/
2036/*--- Misc math helpers ---*/
2037/*------------------------------------------------------------*/
2038
sewardj32d86752014-03-02 12:47:18 +00002039/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2040static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00002041{
sewardj32d86752014-03-02 12:47:18 +00002042 IRTemp maskT = newTemp(Ity_I64);
2043 IRTemp res = newTemp(Ity_I64);
2044 vassert(sh >= 1 && sh <= 63);
2045 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00002046 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00002047 binop(Iop_Or64,
2048 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00002049 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2050 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00002051 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00002052 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2053 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00002054 )
2055 );
sewardjdc9259c2014-02-27 11:10:19 +00002056 return res;
2057}
2058
sewardj32d86752014-03-02 12:47:18 +00002059/* Generates byte swaps within 32-bit lanes. */
2060static IRTemp math_UINTSWAP64 ( IRTemp src )
2061{
2062 IRTemp res;
2063 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2064 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2065 return res;
2066}
2067
2068/* Generates byte swaps within 16-bit lanes. */
2069static IRTemp math_USHORTSWAP64 ( IRTemp src )
2070{
2071 IRTemp res;
2072 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2073 return res;
2074}
2075
2076/* Generates a 64-bit byte swap. */
2077static IRTemp math_BYTESWAP64 ( IRTemp src )
2078{
2079 IRTemp res;
2080 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2081 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2082 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2083 return res;
2084}
sewardjdc9259c2014-02-27 11:10:19 +00002085
2086/* Generates a 64-bit bit swap. */
2087static IRTemp math_BITSWAP64 ( IRTemp src )
2088{
sewardj32d86752014-03-02 12:47:18 +00002089 IRTemp res;
2090 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2091 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2092 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2093 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00002094}
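/* The three SWAPHELPER calls in math_BITSWAP64 exchange adjacent bits,
   then adjacent 2-bit pairs, then adjacent nibbles, which reverses the
   bits within each byte; the final BYTESWAP64 then reverses the bytes,
   yielding a full 64-bit bit reversal.  For instance
   0x0000000000000001 -> 0x8000000000000000 and
   0x00000000000000F0 -> 0x0F00000000000000. */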
2095
sewardj606c4ba2014-01-26 19:11:14 +00002096/* Duplicates the bits at the bottom of the given word to fill the
2097 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2098 except for the bottom bits. */
2099static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2100{
2101 if (srcTy == Ity_I8) {
2102 IRTemp t16 = newTemp(Ity_I64);
2103 assign(t16, binop(Iop_Or64, mkexpr(src),
2104 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2105 IRTemp t32 = newTemp(Ity_I64);
2106 assign(t32, binop(Iop_Or64, mkexpr(t16),
2107 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2108 IRTemp t64 = newTemp(Ity_I64);
2109 assign(t64, binop(Iop_Or64, mkexpr(t32),
2110 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2111 return t64;
2112 }
2113 if (srcTy == Ity_I16) {
2114 IRTemp t32 = newTemp(Ity_I64);
2115 assign(t32, binop(Iop_Or64, mkexpr(src),
2116 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2117 IRTemp t64 = newTemp(Ity_I64);
2118 assign(t64, binop(Iop_Or64, mkexpr(t32),
2119 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2120 return t64;
2121 }
2122 if (srcTy == Ity_I32) {
2123 IRTemp t64 = newTemp(Ity_I64);
2124 assign(t64, binop(Iop_Or64, mkexpr(src),
2125 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2126 return t64;
2127 }
2128 if (srcTy == Ity_I64) {
2129 return src;
2130 }
2131 vassert(0);
2132}
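/* The doubling trick above replicates in log2(64/esize) steps: for an
   Ity_I8 source 0xAB, t16 = 0x...ABAB, t32 = 0x...ABABABAB, and
   t64 = 0xABABABABABABABAB. */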
2133
2134
sewardj18bf5172014-06-14 18:05:30 +00002135/* Duplicates the src element exactly so as to fill a V128 value. */
sewardj85fbb022014-06-12 13:16:01 +00002136static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2137{
sewardj8e91fd42014-07-11 12:05:47 +00002138 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +00002139 if (srcTy == Ity_F64) {
2140 IRTemp i64 = newTemp(Ity_I64);
2141 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2142 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2143 return res;
2144 }
2145 if (srcTy == Ity_F32) {
2146 IRTemp i64a = newTemp(Ity_I64);
2147 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2148 IRTemp i64b = newTemp(Ity_I64);
2149 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2150 mkexpr(i64a)));
2151 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2152 return res;
2153 }
sewardj18bf5172014-06-14 18:05:30 +00002154 if (srcTy == Ity_I64) {
2155 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2156 return res;
2157 }
2158 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2159 IRTemp t1 = newTemp(Ity_I64);
2160 assign(t1, widenUto64(srcTy, mkexpr(src)));
2161 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2162 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2163 return res;
2164 }
sewardj85fbb022014-06-12 13:16:01 +00002165 vassert(0);
2166}
2167
2168
sewardjdf9d6d52014-06-27 10:43:22 +00002169/* |fullWidth| is a full V128 width result. Depending on bitQ,
2170 zero out the upper half. */
2171static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2172{
2173 if (bitQ == 1) return mkexpr(fullWidth);
2174 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2175 vassert(0);
2176}
2177
sewardja5a6b752014-06-30 07:33:56 +00002178/* The same, but from an expression instead. */
2179static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2180{
sewardj8e91fd42014-07-11 12:05:47 +00002181 IRTemp fullWidthT = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00002182 assign(fullWidthT, fullWidth);
2183 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2184}
2185
sewardjdf9d6d52014-06-27 10:43:22 +00002186
sewardjbbcf1882014-01-12 12:49:10 +00002187/*------------------------------------------------------------*/
2188/*--- FP comparison helpers ---*/
2189/*------------------------------------------------------------*/
2190
2191/* irRes :: Ity_I32 holds a floating point comparison result encoded
2192 as an IRCmpF64Result. Generate code to convert it to an
2193 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2194 Assign a new temp to hold that value, and return the temp. */
2195static
2196IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2197{
2198 IRTemp ix = newTemp(Ity_I64);
2199 IRTemp termL = newTemp(Ity_I64);
2200 IRTemp termR = newTemp(Ity_I64);
2201 IRTemp nzcv = newTemp(Ity_I64);
2202 IRTemp irRes = newTemp(Ity_I64);
2203
2204 /* This is where the fun starts. We have to convert 'irRes' from
2205 an IR-convention return result (IRCmpF64Result) to an
2206 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2207 4 bits of 'nzcv'. */
2208 /* Map compare result from IR to ARM(nzcv) */
2209 /*
2210 FP cmp result | IR | ARM(nzcv)
2211 --------------------------------
2212 UN 0x45 0011
2213 LT 0x01 1000
2214 GT 0x00 0010
2215 EQ 0x40 0110
2216 */
2217 /* Now since you're probably wondering WTF ..
2218
2219 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2220 places them side by side, giving a number which is 0, 1, 2 or 3.
2221
2222 termL is a sequence cooked up by GNU superopt. It converts ix
2223      into an almost correct NZCV value (incredibly), except
2224 for the case of UN, where it produces 0100 instead of the
2225 required 0011.
2226
2227 termR is therefore a correction term, also computed from ix. It
2228      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
2229 the final correct value, we subtract termR from termL.
2230
2231 Don't take my word for it. There's a test program at the bottom
2232 of guest_arm_toIR.c, to try this out with.
2233 */
2234 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2235
2236 assign(
2237 ix,
2238 binop(Iop_Or64,
2239 binop(Iop_And64,
2240 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2241 mkU64(3)),
2242 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2243
2244 assign(
2245 termL,
2246 binop(Iop_Add64,
2247 binop(Iop_Shr64,
2248 binop(Iop_Sub64,
2249 binop(Iop_Shl64,
2250 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2251 mkU8(62)),
2252 mkU64(1)),
2253 mkU8(61)),
2254 mkU64(1)));
2255
2256 assign(
2257 termR,
2258 binop(Iop_And64,
2259 binop(Iop_And64,
2260 mkexpr(ix),
2261 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2262 mkU64(1)));
2263
2264 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2265 return nzcv;
2266}
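/* Checking one row of the table: for EQ, irRes = 0x40, so ix = 2.
   Then termL = ((((2 ^ 1) << 62) - 1) >>u 61) + 1
              = ((0xC000000000000000 - 1) >>u 61) + 1 = 5 + 1 = 6,
   and termR = 2 & 1 & 1 = 0, giving nzcv = 0110 as required.  For UN
   (ix = 3), termL = 4 and termR = 1, giving the corrected 0011. */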
2267
2268
2269/*------------------------------------------------------------*/
2270/*--- Data processing (immediate) ---*/
2271/*------------------------------------------------------------*/
2272
2273/* Helper functions for supporting "DecodeBitMasks" */
2274
2275static ULong dbm_ROR ( Int width, ULong x, Int rot )
2276{
2277 vassert(width > 0 && width <= 64);
2278 vassert(rot >= 0 && rot < width);
2279 if (rot == 0) return x;
2280 ULong res = x >> rot;
2281 res |= (x << (width - rot));
2282 if (width < 64)
2283 res &= ((1ULL << width) - 1);
2284 return res;
2285}
2286
2287static ULong dbm_RepTo64( Int esize, ULong x )
2288{
2289 switch (esize) {
2290 case 64:
2291 return x;
2292 case 32:
2293 x &= 0xFFFFFFFF; x |= (x << 32);
2294 return x;
2295 case 16:
2296 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2297 return x;
2298 case 8:
2299 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2300 return x;
2301 case 4:
2302 x &= 0xF; x |= (x << 4); x |= (x << 8);
2303 x |= (x << 16); x |= (x << 32);
2304 return x;
2305 case 2:
2306 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2307 x |= (x << 16); x |= (x << 32);
2308 return x;
2309 default:
2310 break;
2311 }
2312 vpanic("dbm_RepTo64");
2313 /*NOTREACHED*/
2314 return 0;
2315}
2316
2317static Int dbm_highestSetBit ( ULong x )
2318{
2319 Int i;
2320 for (i = 63; i >= 0; i--) {
2321 if (x & (1ULL << i))
2322 return i;
2323 }
2324 vassert(x == 0);
2325 return -1;
2326}
2327
2328static
2329Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2330 ULong immN, ULong imms, ULong immr, Bool immediate,
2331 UInt M /*32 or 64*/)
2332{
2333 vassert(immN < (1ULL << 1));
2334 vassert(imms < (1ULL << 6));
2335 vassert(immr < (1ULL << 6));
2336 vassert(immediate == False || immediate == True);
2337 vassert(M == 32 || M == 64);
2338
2339 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2340 if (len < 1) { /* printf("fail1\n"); */ return False; }
2341 vassert(len <= 6);
2342 vassert(M >= (1 << len));
2343
2344 vassert(len >= 1 && len <= 6);
2345 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2346 (1 << len) - 1;
2347 vassert(levels >= 1 && levels <= 63);
2348
2349 if (immediate && ((imms & levels) == levels)) {
2350 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2351 return False;
2352 }
2353
2354 ULong S = imms & levels;
2355 ULong R = immr & levels;
2356 Int diff = S - R;
2357 diff &= 63;
2358 Int esize = 1 << len;
2359 vassert(2 <= esize && esize <= 64);
2360
2361 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2362 same below with d. S can be 63 in which case we have an out of
2363 range and hence undefined shift. */
2364 vassert(S >= 0 && S <= 63);
2365 vassert(esize >= (S+1));
2366 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2367 //(1ULL << (S+1)) - 1;
2368 ((1ULL << S) - 1) + (1ULL << S);
2369
2370 Int d = // diff<len-1:0>
2371 diff & ((1 << len)-1);
2372 vassert(esize >= (d+1));
2373 vassert(d >= 0 && d <= 63);
2374
2375 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2376 //(1ULL << (d+1)) - 1;
2377 ((1ULL << d) - 1) + (1ULL << d);
2378
2379 if (esize != 64) vassert(elem_s < (1ULL << esize));
2380 if (esize != 64) vassert(elem_d < (1ULL << esize));
2381
2382 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2383 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2384
2385 return True;
2386}
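/* A worked decode: N=1, imms=000111, immr=000100, as generated for
   example by "and x0, x1, #0xF00000000000000F".  Then
   len = dbm_highestSetBit(1000000 | 111000) = 6, so esize = 64,
   levels = 63, S = 7, R = 4.  elem_s = Ones(8) = 0xFF, and
   wmask = dbm_ROR(64, 0xFF, 4) = 0xF00000000000000F.  With immr=0
   instead, wmask would be plain 0xFF. */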
2387
2388
2389static
2390Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2391 UInt insn)
2392{
2393# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2394
2395 /* insn[28:23]
2396 10000x PC-rel addressing
2397 10001x Add/subtract (immediate)
2398 100100 Logical (immediate)
2399 100101 Move Wide (immediate)
2400 100110 Bitfield
2401 100111 Extract
2402 */
2403
2404 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2405 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2406 Bool is64 = INSN(31,31) == 1;
2407 Bool isSub = INSN(30,30) == 1;
2408 Bool setCC = INSN(29,29) == 1;
2409 UInt sh = INSN(23,22);
2410 UInt uimm12 = INSN(21,10);
2411 UInt nn = INSN(9,5);
2412 UInt dd = INSN(4,0);
2413 const HChar* nm = isSub ? "sub" : "add";
2414 if (sh >= 2) {
2415 /* Invalid; fall through */
2416 } else {
2417 vassert(sh <= 1);
2418 uimm12 <<= (12 * sh);
2419 if (is64) {
2420 IRTemp argL = newTemp(Ity_I64);
2421 IRTemp argR = newTemp(Ity_I64);
2422 IRTemp res = newTemp(Ity_I64);
2423 assign(argL, getIReg64orSP(nn));
2424 assign(argR, mkU64(uimm12));
2425 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2426 mkexpr(argL), mkexpr(argR)));
2427 if (setCC) {
2428 putIReg64orZR(dd, mkexpr(res));
2429 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2430 DIP("%ss %s, %s, 0x%x\n",
2431 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2432 } else {
2433 putIReg64orSP(dd, mkexpr(res));
2434 DIP("%s %s, %s, 0x%x\n",
2435 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2436 }
2437 } else {
2438 IRTemp argL = newTemp(Ity_I32);
2439 IRTemp argR = newTemp(Ity_I32);
2440 IRTemp res = newTemp(Ity_I32);
2441 assign(argL, getIReg32orSP(nn));
2442 assign(argR, mkU32(uimm12));
2443 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2444 mkexpr(argL), mkexpr(argR)));
2445 if (setCC) {
2446 putIReg32orZR(dd, mkexpr(res));
2447 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2448 DIP("%ss %s, %s, 0x%x\n",
2449 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2450 } else {
2451 putIReg32orSP(dd, mkexpr(res));
2452 DIP("%s %s, %s, 0x%x\n",
2453 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2454 }
2455 }
2456 return True;
2457 }
2458 }
2459
2460 /* -------------------- ADR/ADRP -------------------- */
2461 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2462 UInt bP = INSN(31,31);
2463 UInt immLo = INSN(30,29);
2464 UInt immHi = INSN(23,5);
2465 UInt rD = INSN(4,0);
2466 ULong uimm = (immHi << 2) | immLo;
2467 ULong simm = sx_to_64(uimm, 21);
2468 ULong val;
2469 if (bP) {
2470 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2471 } else {
2472 val = guest_PC_curr_instr + simm;
2473 }
2474 putIReg64orZR(rD, mkU64(val));
2475 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2476 return True;
2477 }
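   /* To make the PC-relative arithmetic concrete: an ADRP at PC
      0x400123 with immHi:immLo = 1 computes
      (0x400123 & ~0xFFFULL) + (1 << 12) = 0x401000, the base of the
      next 4KB page; a following ADD then supplies the low 12 bits.
      Plain ADR simply adds the signed 21-bit offset to the PC. */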
2478
2479 /* -------------------- LOGIC(imm) -------------------- */
2480 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2481 /* 31 30 28 22 21 15 9 4
2482 sf op 100100 N immr imms Rn Rd
2483 op=00: AND Rd|SP, Rn, #imm
2484 op=01: ORR Rd|SP, Rn, #imm
2485 op=10: EOR Rd|SP, Rn, #imm
2486 op=11: ANDS Rd|ZR, Rn, #imm
2487 */
2488 Bool is64 = INSN(31,31) == 1;
2489 UInt op = INSN(30,29);
2490 UInt N = INSN(22,22);
2491 UInt immR = INSN(21,16);
2492 UInt immS = INSN(15,10);
2493 UInt nn = INSN(9,5);
2494 UInt dd = INSN(4,0);
2495 ULong imm = 0;
2496 Bool ok;
2497 if (N == 1 && !is64)
2498 goto after_logic_imm; /* not allowed; fall through */
2499 ok = dbm_DecodeBitMasks(&imm, NULL,
2500 N, immS, immR, True, is64 ? 64 : 32);
2501 if (!ok)
2502 goto after_logic_imm;
2503
2504 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2505 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2506 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2507
2508 vassert(op < 4);
2509 if (is64) {
2510 IRExpr* argL = getIReg64orZR(nn);
2511 IRExpr* argR = mkU64(imm);
2512 IRTemp res = newTemp(Ity_I64);
2513 assign(res, binop(ops64[op], argL, argR));
2514 if (op < 3) {
2515 putIReg64orSP(dd, mkexpr(res));
2516 DIP("%s %s, %s, 0x%llx\n", names[op],
2517 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2518 } else {
2519 putIReg64orZR(dd, mkexpr(res));
2520 setFlags_LOGIC(True/*is64*/, res);
2521 DIP("%s %s, %s, 0x%llx\n", names[op],
2522 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2523 }
2524 } else {
2525 IRExpr* argL = getIReg32orZR(nn);
2526 IRExpr* argR = mkU32((UInt)imm);
2527 IRTemp res = newTemp(Ity_I32);
2528 assign(res, binop(ops32[op], argL, argR));
2529 if (op < 3) {
2530 putIReg32orSP(dd, mkexpr(res));
2531 DIP("%s %s, %s, 0x%x\n", names[op],
2532 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2533 } else {
2534 putIReg32orZR(dd, mkexpr(res));
2535 setFlags_LOGIC(False/*!is64*/, res);
2536 DIP("%s %s, %s, 0x%x\n", names[op],
2537 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2538 }
2539 }
2540 return True;
2541 }
2542 after_logic_imm:
2543
2544 /* -------------------- MOV{Z,N,K} -------------------- */
2545 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2546 /* 31 30 28 22 20 4
2547 | | | | | |
2548 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2549 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2550 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2551 */
2552 Bool is64 = INSN(31,31) == 1;
2553 UInt subopc = INSN(30,29);
2554 UInt hw = INSN(22,21);
2555 UInt imm16 = INSN(20,5);
2556 UInt dd = INSN(4,0);
2557 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2558 /* invalid; fall through */
2559 } else {
2560 ULong imm64 = ((ULong)imm16) << (16 * hw);
2561 if (!is64)
2562 vassert(imm64 < 0x100000000ULL);
2563 switch (subopc) {
2564 case BITS2(1,0): // MOVZ
2565 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2566 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2567 break;
2568 case BITS2(0,0): // MOVN
2569 imm64 = ~imm64;
2570 if (!is64)
2571 imm64 &= 0xFFFFFFFFULL;
2572 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2573 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2574 break;
2575 case BITS2(1,1): // MOVK
2576 /* This is more complex. We are inserting a slice into
2577 the destination register, so we need to have the old
2578 value of it. */
2579 if (is64) {
2580 IRTemp old = newTemp(Ity_I64);
2581 assign(old, getIReg64orZR(dd));
2582 ULong mask = 0xFFFFULL << (16 * hw);
2583 IRExpr* res
2584 = binop(Iop_Or64,
2585 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2586 mkU64(imm64));
2587 putIReg64orZR(dd, res);
2588 DIP("movk %s, 0x%x, lsl %u\n",
2589 nameIReg64orZR(dd), imm16, 16*hw);
2590 } else {
2591 IRTemp old = newTemp(Ity_I32);
2592 assign(old, getIReg32orZR(dd));
2593 vassert(hw <= 1);
Elliott Hughesa0664b92017-04-18 17:46:52 -07002594 UInt mask = ((UInt)0xFFFF) << (16 * hw);
sewardjbbcf1882014-01-12 12:49:10 +00002595 IRExpr* res
2596 = binop(Iop_Or32,
2597 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2598 mkU32((UInt)imm64));
2599 putIReg32orZR(dd, res);
2600 DIP("movk %s, 0x%x, lsl %u\n",
2601 nameIReg32orZR(dd), imm16, 16*hw);
2602 }
2603 break;
2604 default:
2605 vassert(0);
2606 }
2607 return True;
2608 }
2609 }
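   /* The MOV{Z,N,K} trio is how arbitrary 64-bit constants get
      materialised.  For example, to build 0x12345678ABCD0000:
         movz x0, #0xABCD, lsl 16   // x0 = 0x00000000ABCD0000
         movk x0, #0x5678, lsl 32   // x0 = 0x00005678ABCD0000
         movk x0, #0x1234, lsl 48   // x0 = 0x12345678ABCD0000
      Each MOVK preserves the old value outside the selected 16-bit
      lane, exactly as the And/Or expression above implements. */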
2610
2611 /* -------------------- {U,S,}BFM -------------------- */
2612 /* 30 28 22 21 15 9 4
2613
2614 sf 10 100110 N immr imms nn dd
2615 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2616 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2617
2618 sf 00 100110 N immr imms nn dd
2619 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2620 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2621
2622 sf 01 100110 N immr imms nn dd
2623 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2624 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2625 */
2626 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2627 UInt sf = INSN(31,31);
2628 UInt opc = INSN(30,29);
2629 UInt N = INSN(22,22);
2630 UInt immR = INSN(21,16);
2631 UInt immS = INSN(15,10);
2632 UInt nn = INSN(9,5);
2633 UInt dd = INSN(4,0);
2634 Bool inZero = False;
2635 Bool extend = False;
2636 const HChar* nm = "???";
2637 /* skip invalid combinations */
2638 switch (opc) {
2639 case BITS2(0,0):
2640 inZero = True; extend = True; nm = "sbfm"; break;
2641 case BITS2(0,1):
2642 inZero = False; extend = False; nm = "bfm"; break;
2643 case BITS2(1,0):
2644 inZero = True; extend = False; nm = "ubfm"; break;
2645 case BITS2(1,1):
2646 goto after_bfm; /* invalid */
2647 default:
2648 vassert(0);
2649 }
2650 if (sf == 1 && N != 1) goto after_bfm;
2651 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2652 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2653 ULong wmask = 0, tmask = 0;
2654 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2655 N, immS, immR, False, sf == 1 ? 64 : 32);
2656 if (!ok) goto after_bfm; /* hmmm */
2657
2658 Bool is64 = sf == 1;
2659 IRType ty = is64 ? Ity_I64 : Ity_I32;
2660
2661 IRTemp dst = newTemp(ty);
2662 IRTemp src = newTemp(ty);
2663 IRTemp bot = newTemp(ty);
2664 IRTemp top = newTemp(ty);
2665 IRTemp res = newTemp(ty);
2666 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2667 assign(src, getIRegOrZR(is64, nn));
2668 /* perform bitfield move on low bits */
2669 assign(bot, binop(mkOR(ty),
2670 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2671 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2672 mkU(ty, wmask))));
2673 /* determine extension bits (sign, zero or dest register) */
2674 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2675 /* combine extension bits and result bits */
2676 assign(res, binop(mkOR(ty),
2677 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2678 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2679 putIRegOrZR(is64, dd, mkexpr(res));
2680 DIP("%s %s, %s, immR=%u, immS=%u\n",
2681 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2682 return True;
2683 }
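   /* The familiar aliases are all instances of this one decode.  For
      the 64-bit forms: LSR Xd,Xn,#sh is UBFM Xd,Xn,#sh,#63;
      LSL Xd,Xn,#sh is UBFM Xd,Xn,#((64-sh) % 64),#(63-sh);
      UBFX Xd,Xn,#lsb,#w is UBFM Xd,Xn,#lsb,#(lsb+w-1); and
      SXTB Xd,Wn is SBFM Xd,Xn,#0,#7. */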
2684 after_bfm:
2685
2686 /* ---------------------- EXTR ---------------------- */
2687 /* 30 28 22 20 15 9 4
2688 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2689 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2690 */
2691 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2692 Bool is64 = INSN(31,31) == 1;
2693 UInt mm = INSN(20,16);
2694 UInt imm6 = INSN(15,10);
2695 UInt nn = INSN(9,5);
2696 UInt dd = INSN(4,0);
2697 Bool valid = True;
2698 if (INSN(31,31) != INSN(22,22))
2699 valid = False;
2700 if (!is64 && imm6 >= 32)
2701 valid = False;
2702 if (!valid) goto after_extr;
2703 IRType ty = is64 ? Ity_I64 : Ity_I32;
2704 IRTemp srcHi = newTemp(ty);
2705 IRTemp srcLo = newTemp(ty);
2706 IRTemp res = newTemp(ty);
2707 assign(srcHi, getIRegOrZR(is64, nn));
2708 assign(srcLo, getIRegOrZR(is64, mm));
2709 if (imm6 == 0) {
2710 assign(res, mkexpr(srcLo));
2711 } else {
2712 UInt szBits = 8 * sizeofIRType(ty);
2713 vassert(imm6 > 0 && imm6 < szBits);
2714 assign(res, binop(mkOR(ty),
2715 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2716 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2717 }
2718 putIRegOrZR(is64, dd, mkexpr(res));
2719 DIP("extr %s, %s, %s, #%u\n",
2720 nameIRegOrZR(is64,dd),
2721 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2722 return True;
2723 }
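   /* Note that ROR Xd,Xn,#imm6 is just EXTR Xd,Xn,Xn,#imm6: with both
      sources the same register, the
      (srcHi << (64-imm6)) | (srcLo >> imm6) combination above
      degenerates into a rotate right by imm6. */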
2724 after_extr:
2725
2726 vex_printf("ARM64 front end: data_processing_immediate\n");
2727 return False;
2728# undef INSN
2729}
2730
2731
2732/*------------------------------------------------------------*/
2733/*--- Data processing (register) instructions ---*/
2734/*------------------------------------------------------------*/
2735
2736static const HChar* nameSH ( UInt sh ) {
2737 switch (sh) {
2738 case 0: return "lsl";
2739 case 1: return "lsr";
2740 case 2: return "asr";
2741 case 3: return "ror";
2742 default: vassert(0);
2743 }
2744}
2745
2746/* Generate IR to get a register value, possibly shifted by an
2747 immediate. Returns either a 32- or 64-bit temporary holding the
2748 result. After the shift, the value can optionally be NOT-ed
2749 too.
2750
2751 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2752 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2753 isn't allowed, but it's the job of the caller to check that.
2754*/
2755static IRTemp getShiftedIRegOrZR ( Bool is64,
2756 UInt sh_how, UInt sh_amt, UInt regNo,
2757 Bool invert )
2758{
2759 vassert(sh_how < 4);
2760 vassert(sh_amt < (is64 ? 64 : 32));
2761 IRType ty = is64 ? Ity_I64 : Ity_I32;
2762 IRTemp t0 = newTemp(ty);
2763 assign(t0, getIRegOrZR(is64, regNo));
2764 IRTemp t1 = newTemp(ty);
2765 switch (sh_how) {
2766 case BITS2(0,0):
2767 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2768 break;
2769 case BITS2(0,1):
2770 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2771 break;
2772 case BITS2(1,0):
2773 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2774 break;
2775 case BITS2(1,1):
2776 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2777 break;
2778 default:
2779 vassert(0);
2780 }
2781 if (invert) {
2782 IRTemp t2 = newTemp(ty);
2783 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2784 return t2;
2785 } else {
2786 return t1;
2787 }
2788}
2789
2790
2791static
2792Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2793 UInt insn)
2794{
2795# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2796
2797 /* ------------------- ADD/SUB(reg) ------------------- */
2798 /* x==0 => 32 bit op x==1 => 64 bit op
2799 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2800
2801 31 30 29 28 23 21 20 15 9 4
2802 | | | | | | | | | |
2803 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2804 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2805 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2806 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2807 */
2808 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2809 UInt bX = INSN(31,31);
2810 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2811 UInt bS = INSN(29, 29); /* set flags? */
2812 UInt sh = INSN(23,22);
2813 UInt rM = INSN(20,16);
2814 UInt imm6 = INSN(15,10);
2815 UInt rN = INSN(9,5);
2816 UInt rD = INSN(4,0);
2817 Bool isSUB = bOP == 1;
2818 Bool is64 = bX == 1;
2819 IRType ty = is64 ? Ity_I64 : Ity_I32;
2820 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2821 /* invalid; fall through */
2822 } else {
2823 IRTemp argL = newTemp(ty);
2824 assign(argL, getIRegOrZR(is64, rN));
2825 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2826 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2827 IRTemp res = newTemp(ty);
2828 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2829 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2830 if (bS) {
2831 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2832 }
2833 DIP("%s%s %s, %s, %s, %s #%u\n",
2834 bOP ? "sub" : "add", bS ? "s" : "",
2835 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2836 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2837 return True;
2838 }
2839 }
2840
sewardjdee30502014-06-04 13:09:44 +00002841 /* ------------------- ADC/SBC(reg) ------------------- */
2842 /* x==0 => 32 bit op x==1 => 64 bit op
2843
2844 31 30 29 28 23 21 20 15 9 4
2845 | | | | | | | | | |
2846 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2847 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2848 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2849 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2850 */
2851
2852 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2853 UInt bX = INSN(31,31);
2854 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2855 UInt bS = INSN(29,29); /* set flags */
2856 UInt rM = INSN(20,16);
2857 UInt rN = INSN(9,5);
2858 UInt rD = INSN(4,0);
2859
2860 Bool isSUB = bOP == 1;
2861 Bool is64 = bX == 1;
2862 IRType ty = is64 ? Ity_I64 : Ity_I32;
2863
2864 IRTemp oldC = newTemp(ty);
2865 assign(oldC,
2866 is64 ? mk_arm64g_calculate_flag_c()
2867 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2868
2869 IRTemp argL = newTemp(ty);
2870 assign(argL, getIRegOrZR(is64, rN));
2871 IRTemp argR = newTemp(ty);
2872 assign(argR, getIRegOrZR(is64, rM));
2873
2874 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2875 IRTemp res = newTemp(ty);
2876 if (isSUB) {
2877 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2878 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2879 assign(res,
2880 binop(op,
2881 binop(op, mkexpr(argL), mkexpr(argR)),
2882 binop(xorOp, mkexpr(oldC), one)));
2883 } else {
2884 assign(res,
2885 binop(op,
2886 binop(op, mkexpr(argL), mkexpr(argR)),
2887 mkexpr(oldC)));
2888 }
2889
2890 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2891
2892 if (bS) {
2893 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2894 }
2895
2896 DIP("%s%s %s, %s, %s\n",
2897 bOP ? "sbc" : "adc", bS ? "s" : "",
2898 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2899 nameIRegOrZR(is64, rM));
2900 return True;
2901 }
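   /* Sanity check on the SBC expression above: the architectural
      definition is argL - argR - (1 - oldC), and since oldC is 0 or 1,
      (1 - oldC) == (oldC ^ 1), which is the xorOp term used.  With C
      set (no borrow pending) this reduces to a plain SUB. */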
2902
sewardjbbcf1882014-01-12 12:49:10 +00002903 /* -------------------- LOGIC(reg) -------------------- */
2904 /* x==0 => 32 bit op x==1 => 64 bit op
2905 N==0 => inv? is no-op (no inversion)
2906 N==1 => inv? is NOT
2907 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2908
2909 31 30 28 23 21 20 15 9 4
2910 | | | | | | | | |
2911 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2912 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2913 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2914 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2915 With N=1, the names are: BIC ORN EON BICS
2916 */
2917 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2918 UInt bX = INSN(31,31);
2919 UInt sh = INSN(23,22);
2920 UInt bN = INSN(21,21);
2921 UInt rM = INSN(20,16);
2922 UInt imm6 = INSN(15,10);
2923 UInt rN = INSN(9,5);
2924 UInt rD = INSN(4,0);
2925 Bool is64 = bX == 1;
2926 IRType ty = is64 ? Ity_I64 : Ity_I32;
2927 if (!is64 && imm6 > 31) {
2928 /* invalid; fall though */
2929         /* invalid; fall through */
2930 IRTemp argL = newTemp(ty);
2931 assign(argL, getIRegOrZR(is64, rN));
2932 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2933 IROp op = Iop_INVALID;
2934 switch (INSN(30,29)) {
2935 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2936 case BITS2(0,1): op = mkOR(ty); break;
2937 case BITS2(1,0): op = mkXOR(ty); break;
2938 default: vassert(0);
2939 }
2940 IRTemp res = newTemp(ty);
2941 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2942 if (INSN(30,29) == BITS2(1,1)) {
2943 setFlags_LOGIC(is64, res);
2944 }
2945 putIRegOrZR(is64, rD, mkexpr(res));
2946
2947 static const HChar* names_op[8]
2948 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2949 vassert(((bN << 2) | INSN(30,29)) < 8);
2950 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2951 /* Special-case the printing of "MOV" */
2952 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2953 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2954 nameIRegOrZR(is64, rM));
2955 } else {
2956 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2957 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2958 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2959 }
2960 return True;
2961 }
2962 }
2963
2964 /* -------------------- {U,S}MULH -------------------- */
2965 /* 31 23 22 20 15 9 4
2966 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2967 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2968 */
2969 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
sewardj7fce7cc2014-05-07 09:41:40 +00002970 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
sewardjbbcf1882014-01-12 12:49:10 +00002971 Bool isU = INSN(23,23) == 1;
2972 UInt mm = INSN(20,16);
2973 UInt nn = INSN(9,5);
2974 UInt dd = INSN(4,0);
2975 putIReg64orZR(dd, unop(Iop_128HIto64,
2976 binop(isU ? Iop_MullU64 : Iop_MullS64,
2977 getIReg64orZR(nn), getIReg64orZR(mm))));
2978 DIP("%cmulh %s, %s, %s\n",
2979 isU ? 'u' : 's',
2980 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2981 return True;
2982 }
2983
2984 /* -------------------- M{ADD,SUB} -------------------- */
2985 /* 31 30 20 15 14 9 4
2986 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2987 sf 00 11011 000 m 1 a n r MADD Rd,Rn,Rm,Ra d = a-m*n
2988      sf 00 11011 000 m  1  a n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
2989 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2990 Bool is64 = INSN(31,31) == 1;
2991 UInt mm = INSN(20,16);
2992 Bool isAdd = INSN(15,15) == 0;
2993 UInt aa = INSN(14,10);
2994 UInt nn = INSN(9,5);
2995 UInt dd = INSN(4,0);
2996 if (is64) {
2997 putIReg64orZR(
2998 dd,
2999 binop(isAdd ? Iop_Add64 : Iop_Sub64,
3000 getIReg64orZR(aa),
3001 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3002 } else {
3003 putIReg32orZR(
3004 dd,
3005 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3006 getIReg32orZR(aa),
3007 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3008 }
3009 DIP("%s %s, %s, %s, %s\n",
3010 isAdd ? "madd" : "msub",
3011 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3012 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3013 return True;
3014 }
3015
3016 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3017 /* 31 30 28 20 15 11 9 4
3018 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3019 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3020 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3021 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3022 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3023 */
3024 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3025 Bool is64 = INSN(31,31) == 1;
3026 UInt b30 = INSN(30,30);
3027 UInt mm = INSN(20,16);
3028 UInt cond = INSN(15,12);
3029 UInt b10 = INSN(10,10);
3030 UInt nn = INSN(9,5);
3031 UInt dd = INSN(4,0);
3032 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3033 IRType ty = is64 ? Ity_I64 : Ity_I32;
3034 IRExpr* argL = getIRegOrZR(is64, nn);
3035 IRExpr* argR = getIRegOrZR(is64, mm);
3036 switch (op) {
3037 case BITS2(0,0):
3038 break;
3039 case BITS2(0,1):
3040 argR = binop(mkADD(ty), argR, mkU(ty,1));
3041 break;
3042 case BITS2(1,0):
3043 argR = unop(mkNOT(ty), argR);
3044 break;
3045 case BITS2(1,1):
3046 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3047 break;
3048 default:
3049 vassert(0);
3050 }
3051 putIRegOrZR(
3052 is64, dd,
3053 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3054 argL, argR)
3055 );
3056 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3057 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3058 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3059 nameIRegOrZR(is64, mm), nameCC(cond));
3060 return True;
3061 }
3062
3063 /* -------------- ADD/SUB(extended reg) -------------- */
3064 /* 28 20 15 12 9 4
3065 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3066 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3067
3068 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3069 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3070
3071 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3072 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3073
3074 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3075 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3076
3077 The 'm' operand is extended per opt, thusly:
3078
3079 000 Xm & 0xFF UXTB
3080 001 Xm & 0xFFFF UXTH
3081 010 Xm & (2^32)-1 UXTW
3082 011 Xm UXTX
3083
3084 100 Xm sx from bit 7 SXTB
3085 101 Xm sx from bit 15 SXTH
3086 110 Xm sx from bit 31 SXTW
3087 111 Xm SXTX
3088
3089 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3090 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3091 are the identity operation on Wm.
3092
3093 After extension, the value is shifted left by imm3 bits, which
3094 may only be in the range 0 .. 4 inclusive.
3095 */
3096 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3097 Bool is64 = INSN(31,31) == 1;
3098 Bool isSub = INSN(30,30) == 1;
3099 Bool setCC = INSN(29,29) == 1;
3100 UInt mm = INSN(20,16);
3101 UInt opt = INSN(15,13);
3102 UInt imm3 = INSN(12,10);
3103 UInt nn = INSN(9,5);
3104 UInt dd = INSN(4,0);
3105 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3106 "sxtb", "sxth", "sxtw", "sxtx" };
3107 /* Do almost the same thing in the 32- and 64-bit cases. */
3108 IRTemp xN = newTemp(Ity_I64);
3109 IRTemp xM = newTemp(Ity_I64);
3110 assign(xN, getIReg64orSP(nn));
3111 assign(xM, getIReg64orZR(mm));
3112 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3113 Int shSX = 0;
3114 /* widen Xm .. */
3115 switch (opt) {
3116 case BITS3(0,0,0): // UXTB
3117 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3118 case BITS3(0,0,1): // UXTH
3119 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3120 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3121 if (is64) {
3122 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3123 }
3124 break;
3125 case BITS3(0,1,1): // UXTX -- always a noop
3126 break;
3127 case BITS3(1,0,0): // SXTB
3128 shSX = 56; goto sxTo64;
3129 case BITS3(1,0,1): // SXTH
3130 shSX = 48; goto sxTo64;
3131 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3132 if (is64) {
3133 shSX = 32; goto sxTo64;
3134 }
3135 break;
3136 case BITS3(1,1,1): // SXTX -- always a noop
3137 break;
3138 sxTo64:
3139 vassert(shSX >= 32);
3140 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3141 mkU8(shSX));
3142 break;
3143 default:
3144 vassert(0);
3145 }
3146 /* and now shift */
3147 IRTemp argL = xN;
3148 IRTemp argR = newTemp(Ity_I64);
3149 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3150 IRTemp res = newTemp(Ity_I64);
3151 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3152 mkexpr(argL), mkexpr(argR)));
3153 if (is64) {
3154 if (setCC) {
3155 putIReg64orZR(dd, mkexpr(res));
3156 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3157 } else {
3158 putIReg64orSP(dd, mkexpr(res));
3159 }
3160 } else {
3161 if (setCC) {
3162 IRTemp argL32 = newTemp(Ity_I32);
3163 IRTemp argR32 = newTemp(Ity_I32);
3164 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3165 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3166 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3167 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3168 } else {
3169 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3170 }
3171 }
3172 DIP("%s%s %s, %s, %s %s lsl %u\n",
3173 isSub ? "sub" : "add", setCC ? "s" : "",
3174 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3175 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3176 nameExt[opt], imm3);
3177 return True;
3178 }
3179
3180 /* ---------------- CCMP/CCMN(imm) ---------------- */
3181 /* Bizarrely, these appear in the "data processing register"
3182 category, even though they are operations against an
3183 immediate. */
3184 /* 31 29 20 15 11 9 3
3185 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3186 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3187
3188 Operation is:
3189 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3190 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3191 */
3192 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3193 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3194 Bool is64 = INSN(31,31) == 1;
3195 Bool isSUB = INSN(30,30) == 1;
3196 UInt imm5 = INSN(20,16);
3197 UInt cond = INSN(15,12);
3198 UInt nn = INSN(9,5);
3199 UInt nzcv = INSN(3,0);
3200
3201 IRTemp condT = newTemp(Ity_I1);
3202 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3203
3204 IRType ty = is64 ? Ity_I64 : Ity_I32;
3205 IRTemp argL = newTemp(ty);
3206 IRTemp argR = newTemp(ty);
3207
3208 if (is64) {
3209 assign(argL, getIReg64orZR(nn));
3210 assign(argR, mkU64(imm5));
3211 } else {
3212 assign(argL, getIReg32orZR(nn));
3213 assign(argR, mkU32(imm5));
3214 }
3215 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3216
3217 DIP("ccm%c %s, #%u, #%u, %s\n",
3218 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3219 imm5, nzcv, nameCC(cond));
3220 return True;
3221 }
3222
3223 /* ---------------- CCMP/CCMN(reg) ---------------- */
3224 /* 31 29 20 15 11 9 3
3225 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3226 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3227 Operation is:
3228 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3229 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3230 */
3231 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3232 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3233 Bool is64 = INSN(31,31) == 1;
3234 Bool isSUB = INSN(30,30) == 1;
3235 UInt mm = INSN(20,16);
3236 UInt cond = INSN(15,12);
3237 UInt nn = INSN(9,5);
3238 UInt nzcv = INSN(3,0);
3239
3240 IRTemp condT = newTemp(Ity_I1);
3241 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3242
3243 IRType ty = is64 ? Ity_I64 : Ity_I32;
3244 IRTemp argL = newTemp(ty);
3245 IRTemp argR = newTemp(ty);
3246
3247 if (is64) {
3248 assign(argL, getIReg64orZR(nn));
3249 assign(argR, getIReg64orZR(mm));
3250 } else {
3251 assign(argL, getIReg32orZR(nn));
3252 assign(argR, getIReg32orZR(mm));
3253 }
3254 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3255
3256 DIP("ccm%c %s, %s, #%u, %s\n",
3257 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3258 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3259 return True;
3260 }
3261
3262
3263 /* -------------- REV/REV16/REV32/RBIT -------------- */
3264 /* 31 30 28 20 15 11 9 4
3265
sewardj32d86752014-03-02 12:47:18 +00003266 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3267 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003268
sewardj32d86752014-03-02 12:47:18 +00003269 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3270 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003271
sewardjdc9259c2014-02-27 11:10:19 +00003272 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3273 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003274
sewardjdc9259c2014-02-27 11:10:19 +00003275 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
sewardjbbcf1882014-01-12 12:49:10 +00003276 */
sewardjbbcf1882014-01-12 12:49:10 +00003277 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
sewardjdc9259c2014-02-27 11:10:19 +00003278 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3279 UInt b31 = INSN(31,31);
3280 UInt opc = INSN(11,10);
3281
3282 UInt ix = 0;
3283 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3284 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3285 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3286 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3287 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3288 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3289 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
sewardj32d86752014-03-02 12:47:18 +00003290 if (ix >= 1 && ix <= 7) {
3291 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
sewardjdc9259c2014-02-27 11:10:19 +00003292 UInt nn = INSN(9,5);
3293 UInt dd = INSN(4,0);
3294 IRTemp src = newTemp(Ity_I64);
3295 IRTemp dst = IRTemp_INVALID;
sewardj32d86752014-03-02 12:47:18 +00003296 IRTemp (*math)(IRTemp) = NULL;
3297 switch (ix) {
3298 case 1: case 2: math = math_BYTESWAP64; break;
3299 case 3: case 4: math = math_BITSWAP64; break;
3300 case 5: case 6: math = math_USHORTSWAP64; break;
3301 case 7: math = math_UINTSWAP64; break;
3302 default: vassert(0);
3303 }
3304 const HChar* names[7]
3305 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3306 const HChar* nm = names[ix-1];
3307 vassert(math);
3308 if (ix == 6) {
3309 /* This has to be special cased, since the logic below doesn't
3310 handle it correctly. */
sewardjdc9259c2014-02-27 11:10:19 +00003311 assign(src, getIReg64orZR(nn));
sewardj32d86752014-03-02 12:47:18 +00003312 dst = math(src);
3313 putIReg64orZR(dd,
3314 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3315 } else if (is64) {
3316 assign(src, getIReg64orZR(nn));
3317 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003318 putIReg64orZR(dd, mkexpr(dst));
3319 } else {
3320 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
sewardj32d86752014-03-02 12:47:18 +00003321 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003322 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3323 }
sewardj32d86752014-03-02 12:47:18 +00003324 DIP("%s %s, %s\n", nm,
sewardjdc9259c2014-02-27 11:10:19 +00003325 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3326 return True;
sewardjbbcf1882014-01-12 12:49:10 +00003327 }
sewardjdc9259c2014-02-27 11:10:19 +00003328 /* else fall through */
sewardjbbcf1882014-01-12 12:49:10 +00003329 }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15    9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
                binop(Iop_Xor64,
                      binop(Iop_Shl64, mkexpr(src), one),
                      binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one),
                            one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }
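
   /* Why the CLS trick works: srcZ gets bit i set exactly when bits
      i and i-1 of src differ, so counting the leading zeroes of srcZ
      counts how many bits below the sign bit are copies of it.  A
      plain C cross-check for the 64-bit case (illustrative sketch
      only, using <stdint.h> types; ref_cls64 is a hypothetical name,
      not part of this file):

         static unsigned ref_cls64 ( uint64_t x )
         {
            uint64_t z = (x << 1) ^ ((x >> 1) << 1); // adjacent-bit diffs
            if (z == 0) return 63;                   // all 0s or all 1s
            unsigned n = 0;
            while (!(z & 0x8000000000000000ULL)) { z <<= 1; n++; }
            return n;                                // == CLZ(z)
         }
   */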

   /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
   /*    30 28        20 15   11 9 4
      sf 00 1101 0110 m  0010 00 n d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n d   ASRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 11 n d   RORV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I64);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
                                    mkU64(is64 ? 63 : 31)));
      if (op < 3) {
         // LSLV, LSRV, ASRV
         switch (op) {
            case BITS2(0,0): iop = mkSHL(ty); break;
            case BITS2(0,1): iop = mkSHR(ty); break;
            case BITS2(1,0): iop = mkSAR(ty); break;
            default: vassert(0);
         }
         assign(res, binop(iop, mkexpr(srcL),
                                unop(Iop_64to8, mkexpr(srcR))));
      } else {
         // RORV
         IROp opSHL = mkSHL(ty);
         IROp opSHR = mkSHR(ty);
         IROp opOR  = mkOR(ty);
         IRExpr* width = mkU64(is64 ? 64 : 32);
         assign(
            res,
            IRExpr_ITE(
               binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
               mkexpr(srcL),
               binop(opOR,
                     binop(opSHL,
                           mkexpr(srcL),
                           unop(Iop_64to8, binop(Iop_Sub64, width,
                                                            mkexpr(srcR)))),
                     binop(opSHR,
                           mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
         ));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 4);
      const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }
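
   /* The ITE guard in the RORV case matters: a rotate by zero must
      return the source unchanged, but the shift-and-OR expansion
      would then involve a shift by the full lane width, whose result
      is not defined.  Reference model (illustrative sketch only,
      using <stdint.h> types; ref_ror32 is a hypothetical name, not
      part of this file):

         static uint32_t ref_ror32 ( uint32_t x, uint32_t amt )
         {
            amt &= 31;  // the instruction ignores higher bits of Rm
            return amt == 0 ? x : (x >> amt) | (x << (32 - amt));
         }
   */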

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001 1  n d   SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001 0  n d   UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
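
   /* In plain C terms (illustrative note only), the four forms are:
         UMADDL:  X[d] = X[a] + (uint64_t)W[n] * (uint64_t)W[m]
         SMADDL:  X[d] = X[a] + (int64_t)(int32_t)W[n]
                                * (int64_t)(int32_t)W[m]
      with '+' becoming '-' for UMSUBL/SMSUBL.  The widening multiply
      yields the full 64-bit product, so nothing is truncated before
      the accumulate. */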

   /* -------------------- CRC32/CRC32C -------------------- */
   /* 31 30           20 15   11 9 4
      sf 00 1101 0110 m  0100 sz n d   CRC32<sz>  Wd, Wn, Wm|Xm
      sf 00 1101 0110 m  0101 sz n d   CRC32C<sz> Wd, Wn, Wm|Xm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,13) == BITS3(0,1,0)) {
      UInt bitSF = INSN(31,31);
      UInt mm    = INSN(20,16);
      UInt bitC  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      vassert(sz >= 0 && sz <= 3);
      if ((bitSF == 0 && sz <= BITS2(1,0))
          || (bitSF == 1 && sz == BITS2(1,1))) {
         UInt ix = (bitC == 1 ? 4 : 0) | sz;
         void* helpers[8]
            = { &arm64g_calc_crc32b,  &arm64g_calc_crc32h,
                &arm64g_calc_crc32w,  &arm64g_calc_crc32x,
                &arm64g_calc_crc32cb, &arm64g_calc_crc32ch,
                &arm64g_calc_crc32cw, &arm64g_calc_crc32cx };
         const HChar* hNames[8]
            = { "arm64g_calc_crc32b",  "arm64g_calc_crc32h",
                "arm64g_calc_crc32w",  "arm64g_calc_crc32x",
                "arm64g_calc_crc32cb", "arm64g_calc_crc32ch",
                "arm64g_calc_crc32cw", "arm64g_calc_crc32cx" };
         const HChar* iNames[8]
            = { "crc32b",  "crc32h",  "crc32w",  "crc32x",
                "crc32cb", "crc32ch", "crc32cw", "crc32cx" };

         IRTemp srcN = newTemp(Ity_I64);
         assign(srcN, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));

         IRTemp  srcM = newTemp(Ity_I64);
         IRExpr* at64 = getIReg64orZR(mm);
         switch (sz) {
            case BITS2(0,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFF))); break;
            case BITS2(0,1):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFF))); break;
            case BITS2(1,0):
               assign(srcM, binop(Iop_And64, at64, mkU64(0xFFFFFFFF))); break;
            case BITS2(1,1):
               assign(srcM, at64); break;
            default:
               vassert(0);
         }

         vassert(ix >= 0 && ix <= 7);

         putIReg64orZR(
            dd,
            unop(Iop_32Uto64,
                 unop(Iop_64to32,
                      mkIRExprCCall(Ity_I64, 0/*regparm*/,
                                    hNames[ix], helpers[ix],
                                    mkIRExprVec_2(mkexpr(srcN),
                                                  mkexpr(srcM))))));

         DIP("%s %s, %s, %s\n", iNames[ix],
             nameIReg32orZR(dd),
             nameIReg32orZR(nn), nameIRegOrZR(bitSF == 1, mm));
         return True;
      }
      /* fall through */
   }
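
   /* For reference (illustrative only; the real computation lives in
      the arm64g_calc_crc32* helpers): the architecture specifies the
      usual bit-reflected CRCs, polynomial 0x04C11DB7 for CRC32<sz>
      and 0x1EDC6F41 for CRC32C<sz>.  A bitwise model of the byte
      variant, using the reflected polynomial (<stdint.h> types;
      ref_crc32b is a hypothetical name, not part of this file):

         static uint32_t ref_crc32b ( uint32_t acc, uint8_t b )
         {
            acc ^= b;
            for (int i = 0; i < 8; i++)
               acc = (acc >> 1) ^ (0xEDB88320u & -(acc & 1u));
            return acc;
         }
   */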

   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
                ? (_lo128) \
                : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))


/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
             ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
             ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
             ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
      assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0

      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that. */

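/* Worked example of the scheme (illustrative only), tracing
   math_INTERLEAVE2_64 below for 16-bit lanes (laneSzBlg2 == 1),
   with lanes written high-to-low and 'x' for don't-care upper
   halves:

      u0  = x  x  x  x  a3 a2 a1 a0
      u1  = x  x  x  x  b3 b2 b1 b0

   doubling (InterleaveLO16x8 of each value with itself):

      du0 = a3 a3 a2 a2 a1 a1 a0 a0
      du1 = b3 b3 b2 b2 b1 b1 b0 b0

   full-width interleave at the next size up (32x4):

      di0 = b1 b1 a1 a1 b0 b0 a0 a0
      di1 = b3 b3 a3 a3 b2 b2 a2 a2

   halving (CatEvenLanes16x8 of each result with itself):

      i0  = .. .. .. .. b1 a1 b0 a0
      i1  = .. .. .. .. b3 a3 b2 a2

   which is exactly the interleaved pair in the low 64 bits. */
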
/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}

/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}


/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for sanity checking sake it takes the whole
   insn.  This appears to depend on insn[15:12], with opt=insn[15:13]
   and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
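
/* Worked example (illustrative): "ldr x0, [x1, x2, lsl #3]" has
   opt:S == 011:1 and a 64-bit transfer (szLg2 == 3), so the EA
   computed below is X1 + (X2 << 3). */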
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt optS  = SLICE_UInt(insn, 15, 12);
   UInt mm    = SLICE_UInt(insn, 20, 16);
   UInt nn    = SLICE_UInt(insn, 9, 5);
   UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtw]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtw, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
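
/* Together these two helpers give the usual integer load/store
   semantics: stores truncate to the transfer size, loads zero
   extend.  E.g. a 2-byte load of the bytes FF FF produces the
   Ity_I64 value 0x000000000000FFFF (illustrative note only). */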


/* Generate a "standard 7" name, from bitQ and size.  But also
   allow ".1d" since that's occasionally useful. */
static
const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
{
   vassert(bitQ <= 1 && size <= 3);
   const HChar* nms[8]
      = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
   UInt ix = (bitQ << 2) | size;
   vassert(ix < 8);
   return nms[ix];
}

static
Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------ LDR,STR (immediate, uimm12) ----------- */
   /* uimm12 is scaled by the transfer size

      31 29  26    21    9  4
      |  |   |     |     |  |
      11 111 00100 imm12 nn tt    STR  Xt, [Xn|SP, #imm12 * 8]
      11 111 00101 imm12 nn tt    LDR  Xt, [Xn|SP, #imm12 * 8]

      10 111 00100 imm12 nn tt    STR  Wt, [Xn|SP, #imm12 * 4]
      10 111 00101 imm12 nn tt    LDR  Wt, [Xn|SP, #imm12 * 4]

      01 111 00100 imm12 nn tt    STRH Wt, [Xn|SP, #imm12 * 2]
      01 111 00101 imm12 nn tt    LDRH Wt, [Xn|SP, #imm12 * 2]

      00 111 00100 imm12 nn tt    STRB Wt, [Xn|SP, #imm12 * 1]
      00 111 00101 imm12 nn tt    LDRB Wt, [Xn|SP, #imm12 * 1]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
      UInt   szLg2 = INSN(31,30);
      UInt   szB   = 1 << szLg2;
      Bool   isLD  = INSN(22,22) == 1;
      UInt   offs  = INSN(21,10) * szB;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp ta    = newTemp(Ity_I64);
      assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
      if (nn == 31) { /* FIXME generate stack alignment check */ }
      vassert(szLg2 < 4);
      if (isLD) {
         putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
      } else {
         gen_narrowing_store(szB, ta, getIReg64orZR(tt));
      }
      const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
      const HChar* st_name[4] = { "strb", "strh", "str", "str" };
      DIP("%s %s, [%s, #%u]\n",
          (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
          nameIReg64orSP(nn), offs);
      return True;
   }
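
   /* Example (illustrative): "ldr x0, [x1, #24]" is encoded with
      imm12 == 3, since the immediate is scaled by the transfer size
      (8 bytes here), giving offs == 3 * 8 == 24. */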

   /* ------------ LDUR,STUR (immediate, simm9) ----------- */
   /*
      31 29  26      20   11 9  4
      |  |   |       |    |  |  |
      (at-Rn-then-Rn=EA)  |  |  |
      sz 111 00000 0 imm9 01 Rn Rt   STR Rt, [Xn|SP], #simm9
      sz 111 00001 0 imm9 01 Rn Rt   LDR Rt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      sz 111 00000 0 imm9 11 Rn Rt   STR Rt, [Xn|SP, #simm9]!
      sz 111 00001 0 imm9 11 Rn Rt   LDR Rt, [Xn|SP, #simm9]!

      (at-EA)
      sz 111 00000 0 imm9 00 Rn Rt   STR Rt, [Xn|SP, #simm9]
      sz 111 00001 0 imm9 00 Rn Rt   LDR Rt, [Xn|SP, #simm9]

      simm9 is unscaled.

      The case 'wback && Rn == Rt && Rt != 31' is disallowed.  In the
      load case this is because it would create two competing values
      for Rt.  In the store case the reason is unclear, but the spec
      disallows it anyway.

      Stores are narrowing, loads are unsigned widening.  sz encodes
      the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
   */
   if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
       == BITS9(1,1,1, 0,0,0,0,0, 0)) {
      UInt szLg2  = INSN(31,30);
      UInt szB    = 1 << szLg2;
      Bool isLoad = INSN(22,22) == 1;
      UInt imm9   = INSN(20,12);
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);
      Bool wBack  = INSN(10,10) == 1;
      UInt how    = INSN(11,10);
      if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         Long simm9 = (Long)sx_to_64(imm9, 9);
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (how) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special case typified by
               str x30, [sp,#-16]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -16 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm9 < 0 && szB == 8
              && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLoad) {
            putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
         } else {
            gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
         const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
         const HChar* fmt_str = NULL;
         switch (how) {
            case BITS2(0,1):
               fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
                      nameIRegOrZR(szB == 8, tt),
                      nameIReg64orSP(nn), simm9);
         return True;
      }
   }

4861 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4862 /* L==1 => mm==LD
4863 L==0 => mm==ST
4864 x==0 => 32 bit transfers, and zero extended loads
4865 x==1 => 64 bit transfers
4866 simm7 is scaled by the (single-register) transfer size
4867
4868 (at-Rn-then-Rn=EA)
4869 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4870
4871 (at-EA-then-Rn=EA)
4872 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4873
4874 (at-EA)
4875 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4876 */
sewardjbbcf1882014-01-12 12:49:10 +00004877 UInt insn_30_23 = INSN(30,23);
4878 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4879 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4880 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4881 UInt bL = INSN(22,22);
4882 UInt bX = INSN(31,31);
4883 UInt bWBack = INSN(23,23);
4884 UInt rT1 = INSN(4,0);
4885 UInt rN = INSN(9,5);
4886 UInt rT2 = INSN(14,10);
4887 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4888 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4889 || (bL && rT1 == rT2)) {
4890 /* undecodable; fall through */
4891 } else {
4892 if (rN == 31) { /* FIXME generate stack alignment check */ }
4893
4894 // Compute the transfer address TA and the writeback address WA.
4895 IRTemp tRN = newTemp(Ity_I64);
4896 assign(tRN, getIReg64orSP(rN));
4897 IRTemp tEA = newTemp(Ity_I64);
4898 simm7 = (bX ? 8 : 4) * simm7;
4899 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4900
4901 IRTemp tTA = newTemp(Ity_I64);
4902 IRTemp tWA = newTemp(Ity_I64);
4903 switch (INSN(24,23)) {
4904 case BITS2(0,1):
4905 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4906 case BITS2(1,1):
4907 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4908 case BITS2(1,0):
4909 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4910 default:
4911 vassert(0); /* NOTREACHED */
4912 }
4913
4914 /* Normally rN would be updated after the transfer. However, in
4915 the special case typifed by
4916 stp x29, x30, [sp,#-112]!
4917 it is necessary to update SP before the transfer, (1)
4918 because Memcheck will otherwise complain about a write
4919 below the stack pointer, and (2) because the segfault
4920 stack extension mechanism will otherwise extend the stack
4921 only down to SP before the instruction, which might not be
4922 far enough, if the -112 bit takes the actual access
4923 address to the next page.
4924 */
4925 Bool earlyWBack
4926 = bWBack && simm7 < 0
4927 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4928
4929 if (bWBack && earlyWBack)
4930 putIReg64orSP(rN, mkexpr(tEA));
4931
4932 /**/ if (bL == 1 && bX == 1) {
4933 // 64 bit load
4934 putIReg64orZR(rT1, loadLE(Ity_I64,
4935 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4936 putIReg64orZR(rT2, loadLE(Ity_I64,
4937 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4938 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00004939 // 32 bit load
4940 putIReg32orZR(rT1, loadLE(Ity_I32,
4941 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4942 putIReg32orZR(rT2, loadLE(Ity_I32,
4943 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4944 } else if (bL == 0 && bX == 1) {
4945 // 64 bit store
4946 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4947 getIReg64orZR(rT1));
4948 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4949 getIReg64orZR(rT2));
4950 } else {
4951 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00004952 // 32 bit store
4953 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4954 getIReg32orZR(rT1));
4955 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4956 getIReg32orZR(rT2));
4957 }
4958
4959 if (bWBack && !earlyWBack)
4960 putIReg64orSP(rN, mkexpr(tEA));
4961
4962 const HChar* fmt_str = NULL;
4963 switch (INSN(24,23)) {
4964 case BITS2(0,1):
4965 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4966 break;
4967 case BITS2(1,1):
4968 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4969 break;
4970 case BITS2(1,0):
4971               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
4972 break;
4973 default:
4974 vassert(0);
4975 }
4976 DIP(fmt_str, bL == 0 ? "st" : "ld",
4977 nameIRegOrZR(bX == 1, rT1),
4978 nameIRegOrZR(bX == 1, rT2),
4979 nameIReg64orSP(rN), simm7);
4980 return True;
4981 }
4982 }
4983
4984   /* -------- LDPSW (immediate, simm7) (INT REGS) -------- */
4985 /* Does 32 bit transfers which are sign extended to 64 bits.
4986 simm7 is scaled by the (single-register) transfer size
4987
4988 (at-Rn-then-Rn=EA)
4989 01 101 0001 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP], #imm
4990
4991 (at-EA-then-Rn=EA)
4992 01 101 0011 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]!
4993
4994 (at-EA)
4995 01 101 0010 1 imm7 Rt2 Rn Rt1 LDPSW Rt1,Rt2, [Xn|SP, #imm]
4996 */
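   /* Worked example (illustrative): imm7 = 1000000b sign-extends to
      -64, and the 4-byte scaling below turns that into -256, the most
      negative reachable offset; so ldpsw x0, x1, [sp, #-256] loads
      32-bit words from SP-256 and SP-252 and sign-extends each to
      64 bits. */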
4997 UInt insn_31_22 = INSN(31,22);
4998 if (insn_31_22 == BITS10(0,1,1,0,1,0,0,0,1,1)
4999 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,1,1)
5000 || insn_31_22 == BITS10(0,1,1,0,1,0,0,1,0,1)) {
5001 UInt bWBack = INSN(23,23);
5002 UInt rT1 = INSN(4,0);
5003 UInt rN = INSN(9,5);
5004 UInt rT2 = INSN(14,10);
5005 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5006 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
5007 || (rT1 == rT2)) {
5008 /* undecodable; fall through */
5009 } else {
5010 if (rN == 31) { /* FIXME generate stack alignment check */ }
5011
5012 // Compute the transfer address TA and the writeback address WA.
5013 IRTemp tRN = newTemp(Ity_I64);
5014 assign(tRN, getIReg64orSP(rN));
5015 IRTemp tEA = newTemp(Ity_I64);
5016 simm7 = 4 * simm7;
5017 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5018
5019 IRTemp tTA = newTemp(Ity_I64);
5020 IRTemp tWA = newTemp(Ity_I64);
5021 switch (INSN(24,23)) {
5022 case BITS2(0,1):
5023 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5024 case BITS2(1,1):
5025 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5026 case BITS2(1,0):
5027 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5028 default:
5029 vassert(0); /* NOTREACHED */
5030 }
5031
5032 // 32 bit load, sign extended to 64 bits
5033 putIReg64orZR(rT1, unop(Iop_32Sto64,
5034 loadLE(Ity_I32, binop(Iop_Add64,
5035 mkexpr(tTA),
5036 mkU64(0)))));
5037 putIReg64orZR(rT2, unop(Iop_32Sto64,
5038 loadLE(Ity_I32, binop(Iop_Add64,
5039 mkexpr(tTA),
5040 mkU64(4)))));
5041 if (bWBack)
5042 putIReg64orSP(rN, mkexpr(tEA));
5043
5044 const HChar* fmt_str = NULL;
5045 switch (INSN(24,23)) {
5046 case BITS2(0,1):
5047 fmt_str = "ldpsw %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5048 break;
5049 case BITS2(1,1):
5050 fmt_str = "ldpsw %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5051 break;
5052 case BITS2(1,0):
5053               fmt_str = "ldpsw %s, %s, [%s, #%lld] (at-EA)\n";
5054 break;
5055 default:
5056 vassert(0);
5057 }
5058 DIP(fmt_str, nameIReg64orZR(rT1),
5059 nameIReg64orZR(rT2),
5060 nameIReg64orSP(rN), simm7);
5061 return True;
5062 }
5063 }
5064
5065   /* ---------------- LDR (literal, int reg) ---------------- */
5066 /* 31 29 23 4
5067 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
5068 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
5069 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
5070 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
5071 Just handles the first two cases for now.
5072 */
5073 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
5074 UInt imm19 = INSN(23,5);
5075 UInt rT = INSN(4,0);
5076 UInt bX = INSN(30,30);
5077 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5078 if (bX) {
5079 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
5080 } else {
5081 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
5082 }
5083 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
5084 return True;
5085 }
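   /* Range example (illustrative): imm19 is shifted left by 2 and
      sign-extended from 21 bits, so imm19 = 0x7FFFF gives an offset
      of -4 and the reachable window is PC +/- 1MB in 4-byte steps. */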
5086
5087 /* -------------- {LD,ST}R (integer register) --------------- */
5088 /* 31 29 20 15 12 11 9 4
5089 | | | | | | | |
5090 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
5091 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
5092 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
5093 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
5094
5095 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
5096 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
5097 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
5098 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
5099 */
5100 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
5101 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5102 HChar dis_buf[64];
5103 UInt szLg2 = INSN(31,30);
5104 Bool isLD = INSN(22,22) == 1;
5105 UInt tt = INSN(4,0);
5106 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5107 if (ea != IRTemp_INVALID) {
5108 switch (szLg2) {
5109 case 3: /* 64 bit */
5110 if (isLD) {
5111 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
5112 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
5113 } else {
5114 storeLE(mkexpr(ea), getIReg64orZR(tt));
5115 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
5116 }
5117 break;
5118 case 2: /* 32 bit */
5119 if (isLD) {
5120 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
5121 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
5122 } else {
5123 storeLE(mkexpr(ea), getIReg32orZR(tt));
5124 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
5125 }
5126 break;
5127 case 1: /* 16 bit */
5128 if (isLD) {
5129 putIReg64orZR(tt, unop(Iop_16Uto64,
5130 loadLE(Ity_I16, mkexpr(ea))));
5131 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5132 } else {
5133 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
5134 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5135 }
5136 break;
5137 case 0: /* 8 bit */
5138 if (isLD) {
5139 putIReg64orZR(tt, unop(Iop_8Uto64,
5140 loadLE(Ity_I8, mkexpr(ea))));
5141 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
5142 } else {
5143 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
5144 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5145 }
5146 break;
5147 default:
5148 vassert(0);
5149 }
5150 return True;
5151 }
5152 }
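   /* Addressing example (illustrative): ldr x0, [x1, x2, lsl #3] is
      the szLg2 == 3 load with option = LSL and S = 1; assuming the
      usual A64 scaling rule implemented by gen_indexed_EA, the EA is
      X1 + (X2 << 3), i.e. indexing an array of 8-byte elements. */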
5153
5154 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
5155 /* 31 29 26 23 21 9 4
5156 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
5157 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
5158 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
5159 where
5160 Rt is Wt when x==1, Xt when x==0
5161 */
5162 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
5163 /* Further checks on bits 31:30 and 22 */
5164 Bool valid = False;
5165 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5166 case BITS3(1,0,0):
5167 case BITS3(0,1,0): case BITS3(0,1,1):
5168 case BITS3(0,0,0): case BITS3(0,0,1):
5169 valid = True;
5170 break;
5171 }
5172 if (valid) {
5173 UInt szLg2 = INSN(31,30);
5174 UInt bitX = INSN(22,22);
5175 UInt imm12 = INSN(21,10);
5176 UInt nn = INSN(9,5);
5177 UInt tt = INSN(4,0);
5178 UInt szB = 1 << szLg2;
5179 IRExpr* ea = binop(Iop_Add64,
5180 getIReg64orSP(nn), mkU64(imm12 * szB));
5181 switch (szB) {
5182 case 4:
5183 vassert(bitX == 0);
5184 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
5185 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
5186 nameIReg64orSP(nn), imm12 * szB);
5187 break;
5188 case 2:
5189 if (bitX == 1) {
5190 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
5191 } else {
5192 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
5193 }
5194 DIP("ldrsh %s, [%s, #%u]\n",
5195 nameIRegOrZR(bitX == 0, tt),
5196 nameIReg64orSP(nn), imm12 * szB);
5197 break;
5198 case 1:
5199 if (bitX == 1) {
5200 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
5201 } else {
5202 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
5203 }
5204 DIP("ldrsb %s, [%s, #%u]\n",
5205 nameIRegOrZR(bitX == 0, tt),
5206 nameIReg64orSP(nn), imm12 * szB);
5207 break;
5208 default:
5209 vassert(0);
5210 }
5211 return True;
5212 }
5213 /* else fall through */
5214 }
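   /* Sign-extension example (illustrative): for ldrsb x0, [x1, #0]
      a loaded byte 0x80 yields 0xFFFFFFFFFFFFFF80 in X0, whereas
      ldrsb w0, [x1, #0] yields 0xFFFFFF80 in W0 and therefore
      0x00000000FFFFFF80 in X0, since a W-register write zeroes the
      upper 32 bits. */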
5215
5216 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
5217 /* (at-Rn-then-Rn=EA)
5218 31 29 23 21 20 11 9 4
5219 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
5220 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
5221 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
5222
5223 (at-EA-then-Rn=EA)
5224 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
5225 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
5226 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
5227 where
5228 Rt is Wt when x==1, Xt when x==0
5229 transfer-at-Rn when [11]==0, at EA when [11]==1
5230 */
5231 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5232 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5233 /* Further checks on bits 31:30 and 22 */
5234 Bool valid = False;
5235 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5236 case BITS3(1,0,0): // LDRSW Xt
5237 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
5238 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
5239 valid = True;
5240 break;
5241 }
5242 if (valid) {
5243 UInt szLg2 = INSN(31,30);
5244 UInt imm9 = INSN(20,12);
5245 Bool atRN = INSN(11,11) == 0;
5246 UInt nn = INSN(9,5);
5247 UInt tt = INSN(4,0);
5248 IRTemp tRN = newTemp(Ity_I64);
5249 IRTemp tEA = newTemp(Ity_I64);
5250 IRTemp tTA = IRTemp_INVALID;
5251 ULong simm9 = sx_to_64(imm9, 9);
5252 Bool is64 = INSN(22,22) == 0;
5253 assign(tRN, getIReg64orSP(nn));
5254 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5255 tTA = atRN ? tRN : tEA;
5256 HChar ch = '?';
5257 /* There are 5 cases:
5258 byte load, SX to 64
5259 byte load, SX to 32, ZX to 64
5260 halfword load, SX to 64
5261 halfword load, SX to 32, ZX to 64
5262 word load, SX to 64
5263 The ifs below handle them in the listed order.
5264 */
5265 if (szLg2 == 0) {
5266 ch = 'b';
5267 if (is64) {
5268 putIReg64orZR(tt, unop(Iop_8Sto64,
5269 loadLE(Ity_I8, mkexpr(tTA))));
5270 } else {
5271 putIReg32orZR(tt, unop(Iop_8Sto32,
5272 loadLE(Ity_I8, mkexpr(tTA))));
5273 }
5274 }
5275 else if (szLg2 == 1) {
5276 ch = 'h';
5277 if (is64) {
5278 putIReg64orZR(tt, unop(Iop_16Sto64,
5279 loadLE(Ity_I16, mkexpr(tTA))));
5280 } else {
5281 putIReg32orZR(tt, unop(Iop_16Sto32,
5282 loadLE(Ity_I16, mkexpr(tTA))));
5283 }
5284 }
5285 else if (szLg2 == 2 && is64) {
5286 ch = 'w';
5287 putIReg64orZR(tt, unop(Iop_32Sto64,
5288 loadLE(Ity_I32, mkexpr(tTA))));
5289 }
5290 else {
5291 vassert(0);
5292 }
5293 putIReg64orSP(nn, mkexpr(tEA));
5294         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
5295             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5296 return True;
5297 }
5298 /* else fall through */
5299 }
5300
5301 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
5302 /* 31 29 23 21 20 11 9 4
5303 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
5304 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
5305 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
5306 where
5307 Rt is Wt when x==1, Xt when x==0
5308 */
5309 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5310 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5311 /* Further checks on bits 31:30 and 22 */
5312 Bool valid = False;
5313 switch ((INSN(31,30) << 1) | INSN(22,22)) {
5314 case BITS3(1,0,0): // LDURSW Xt
5315 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
5316 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
5317 valid = True;
5318 break;
5319 }
5320 if (valid) {
5321 UInt szLg2 = INSN(31,30);
5322 UInt imm9 = INSN(20,12);
5323 UInt nn = INSN(9,5);
5324 UInt tt = INSN(4,0);
5325 IRTemp tRN = newTemp(Ity_I64);
5326 IRTemp tEA = newTemp(Ity_I64);
5327 ULong simm9 = sx_to_64(imm9, 9);
5328 Bool is64 = INSN(22,22) == 0;
5329 assign(tRN, getIReg64orSP(nn));
5330 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5331 HChar ch = '?';
5332 /* There are 5 cases:
5333 byte load, SX to 64
5334 byte load, SX to 32, ZX to 64
5335 halfword load, SX to 64
5336 halfword load, SX to 32, ZX to 64
5337 word load, SX to 64
5338 The ifs below handle them in the listed order.
5339 */
5340 if (szLg2 == 0) {
5341 ch = 'b';
5342 if (is64) {
5343 putIReg64orZR(tt, unop(Iop_8Sto64,
5344 loadLE(Ity_I8, mkexpr(tEA))));
5345 } else {
5346 putIReg32orZR(tt, unop(Iop_8Sto32,
5347 loadLE(Ity_I8, mkexpr(tEA))));
5348 }
5349 }
5350 else if (szLg2 == 1) {
5351 ch = 'h';
5352 if (is64) {
5353 putIReg64orZR(tt, unop(Iop_16Sto64,
5354 loadLE(Ity_I16, mkexpr(tEA))));
5355 } else {
5356 putIReg32orZR(tt, unop(Iop_16Sto32,
5357 loadLE(Ity_I16, mkexpr(tEA))));
5358 }
5359 }
5360 else if (szLg2 == 2 && is64) {
5361 ch = 'w';
5362 putIReg64orZR(tt, unop(Iop_32Sto64,
5363 loadLE(Ity_I32, mkexpr(tEA))));
5364 }
5365 else {
5366 vassert(0);
5367 }
5368         DIP("ldurs%c %s, [%s, #%lld]\n",
5369             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), (Long)simm9);
5370         return True;
5371 }
5372 /* else fall through */
5373 }
5374
5375 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
5376 /* L==1 => mm==LD
5377 L==0 => mm==ST
5378 sz==00 => 32 bit (S) transfers
5379 sz==01 => 64 bit (D) transfers
5380 sz==10 => 128 bit (Q) transfers
5381 sz==11 isn't allowed
5382 simm7 is scaled by the (single-register) transfer size
5383
5384      31 29  26    22 21   14 9 4

5386      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
5387      (at-EA, with nontemporal hint)

5389      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
5390      (at-Rn-then-Rn=EA)

5392      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
5393      (at-EA)

5395      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
5396 (at-EA-then-Rn=EA)
5397 */
5398 if (INSN(29,25) == BITS5(1,0,1,1,0)) {
5399      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
5400 Bool isLD = INSN(22,22) == 1;
5401 Bool wBack = INSN(23,23) == 1;
5402 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
5403 UInt tt2 = INSN(14,10);
5404 UInt nn = INSN(9,5);
5405 UInt tt1 = INSN(4,0);
5406 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
5407 /* undecodable; fall through */
5408 } else {
5409 if (nn == 31) { /* FIXME generate stack alignment check */ }
5410
5411 // Compute the transfer address TA and the writeback address WA.
5412 UInt szB = 4 << szSlg2; /* szB is the per-register size */
5413 IRTemp tRN = newTemp(Ity_I64);
5414 assign(tRN, getIReg64orSP(nn));
5415 IRTemp tEA = newTemp(Ity_I64);
5416 simm7 = szB * simm7;
5417 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
5418
5419 IRTemp tTA = newTemp(Ity_I64);
5420 IRTemp tWA = newTemp(Ity_I64);
5421 switch (INSN(24,23)) {
5422 case BITS2(0,1):
5423 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
5424 case BITS2(1,1):
5425 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
5426 case BITS2(1,0):
5427            case BITS2(0,0):
5428               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
5429 default:
5430 vassert(0); /* NOTREACHED */
5431 }
5432
5433 IRType ty = Ity_INVALID;
5434 switch (szB) {
5435 case 4: ty = Ity_F32; break;
5436 case 8: ty = Ity_F64; break;
5437 case 16: ty = Ity_V128; break;
5438 default: vassert(0);
5439 }
5440
5441         /* Normally rN would be updated after the transfer. However, in
5442            the special cases typified by
5443               stp q0, q1, [sp,#-512]!
5444               stp d0, d1, [sp,#-512]!
5445               stp s0, s1, [sp,#-512]!
5446            it is necessary to update SP before the transfer, (1)
5447 because Memcheck will otherwise complain about a write
5448 below the stack pointer, and (2) because the segfault
5449 stack extension mechanism will otherwise extend the stack
5450 only down to SP before the instruction, which might not be
5451 far enough, if the -512 bit takes the actual access
5452 address to the next page.
5453 */
5454 Bool earlyWBack
5455            = wBack && simm7 < 0
5456              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
5457
5458 if (wBack && earlyWBack)
5459 putIReg64orSP(nn, mkexpr(tEA));
5460
5461         if (isLD) {
5462            if (szB < 16) {
5463               putQReg128(tt1, mkV128(0x0000));
5464            }
5465            putQRegLO(tt1,
5466                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
5467            if (szB < 16) {
5468               putQReg128(tt2, mkV128(0x0000));
5469            }
5470            putQRegLO(tt2,
5471                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
5472         } else {
5473            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
5474                    getQRegLO(tt1, ty));
5475            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
5476                    getQRegLO(tt2, ty));
5477         }

5479         if (wBack && !earlyWBack)
5480            putIReg64orSP(nn, mkexpr(tEA));
5481
5482 const HChar* fmt_str = NULL;
5483 switch (INSN(24,23)) {
5484 case BITS2(0,1):
5485 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
5486 break;
5487 case BITS2(1,1):
5488 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
5489 break;
5490 case BITS2(1,0):
5491               fmt_str = "%sp %s, %s, [%s, #%lld] (at-EA)\n";
5492 break;
5493            case BITS2(0,0):
5494               fmt_str = "%snp %s, %s, [%s, #%lld] (at-EA)\n";
5495 break;
5496            default:
5497 vassert(0);
5498 }
5499 DIP(fmt_str, isLD ? "ld" : "st",
5500             nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
5501             nameIReg64orSP(nn), simm7);
5502 return True;
5503 }
5504 }
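   /* Scaling example (illustrative): for stp q0, q1, [sp, #-512]!
      sz = 10 gives szB = 4 << 2 = 16, so imm7 = -32 encodes the
      -512 byte offset, and the register pair occupies the 32 bytes
      [SP-512, SP-480) after the early writeback. */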
5505
5506 /* -------------- {LD,ST}R (vector register) --------------- */
5507 /* 31 29 23 20 15 12 11 9 4
5508 | | | | | | | | |
5509 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
5510 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
5511 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
5512 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
5513 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
5514
5515 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
5516 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
5517 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
5518 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
5519 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
5520 */
5521 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5522 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5523 HChar dis_buf[64];
5524 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5525 Bool isLD = INSN(22,22) == 1;
5526 UInt tt = INSN(4,0);
5527      if (szLg2 > 4) goto after_LDR_STR_vector_register;
5528      IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
5529      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
5530      switch (szLg2) {
5531         case 0: /* 8 bit */
5532            if (isLD) {
5533               putQReg128(tt, mkV128(0x0000));
5534               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
5535               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5536            } else {
5537               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
5538               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
5539            }
5540            break;
5541         case 1:
5542            if (isLD) {
5543               putQReg128(tt, mkV128(0x0000));
5544               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
5545               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5546            } else {
5547               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
5548               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
5549            }
5550            break;
5551         case 2: /* 32 bit */
5552            if (isLD) {
5553               putQReg128(tt, mkV128(0x0000));
5554               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
5555               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5556            } else {
5557               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
5558               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
5559            }
5560            break;
5561         case 3: /* 64 bit */
5562            if (isLD) {
5563               putQReg128(tt, mkV128(0x0000));
5564               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
5565               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5566            } else {
5567               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
5568               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
5569            }
5570            break;
5571         case 4:
5572            if (isLD) {
5573               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
5574               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
5575            } else {
5576               storeLE(mkexpr(ea), getQReg128(tt));
5577               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
5578            }
5579            break;
5580         default:
5581            vassert(0);
5582      }
5583 return True;
5584 }
5585 after_LDR_STR_vector_register:
5586
5587 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
5588 /* 31 29 22 20 15 12 11 9 4
5589 | | | | | | | | |
5590 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
5591
5592 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
5593 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
5594
5595 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
5596 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
5597 */
5598 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
5599 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5600 HChar dis_buf[64];
5601 UInt szLg2 = INSN(31,30);
5602 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
5603 UInt tt = INSN(4,0);
5604 if (szLg2 == 3) goto after_LDRS_integer_register;
5605 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
5606 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
5607 /* Enumerate the 5 variants explicitly. */
5608 if (szLg2 == 2/*32 bit*/ && sxTo64) {
5609 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
5610 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
5611 return True;
5612 }
5613 else
5614 if (szLg2 == 1/*16 bit*/) {
5615 if (sxTo64) {
5616 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
5617 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
5618 } else {
5619 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
5620 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
5621 }
5622 return True;
5623 }
5624 else
5625 if (szLg2 == 0/*8 bit*/) {
5626 if (sxTo64) {
5627 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
5628 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
5629 } else {
5630 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
5631 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
5632 }
5633 return True;
5634 }
5635 /* else it's an invalid combination */
5636 }
5637 after_LDRS_integer_register:
5638
5639 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
5640 /* This is the Unsigned offset variant only. The Post-Index and
5641 Pre-Index variants are below.
5642
5643 31 29 23 21 9 4
5644 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
5645 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
5646 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
5647 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
5648 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
5649
5650 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
5651 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
5652 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
5653 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
5654 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
5655 */
5656 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
5657 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
5658 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5659 Bool isLD = INSN(22,22) == 1;
5660 UInt pimm12 = INSN(21,10) << szLg2;
5661 UInt nn = INSN(9,5);
5662 UInt tt = INSN(4,0);
5663 IRTemp tEA = newTemp(Ity_I64);
5664 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5665 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
5666 if (isLD) {
5667 if (szLg2 < 4) {
5668 putQReg128(tt, mkV128(0x0000));
5669 }
5670         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5671      } else {
5672         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5673      }
5674      DIP("%s %s, [%s, #%u]\n",
5675          isLD ? "ldr" : "str",
5676          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
5677      return True;
5678 }
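   /* Scaling example (illustrative): ldr q0, [x1, #32] has szLg2 = 4,
      so the stored imm12 field is 2 and pimm12 = 2 << 4 = 32, exactly
      as computed above. */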
5679
5680 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
5681 /* These are the Post-Index and Pre-Index variants.
5682
5683 31 29 23 20 11 9 4
5684 (at-Rn-then-Rn=EA)
5685 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
5686 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
5687 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
5688 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
5689 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
5690
5691 (at-EA-then-Rn=EA)
5692 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
5693 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
5694 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
5695 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
5696 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
5697
5698 Stores are the same except with bit 22 set to 0.
5699 */
5700 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5701 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5702 && INSN(21,21) == 0 && INSN(10,10) == 1) {
5703 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5704 Bool isLD = INSN(22,22) == 1;
5705 UInt imm9 = INSN(20,12);
5706 Bool atRN = INSN(11,11) == 0;
5707 UInt nn = INSN(9,5);
5708 UInt tt = INSN(4,0);
5709 IRTemp tRN = newTemp(Ity_I64);
5710 IRTemp tEA = newTemp(Ity_I64);
5711 IRTemp tTA = IRTemp_INVALID;
5712 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5713 ULong simm9 = sx_to_64(imm9, 9);
5714 assign(tRN, getIReg64orSP(nn));
5715 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
5716 tTA = atRN ? tRN : tEA;
5717 if (isLD) {
5718 if (szLg2 < 4) {
5719 putQReg128(tt, mkV128(0x0000));
5720 }
5721         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
5722      } else {
5723         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
5724      }
5725      putIReg64orSP(nn, mkexpr(tEA));
5726      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
5727          isLD ? "ldr" : "str",
5728          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5729      return True;
5730 }
5731
5732 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
5733 /* 31 29 23 20 11 9 4
5734      00 111 100 01 0 imm9 00 n t LDUR Bt, [Xn|SP, #simm]
5735      01 111 100 01 0 imm9 00 n t LDUR Ht, [Xn|SP, #simm]
5736      10 111 100 01 0 imm9 00 n t LDUR St, [Xn|SP, #simm]
5737      11 111 100 01 0 imm9 00 n t LDUR Dt, [Xn|SP, #simm]
5738      00 111 100 11 0 imm9 00 n t LDUR Qt, [Xn|SP, #simm]

5740      00 111 100 00 0 imm9 00 n t STUR Bt, [Xn|SP, #simm]
5741      01 111 100 00 0 imm9 00 n t STUR Ht, [Xn|SP, #simm]
5742      10 111 100 00 0 imm9 00 n t STUR St, [Xn|SP, #simm]
5743      11 111 100 00 0 imm9 00 n t STUR Dt, [Xn|SP, #simm]
5744      00 111 100 10 0 imm9 00 n t STUR Qt, [Xn|SP, #simm]
5745 */
5746 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
5747 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
5748 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
5749 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
5750 Bool isLD = INSN(22,22) == 1;
5751 UInt imm9 = INSN(20,12);
5752 UInt nn = INSN(9,5);
5753 UInt tt = INSN(4,0);
5754 ULong simm9 = sx_to_64(imm9, 9);
5755 IRTemp tEA = newTemp(Ity_I64);
5756 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
5757 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
5758 if (isLD) {
5759         if (szLg2 < 4) {
5760            putQReg128(tt, mkV128(0x0000));
5761         }
5762         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
5763      } else {
5764         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
5765      }
5766      DIP("%s %s, [%s, #%lld]\n",
5767          isLD ? "ldur" : "stur",
5768          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
5769      return True;
5770 }
5771
5772 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
5773 /* 31 29 23 4
5774 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
5775 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
5776 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
5777 */
5778 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
5779 UInt szB = 4 << INSN(31,30);
5780 UInt imm19 = INSN(23,5);
5781 UInt tt = INSN(4,0);
5782 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
5783 IRType ty = preferredVectorSubTypeFromSize(szB);
5784      putQReg128(tt, mkV128(0x0000));
5785      putQRegLO(tt, loadLE(ty, mkU64(ea)));
5786      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
5787      return True;
5788 }
5789
5790   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg  ------ */
5791   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs ------ */
5792   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs ------ */
5793   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs ------ */
5794   /* 31 29  26   22 21 20    15   11 9 4

5796      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
5797      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

5799      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
5800      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

5802      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
5803      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

5805      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T}, [Xn|SP]
5806      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T}, [Xn|SP], step

5808      T    = defined by Q and sz in the normal way
5809      step = if m == 11111 then transfer-size else Xm
5810      xx   = case L of 1 -> LD ; 0 -> ST
5811   */
5812   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
5813 && INSN(21,21) == 0) {
5814 Bool bitQ = INSN(30,30);
5815 Bool isPX = INSN(23,23) == 1;
5816 Bool isLD = INSN(22,22) == 1;
5817 UInt mm = INSN(20,16);
5818 UInt opc = INSN(15,12);
5819 UInt sz = INSN(11,10);
5820 UInt nn = INSN(9,5);
5821 UInt tt = INSN(4,0);
5822 Bool isQ = bitQ == 1;
5823 Bool is1d = sz == BITS2(1,1) && !isQ;
5824 UInt nRegs = 0;
5825 switch (opc) {
5826 case BITS4(0,0,0,0): nRegs = 4; break;
5827 case BITS4(0,1,0,0): nRegs = 3; break;
5828 case BITS4(1,0,0,0): nRegs = 2; break;
5829 case BITS4(0,1,1,1): nRegs = 1; break;
5830 default: break;
5831      }
5832
5833 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
5834 If we see it, set nRegs to 0 so as to cause the next conditional
5835 to fail. */
5836 if (!isPX && mm != 0)
5837 nRegs = 0;
5838
5839 if (nRegs == 1 /* .1d is allowed */
5840 || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {
5841
5842 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
5843
5844 /* Generate the transfer address (TA) and if necessary the
5845 writeback address (WB) */
5846 IRTemp tTA = newTemp(Ity_I64);
5847 assign(tTA, getIReg64orSP(nn));
5848 if (nn == 31) { /* FIXME generate stack alignment check */ }
5849 IRTemp tWB = IRTemp_INVALID;
5850 if (isPX) {
5851 tWB = newTemp(Ity_I64);
5852 assign(tWB, binop(Iop_Add64,
5853 mkexpr(tTA),
5854 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
5855 : getIReg64orZR(mm)));
5856 }
5857
5858 /* -- BEGIN generate the transfers -- */
5859
5860 IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
5861 u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
5862 switch (nRegs) {
5863 case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
5864 case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
5865 case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
5866 case 1: u0 = newTempV128(); i0 = newTempV128(); break;
5867 default: vassert(0);
5868 }
5869
5870 /* -- Multiple 128 or 64 bit stores -- */
5871 if (!isLD) {
5872 switch (nRegs) {
5873 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
5874 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
5875 case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
5876 case 1: assign(u0, getQReg128((tt+0) % 32)); break;
5877 default: vassert(0);
5878 }
5879 switch (nRegs) {
5880 case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
5881 (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
5882 break;
5883 case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
5884 (&i0, &i1, &i2, sz, u0, u1, u2);
5885 break;
5886 case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
5887 (&i0, &i1, sz, u0, u1);
5888 break;
5889 case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
5890 (&i0, sz, u0);
5891 break;
5892 default: vassert(0);
5893 }
5894# define MAYBE_NARROW_TO_64(_expr) \
5895 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
5896 UInt step = isQ ? 16 : 8;
5897 switch (nRegs) {
5898 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
5899 MAYBE_NARROW_TO_64(mkexpr(i3)) );
5900 /* fallthru */
5901 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
5902 MAYBE_NARROW_TO_64(mkexpr(i2)) );
5903 /* fallthru */
5904 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
5905 MAYBE_NARROW_TO_64(mkexpr(i1)) );
5906 /* fallthru */
5907 case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
5908 MAYBE_NARROW_TO_64(mkexpr(i0)) );
5909 break;
5910 default: vassert(0);
5911 }
5912# undef MAYBE_NARROW_TO_64
5913 }
5914
5915 /* -- Multiple 128 or 64 bit loads -- */
5916 else /* isLD */ {
5917 UInt step = isQ ? 16 : 8;
5918 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
5919# define MAYBE_WIDEN_FROM_64(_expr) \
5920 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
5921 switch (nRegs) {
5922 case 4:
5923 assign(i3, MAYBE_WIDEN_FROM_64(
5924 loadLE(loadTy,
5925 binop(Iop_Add64, mkexpr(tTA),
5926 mkU64(3 * step)))));
5927 /* fallthru */
5928 case 3:
5929 assign(i2, MAYBE_WIDEN_FROM_64(
5930 loadLE(loadTy,
5931 binop(Iop_Add64, mkexpr(tTA),
5932 mkU64(2 * step)))));
5933 /* fallthru */
5934 case 2:
5935 assign(i1, MAYBE_WIDEN_FROM_64(
5936 loadLE(loadTy,
5937 binop(Iop_Add64, mkexpr(tTA),
5938 mkU64(1 * step)))));
5939 /* fallthru */
5940               case 1:
5941 assign(i0, MAYBE_WIDEN_FROM_64(
5942 loadLE(loadTy,
5943 binop(Iop_Add64, mkexpr(tTA),
5944 mkU64(0 * step)))));
5945 break;
5946 default:
5947 vassert(0);
5948 }
5949# undef MAYBE_WIDEN_FROM_64
5950 switch (nRegs) {
5951 case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
5952 (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
5953 break;
5954 case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
5955 (&u0, &u1, &u2, sz, i0, i1, i2);
5956 break;
5957 case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
5958 (&u0, &u1, sz, i0, i1);
5959 break;
5960 case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
5961 (&u0, sz, i0);
5962 break;
5963 default: vassert(0);
5964 }
5965 switch (nRegs) {
5966 case 4: putQReg128( (tt+3) % 32,
5967 math_MAYBE_ZERO_HI64(bitQ, u3));
5968 /* fallthru */
5969 case 3: putQReg128( (tt+2) % 32,
5970 math_MAYBE_ZERO_HI64(bitQ, u2));
5971 /* fallthru */
5972 case 2: putQReg128( (tt+1) % 32,
5973 math_MAYBE_ZERO_HI64(bitQ, u1));
5974 /* fallthru */
5975 case 1: putQReg128( (tt+0) % 32,
5976 math_MAYBE_ZERO_HI64(bitQ, u0));
5977 break;
5978 default: vassert(0);
5979 }
5980 }
5981
5982 /* -- END generate the transfers -- */
5983
5984 /* Do the writeback, if necessary */
5985 if (isPX) {
5986 putIReg64orSP(nn, mkexpr(tWB));
5987 }
5988
5989 HChar pxStr[20];
5990 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
5991 if (isPX) {
5992 if (mm == BITS5(1,1,1,1,1))
5993 vex_sprintf(pxStr, ", #%u", xferSzB);
5994 else
5995 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
5996 }
5997 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
5998 DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
5999 isLD ? "ld" : "st", nRegs,
6000 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6001 pxStr);
6002
6003 return True;
6004 }
6005 /* else fall through */
6006   }
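   /* Interleaving example (illustrative): ld2 {v0.4s, v1.4s}, [x0]
      reads eight 32-bit elements s0..s7 in memory order; the
      deinterleave step above is assumed to leave v0 = {s0,s2,s4,s6}
      and v1 = {s1,s3,s5,s7}, i.e. even-indexed elements in the first
      register and odd-indexed ones in the second. */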
6007
6008   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs ------ */
6009 /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs ------ */
6010 /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs ------ */
6011 /* 31 29 26 22 21 20 15 11 9 4
6012
6013 0q 001 1000 L 0 00000 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP]
6014 0q 001 1001 L 0 m 0010 sz n t xx1 {Vt..t+3.T}, [Xn|SP], step
6015
6016 0q 001 1000 L 0 00000 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP]
6017 0q 001 1001 L 0 m 0110 sz n t xx1 {Vt..t+2.T}, [Xn|SP], step
6018
6019 0q 001 1000 L 0 00000 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP]
6020 0q 001 1001 L 0 m 1010 sz n t xx1 {Vt..t+1.T}, [Xn|SP], step
6021
6022 T = defined by Q and sz in the normal way
6023 step = if m == 11111 then transfer-size else Xm
6024 xx = case L of 1 -> LD ; 0 -> ST
6025 */
6026 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
6027 && INSN(21,21) == 0) {
6028 Bool bitQ = INSN(30,30);
6029 Bool isPX = INSN(23,23) == 1;
6030 Bool isLD = INSN(22,22) == 1;
6031 UInt mm = INSN(20,16);
6032 UInt opc = INSN(15,12);
6033 UInt sz = INSN(11,10);
6034 UInt nn = INSN(9,5);
6035 UInt tt = INSN(4,0);
6036 Bool isQ = bitQ == 1;
6037 UInt nRegs = 0;
6038 switch (opc) {
6039 case BITS4(0,0,1,0): nRegs = 4; break;
6040 case BITS4(0,1,1,0): nRegs = 3; break;
6041 case BITS4(1,0,1,0): nRegs = 2; break;
6042 default: break;
6043 }
6044
6045 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
6046 If we see it, set nRegs to 0 so as to cause the next conditional
6047 to fail. */
6048 if (!isPX && mm != 0)
6049 nRegs = 0;
6050
6051 if (nRegs >= 2 && nRegs <= 4) {
6052
6053 UInt xferSzB = (isQ ? 16 : 8) * nRegs;
6054
6055 /* Generate the transfer address (TA) and if necessary the
6056 writeback address (WB) */
6057 IRTemp tTA = newTemp(Ity_I64);
6058 assign(tTA, getIReg64orSP(nn));
6059 if (nn == 31) { /* FIXME generate stack alignment check */ }
6060 IRTemp tWB = IRTemp_INVALID;
6061 if (isPX) {
6062 tWB = newTemp(Ity_I64);
6063 assign(tWB, binop(Iop_Add64,
6064 mkexpr(tTA),
6065 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6066 : getIReg64orZR(mm)));
6067 }
6068
6069 /* -- BEGIN generate the transfers -- */
6070
6071 IRTemp u0, u1, u2, u3;
6072 u0 = u1 = u2 = u3 = IRTemp_INVALID;
6073 switch (nRegs) {
6074 case 4: u3 = newTempV128(); /* fallthru */
6075 case 3: u2 = newTempV128(); /* fallthru */
6076 case 2: u1 = newTempV128();
6077 u0 = newTempV128(); break;
6078 default: vassert(0);
6079 }
6080
6081 /* -- Multiple 128 or 64 bit stores -- */
6082 if (!isLD) {
6083 switch (nRegs) {
6084 case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
6085 case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
6086 case 2: assign(u1, getQReg128((tt+1) % 32));
6087 assign(u0, getQReg128((tt+0) % 32)); break;
6088 default: vassert(0);
6089 }
6090# define MAYBE_NARROW_TO_64(_expr) \
6091 (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
6092 UInt step = isQ ? 16 : 8;
6093 switch (nRegs) {
6094 case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
6095 MAYBE_NARROW_TO_64(mkexpr(u3)) );
6096 /* fallthru */
6097 case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
6098 MAYBE_NARROW_TO_64(mkexpr(u2)) );
6099 /* fallthru */
6100 case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
6101 MAYBE_NARROW_TO_64(mkexpr(u1)) );
6102 storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
6103 MAYBE_NARROW_TO_64(mkexpr(u0)) );
6104 break;
6105 default: vassert(0);
6106 }
6107# undef MAYBE_NARROW_TO_64
6108 }
6109
6110 /* -- Multiple 128 or 64 bit loads -- */
6111 else /* isLD */ {
6112 UInt step = isQ ? 16 : 8;
6113 IRType loadTy = isQ ? Ity_V128 : Ity_I64;
6114# define MAYBE_WIDEN_FROM_64(_expr) \
6115 (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
6116 switch (nRegs) {
6117 case 4:
6118 assign(u3, MAYBE_WIDEN_FROM_64(
6119 loadLE(loadTy,
6120 binop(Iop_Add64, mkexpr(tTA),
6121 mkU64(3 * step)))));
6122 /* fallthru */
6123 case 3:
6124 assign(u2, MAYBE_WIDEN_FROM_64(
6125 loadLE(loadTy,
6126 binop(Iop_Add64, mkexpr(tTA),
6127 mkU64(2 * step)))));
6128 /* fallthru */
6129 case 2:
6130 assign(u1, MAYBE_WIDEN_FROM_64(
6131 loadLE(loadTy,
6132 binop(Iop_Add64, mkexpr(tTA),
6133 mkU64(1 * step)))));
6134 assign(u0, MAYBE_WIDEN_FROM_64(
6135 loadLE(loadTy,
6136 binop(Iop_Add64, mkexpr(tTA),
6137 mkU64(0 * step)))));
6138 break;
6139 default:
6140 vassert(0);
6141 }
6142# undef MAYBE_WIDEN_FROM_64
6143 switch (nRegs) {
6144 case 4: putQReg128( (tt+3) % 32,
6145 math_MAYBE_ZERO_HI64(bitQ, u3));
6146 /* fallthru */
6147 case 3: putQReg128( (tt+2) % 32,
6148 math_MAYBE_ZERO_HI64(bitQ, u2));
6149 /* fallthru */
6150 case 2: putQReg128( (tt+1) % 32,
6151 math_MAYBE_ZERO_HI64(bitQ, u1));
6152 putQReg128( (tt+0) % 32,
6153 math_MAYBE_ZERO_HI64(bitQ, u0));
6154 break;
6155 default: vassert(0);
6156 }
6157 }
6158
6159 /* -- END generate the transfers -- */
6160
6161 /* Do the writeback, if necessary */
6162 if (isPX) {
6163 putIReg64orSP(nn, mkexpr(tWB));
6164 }
6165
6166 HChar pxStr[20];
6167 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6168 if (isPX) {
6169 if (mm == BITS5(1,1,1,1,1))
6170 vex_sprintf(pxStr, ", #%u", xferSzB);
6171 else
6172 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6173 }
6174 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6175 DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
6176 isLD ? "ld" : "st",
6177 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6178 pxStr);
6179
6180 return True;
6181 }
6182 /* else fall through */
6183 }
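   /* Contrast with the interleaving forms above (illustrative):
      ld1 {v0.16b, v1.16b}, [x0] (opc == 1010) is just two plain
      16-byte loads, v0 from [x0, x0+16) and v1 from [x0+16, x0+32),
      with no element shuffling. */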
6184
6185   /* ---------- LD1R (single structure, replicate) ---------- */
6186   /* ---------- LD2R (single structure, replicate) ---------- */
6187 /* ---------- LD3R (single structure, replicate) ---------- */
6188 /* ---------- LD4R (single structure, replicate) ---------- */
6189   /* 31 29       22 20    15  11 9 4
6190      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
6191 0q 001 1011 10 m 110 0 sz n t LD1R {Vt.T}, [Xn|SP], step
6192
6193 0q 001 1010 11 00000 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP]
6194 0q 001 1011 11 m 110 0 sz n t LD2R {Vt..t+1.T}, [Xn|SP], step
6195
6196 0q 001 1010 10 00000 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP]
6197 0q 001 1011 10 m 111 0 sz n t LD3R {Vt..t+2.T}, [Xn|SP], step
6198
6199 0q 001 1010 11 00000 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP]
6200 0q 001 1011 11 m 111 0 sz n t LD4R {Vt..t+3.T}, [Xn|SP], step
6201
6202 step = if m == 11111 then transfer-size else Xm
6203   */
6204   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
6205       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
6206 && INSN(12,12) == 0) {
6207 UInt bitQ = INSN(30,30);
6208 Bool isPX = INSN(23,23) == 1;
6209 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6210 UInt mm = INSN(20,16);
6211 UInt sz = INSN(11,10);
6212 UInt nn = INSN(9,5);
6213 UInt tt = INSN(4,0);
6214
6215 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6216 if (isPX || mm == 0) {
6217
6218 IRType ty = integerIRTypeOfSize(1 << sz);
6219
6220 UInt laneSzB = 1 << sz;
6221 UInt xferSzB = laneSzB * nRegs;
6222
6223 /* Generate the transfer address (TA) and if necessary the
6224 writeback address (WB) */
6225 IRTemp tTA = newTemp(Ity_I64);
6226 assign(tTA, getIReg64orSP(nn));
6227 if (nn == 31) { /* FIXME generate stack alignment check */ }
6228 IRTemp tWB = IRTemp_INVALID;
6229 if (isPX) {
6230 tWB = newTemp(Ity_I64);
6231 assign(tWB, binop(Iop_Add64,
6232 mkexpr(tTA),
6233 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6234 : getIReg64orZR(mm)));
6235         }
6236
6237 /* Do the writeback, if necessary */
6238 if (isPX) {
6239 putIReg64orSP(nn, mkexpr(tWB));
6240 }
6241
6242 IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
6243 e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
6244 switch (nRegs) {
6245 case 4:
6246 e3 = newTemp(ty);
6247 assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6248 mkU64(3 * laneSzB))));
6249 v3 = math_DUP_TO_V128(e3, ty);
6250 putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
6251 /* fallthrough */
6252 case 3:
6253 e2 = newTemp(ty);
6254 assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6255 mkU64(2 * laneSzB))));
6256 v2 = math_DUP_TO_V128(e2, ty);
6257 putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
6258 /* fallthrough */
6259 case 2:
6260 e1 = newTemp(ty);
6261 assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6262 mkU64(1 * laneSzB))));
6263 v1 = math_DUP_TO_V128(e1, ty);
6264 putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
6265 /* fallthrough */
6266 case 1:
6267 e0 = newTemp(ty);
6268 assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
6269 mkU64(0 * laneSzB))));
6270 v0 = math_DUP_TO_V128(e0, ty);
6271 putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
6272 break;
6273 default:
6274 vassert(0);
6275 }
6276
6277 HChar pxStr[20];
6278 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6279 if (isPX) {
6280 if (mm == BITS5(1,1,1,1,1))
6281 vex_sprintf(pxStr, ", #%u", xferSzB);
6282 else
6283 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6284 }
6285 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6286 DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
6287 nRegs,
6288 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
6289 pxStr);
6290
6291         return True;
6292      }
6293      /* else fall through */
6294   }
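   /* Replication example (illustrative): ld1r {v0.4s}, [x0] has
      q = 1 and sz = 10, so a single 32-bit element is loaded and
      math_DUP_TO_V128 broadcasts it into all four lanes of v0. */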
6295
6296   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
6297 /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
6298 /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
6299 /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
6300 /* 31 29 22 21 20 15 11 9 4
6301 0q 001 1010 L 0 00000 xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP]
6302 0q 001 1011 L 0 m xx0 S sz n t op1 {Vt.T}[ix], [Xn|SP], step
6303
6304      0q 001 1010 L 1 00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
6305      0q 001 1011 L 1 m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

6307      0q 001 1010 L 0 00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
6308 0q 001 1011 L 0 m xx1 S sz n t op3 {Vt..t+2.T}[ix], [Xn|SP], step
6309
6310 0q 001 1010 L 1 00000 xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP]
6311 0q 001 1011 L 1 m xx1 S sz n t op4 {Vt..t+3.T}[ix], [Xn|SP], step
6312
6313 step = if m == 11111 then transfer-size else Xm
6314 op = case L of 1 -> LD ; 0 -> ST
6315
6316 laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
6317 01:b:b:b0 -> 2, bbb
6318 10:b:b:00 -> 4, bb
6319 10:b:0:01 -> 8, b
6320   */
6321   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
6322 UInt bitQ = INSN(30,30);
6323 Bool isPX = INSN(23,23) == 1;
6324 Bool isLD = INSN(22,22) == 1;
6325 UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
6326 UInt mm = INSN(20,16);
6327 UInt xx = INSN(15,14);
6328 UInt bitS = INSN(12,12);
6329 UInt sz = INSN(11,10);
6330 UInt nn = INSN(9,5);
6331 UInt tt = INSN(4,0);
6332
6333 Bool valid = True;
6334
6335 /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
6336 if (!isPX && mm != 0)
6337 valid = False;
6338
6339 UInt laneSzB = 0; /* invalid */
6340 UInt ix = 16; /* invalid */
6341
6342 UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
6343 switch (xx_q_S_sz) {
6344 case 0x00: case 0x01: case 0x02: case 0x03:
6345 case 0x04: case 0x05: case 0x06: case 0x07:
6346 case 0x08: case 0x09: case 0x0A: case 0x0B:
6347 case 0x0C: case 0x0D: case 0x0E: case 0x0F:
6348 laneSzB = 1; ix = xx_q_S_sz & 0xF;
6349 break;
6350 case 0x10: case 0x12: case 0x14: case 0x16:
6351 case 0x18: case 0x1A: case 0x1C: case 0x1E:
6352 laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
6353 break;
6354 case 0x20: case 0x24: case 0x28: case 0x2C:
6355 laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
6356 break;
6357 case 0x21: case 0x29:
6358 laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
6359 break;
6360 default:
6361 break;
6362      }
6363
6364 if (valid && laneSzB != 0) {
6365
6366 IRType ty = integerIRTypeOfSize(laneSzB);
6367 UInt xferSzB = laneSzB * nRegs;
6368
6369 /* Generate the transfer address (TA) and if necessary the
6370 writeback address (WB) */
6371 IRTemp tTA = newTemp(Ity_I64);
6372 assign(tTA, getIReg64orSP(nn));
6373 if (nn == 31) { /* FIXME generate stack alignment check */ }
6374 IRTemp tWB = IRTemp_INVALID;
6375 if (isPX) {
6376 tWB = newTemp(Ity_I64);
6377 assign(tWB, binop(Iop_Add64,
6378 mkexpr(tTA),
6379 mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
6380 : getIReg64orZR(mm)));
6381 }
6382
6383 /* Do the writeback, if necessary */
6384 if (isPX) {
6385 putIReg64orSP(nn, mkexpr(tWB));
6386 }
6387
6388 switch (nRegs) {
6389 case 4: {
6390 IRExpr* addr
6391 = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
6392 if (isLD) {
6393 putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
6394 } else {
6395 storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
6396 }
6397 /* fallthrough */
6398 }
6399 case 3: {
6400 IRExpr* addr
6401 = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
6402 if (isLD) {
6403 putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
6404 } else {
6405 storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
6406 }
6407 /* fallthrough */
6408 }
6409 case 2: {
6410 IRExpr* addr
6411 = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
6412 if (isLD) {
6413 putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
6414 } else {
6415 storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
6416 }
6417 /* fallthrough */
6418 }
6419 case 1: {
6420 IRExpr* addr
6421 = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
6422 if (isLD) {
6423 putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
6424 } else {
6425 storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
6426 }
6427 break;
6428 }
6429 default:
6430 vassert(0);
6431 }
6432
6433 HChar pxStr[20];
6434 pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
6435 if (isPX) {
6436 if (mm == BITS5(1,1,1,1,1))
6437 vex_sprintf(pxStr, ", #%u", xferSzB);
6438 else
6439 vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
6440 }
6441 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
6442 DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
6443 isLD ? "ld" : "st", nRegs,
6444 (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
6445 ix, nameIReg64orSP(nn), pxStr);
6446
6447 return True;
6448 }
6449 /* else fall through */
6450   }
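   /* Index example (illustrative): st1 {v7.h}[5], [x2] is the
      halfword case xx == 01, so laneSzB = 2 and, per the table
      above, ix = q:S:sz[1]; lane 5 = 101b therefore means q = 1,
      S = 0, sz = 10. */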
6451
6452   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
6453 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
6454 /* 31 29 23 20 14 9 4
6455 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
6456 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
6457 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
6458 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
6459   */
6460   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
6461 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
6462 && INSN(14,10) == BITS5(1,1,1,1,1)) {
6463      UInt szBlg2 = INSN(31,30);
6464 Bool isLD = INSN(22,22) == 1;
6465 Bool isAcqOrRel = INSN(15,15) == 1;
6466 UInt ss = INSN(20,16);
6467 UInt nn = INSN(9,5);
6468 UInt tt = INSN(4,0);
6469
6470      vassert(szBlg2 < 4);
6471 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6472 IRType ty = integerIRTypeOfSize(szB);
6473 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6474
6475      IRTemp ea = newTemp(Ity_I64);
6476 assign(ea, getIReg64orSP(nn));
6477 /* FIXME generate check that ea is szB-aligned */
6478
6479      if (isLD && ss == BITS5(1,1,1,1,1)) {
6480 IRTemp res = newTemp(ty);
6481 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
6482 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6483 if (isAcqOrRel) {
6484 stmt(IRStmt_MBE(Imbe_Fence));
6485 }
6486 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6487 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6488 return True;
6489 }
6490 if (!isLD) {
6491 if (isAcqOrRel) {
6492 stmt(IRStmt_MBE(Imbe_Fence));
6493 }
6494 IRTemp res = newTemp(Ity_I1);
6495 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6496 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
6497 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
6498 Need to set rS to 1 on failure, 0 on success. */
6499 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
6500 mkU64(1)));
6501 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
6502 nameIRegOrZR(False, ss),
6503 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6504 return True;
6505 }
6506 /* else fall through */
6507 }
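   /* Usage sketch (illustrative): the Xor with 1 above maps the IR
      convention (1 = store succeeded) onto the architectural one
      (Ws == 0 on success), so a guest lock acquire such as

         retry: ldaxr w1, [x0]
                stlxr w2, w3, [x0]
                cbnz  w2, retry

      keeps retrying while the status register comes back nonzero. */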
6508
6509 /* ------------------ LDA{R,RH,RB} ------------------ */
6510 /* ------------------ STL{R,RH,RB} ------------------ */
6511 /* 31 29 23 20 14 9 4
6512 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
6513 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
6514 */
6515 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
6516 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
6517 UInt szBlg2 = INSN(31,30);
6518 Bool isLD = INSN(22,22) == 1;
6519 UInt nn = INSN(9,5);
6520 UInt tt = INSN(4,0);
6521
6522 vassert(szBlg2 < 4);
6523 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
6524 IRType ty = integerIRTypeOfSize(szB);
6525 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
6526
6527 IRTemp ea = newTemp(Ity_I64);
6528 assign(ea, getIReg64orSP(nn));
6529 /* FIXME generate check that ea is szB-aligned */
6530
6531 if (isLD) {
6532 IRTemp res = newTemp(ty);
6533 assign(res, loadLE(ty, mkexpr(ea)));
6534 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
6535 stmt(IRStmt_MBE(Imbe_Fence));
6536 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
6537 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6538 } else {
6539 stmt(IRStmt_MBE(Imbe_Fence));
6540 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
6541 storeLE(mkexpr(ea), data);
6542 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
6543 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
6544 }
6545 return True;
sewardjbbcf1882014-01-12 12:49:10 +00006546 }
6547
sewardj5b924c82014-10-30 23:56:10 +00006548 /* ------------------ PRFM (immediate) ------------------ */
6549 /* 31 21 9 4
6550      11 111 00110    imm12    n t   PRFM prfop=Rt, [Xn|SP, #pimm]
6551 */
6552 if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
6553 UInt imm12 = INSN(21,10);
6554 UInt nn = INSN(9,5);
6555 UInt tt = INSN(4,0);
6556 /* Generating any IR here is pointless, except for documentation
6557 purposes, as it will get optimised away later. */
6558 IRTemp ea = newTemp(Ity_I64);
6559 assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
6560 DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
6561 return True;
6562 }
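
   /* Example (illustrative only): "prfm pldl1keep, [x0, #256]" has
      prfop = Rt = 0b00000 (type PLD, target L1, policy KEEP) and
      imm12 = 32, the immediate being scaled by 8 in this encoding --
      hence the "imm12 * 8" above. */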
6563
sewardj48453e32015-08-16 11:44:30 +00006564 /* ------------------ PRFM (register) ------------------ */
6565 /* 31 29 22 20 15 12 11 9 4
6566      11 1110001 01 Rm opt S 10 Rn Rt    PRFM prfop=Rt, [Xn|SP, R<m>{ext/sh}]
6567 */
6568 if (INSN(31,21) == BITS11(1,1,1,1,1,0,0,0,1,0,1)
6569 && INSN(11,10) == BITS2(1,0)) {
6570 HChar dis_buf[64];
6571 UInt tt = INSN(4,0);
6572 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
6573 if (ea != IRTemp_INVALID) {
6574 /* No actual code to generate. */
6575 DIP("prfm prfop=%u, %s\n", tt, dis_buf);
6576 return True;
6577 }
6578 }
6579
sewardjbbcf1882014-01-12 12:49:10 +00006580 vex_printf("ARM64 front end: load_store\n");
6581 return False;
6582# undef INSN
6583}
6584
6585
6586/*------------------------------------------------------------*/
6587/*--- Control flow and misc instructions ---*/
6588/*------------------------------------------------------------*/
6589
6590static
sewardj65902992014-05-03 21:20:56 +00006591Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
floriancacba8e2014-12-15 18:58:07 +00006592 const VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00006593{
6594# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6595
6596 /* ---------------------- B cond ----------------------- */
6597 /* 31 24 4 3
6598 0101010 0 imm19 0 cond */
6599 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
6600 UInt cond = INSN(3,0);
6601 ULong uimm64 = INSN(23,5) << 2;
6602 Long simm64 = (Long)sx_to_64(uimm64, 21);
6603 vassert(dres->whatNext == Dis_Continue);
6604 vassert(dres->len == 4);
6605 vassert(dres->continueAt == 0);
6606 vassert(dres->jk_StopHere == Ijk_INVALID);
6607 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
6608 Ijk_Boring,
6609 IRConst_U64(guest_PC_curr_instr + simm64),
6610 OFFB_PC) );
6611 putPC(mkU64(guest_PC_curr_instr + 4));
6612 dres->whatNext = Dis_StopHere;
6613 dres->jk_StopHere = Ijk_Boring;
6614 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
6615 return True;
6616 }
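
   /* Offset arithmetic, as an example (illustrative only): "b.eq ."
      has imm19 = 0, so the exit target is guest_PC_curr_instr itself;
      a branch to the previous insn has imm19 = 0x7FFFF, giving
      uimm64 = 0x1FFFFC, which sign-extends from bit 20 to -4. */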
6617
6618 /* -------------------- B{L} uncond -------------------- */
6619 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
6620 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
6621         100101 imm26  BL   (PC + sxTo64(imm26 << 2))
6622 */
6623 UInt bLink = INSN(31,31);
6624 ULong uimm64 = INSN(25,0) << 2;
6625 Long simm64 = (Long)sx_to_64(uimm64, 28);
6626 if (bLink) {
6627 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
6628 }
6629 putPC(mkU64(guest_PC_curr_instr + simm64));
6630 dres->whatNext = Dis_StopHere;
6631 dres->jk_StopHere = Ijk_Call;
6632 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
6633 guest_PC_curr_instr + simm64);
6634 return True;
6635 }
6636
6637 /* --------------------- B{L} reg --------------------- */
6638 /* 31 24 22 20 15 9 4
6639 1101011 00 10 11111 000000 nn 00000 RET Rn
6640 1101011 00 01 11111 000000 nn 00000 CALL Rn
6641 1101011 00 00 11111 000000 nn 00000 JMP Rn
6642 */
6643 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
6644 && INSN(20,16) == BITS5(1,1,1,1,1)
6645 && INSN(15,10) == BITS6(0,0,0,0,0,0)
6646 && INSN(4,0) == BITS5(0,0,0,0,0)) {
6647 UInt branch_type = INSN(22,21);
6648 UInt nn = INSN(9,5);
6649 if (branch_type == BITS2(1,0) /* RET */) {
6650 putPC(getIReg64orZR(nn));
6651 dres->whatNext = Dis_StopHere;
6652 dres->jk_StopHere = Ijk_Ret;
6653 DIP("ret %s\n", nameIReg64orZR(nn));
6654 return True;
6655 }
6656 if (branch_type == BITS2(0,1) /* CALL */) {
sewardj702054e2014-05-07 11:09:28 +00006657 IRTemp dst = newTemp(Ity_I64);
6658 assign(dst, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00006659 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
sewardj702054e2014-05-07 11:09:28 +00006660 putPC(mkexpr(dst));
sewardjbbcf1882014-01-12 12:49:10 +00006661 dres->whatNext = Dis_StopHere;
6662 dres->jk_StopHere = Ijk_Call;
6663 DIP("blr %s\n", nameIReg64orZR(nn));
6664 return True;
6665 }
6666 if (branch_type == BITS2(0,0) /* JMP */) {
6667 putPC(getIReg64orZR(nn));
6668 dres->whatNext = Dis_StopHere;
6669 dres->jk_StopHere = Ijk_Boring;
6670 DIP("jmp %s\n", nameIReg64orZR(nn));
6671 return True;
6672 }
6673 }
6674
6675 /* -------------------- CB{N}Z -------------------- */
6676 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6677 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
6678 */
6679 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
6680 Bool is64 = INSN(31,31) == 1;
6681 Bool bIfZ = INSN(24,24) == 0;
6682 ULong uimm64 = INSN(23,5) << 2;
6683 UInt rT = INSN(4,0);
6684 Long simm64 = (Long)sx_to_64(uimm64, 21);
6685 IRExpr* cond = NULL;
6686 if (is64) {
6687 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6688 getIReg64orZR(rT), mkU64(0));
6689 } else {
6690 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
6691 getIReg32orZR(rT), mkU32(0));
6692 }
6693 stmt( IRStmt_Exit(cond,
6694 Ijk_Boring,
6695 IRConst_U64(guest_PC_curr_instr + simm64),
6696 OFFB_PC) );
6697 putPC(mkU64(guest_PC_curr_instr + 4));
6698 dres->whatNext = Dis_StopHere;
6699 dres->jk_StopHere = Ijk_Boring;
6700 DIP("cb%sz %s, 0x%llx\n",
6701 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
6702 guest_PC_curr_instr + simm64);
6703 return True;
6704 }
6705
6706 /* -------------------- TB{N}Z -------------------- */
6707 /* 31 30 24 23 18 5 4
6708 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6709 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
6710 */
6711 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
6712 UInt b5 = INSN(31,31);
6713 Bool bIfZ = INSN(24,24) == 0;
6714 UInt b40 = INSN(23,19);
6715 UInt imm14 = INSN(18,5);
6716 UInt tt = INSN(4,0);
6717 UInt bitNo = (b5 << 5) | b40;
6718 ULong uimm64 = imm14 << 2;
6719 Long simm64 = sx_to_64(uimm64, 16);
6720 IRExpr* cond
6721 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6722 binop(Iop_And64,
6723 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6724 mkU64(1)),
6725 mkU64(0));
6726 stmt( IRStmt_Exit(cond,
6727 Ijk_Boring,
6728 IRConst_U64(guest_PC_curr_instr + simm64),
6729 OFFB_PC) );
6730 putPC(mkU64(guest_PC_curr_instr + 4));
6731 dres->whatNext = Dis_StopHere;
6732 dres->jk_StopHere = Ijk_Boring;
6733 DIP("tb%sz %s, #%u, 0x%llx\n",
6734 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6735 guest_PC_curr_instr + simm64);
6736 return True;
6737 }
6738
6739 /* -------------------- SVC -------------------- */
6740 /* 11010100 000 imm16 000 01
6741 Don't bother with anything except the imm16==0 case.
6742 */
6743 if (INSN(31,0) == 0xD4000001) {
6744 putPC(mkU64(guest_PC_curr_instr + 4));
6745 dres->whatNext = Dis_StopHere;
6746 dres->jk_StopHere = Ijk_Sys_syscall;
6747 DIP("svc #0\n");
6748 return True;
6749 }
6750
6751 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00006752 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00006753 0xD51BD0 010 Rt MSR tpidr_el0, rT
6754 0xD53BD0 010 Rt MRS rT, tpidr_el0
6755 */
6756 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6757 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6758 Bool toSys = INSN(21,21) == 0;
6759 UInt tt = INSN(4,0);
6760 if (toSys) {
6761 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6762 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6763 } else {
6764 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6765 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6766 }
6767 return True;
6768 }
sewardj6eb5ef82014-07-14 20:39:23 +00006769 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00006770 0xD51B44 000 Rt MSR fpcr, rT
6771      0xD53B44 000 Rt  MRS rT, fpcr
6772 */
6773 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6774 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6775 Bool toSys = INSN(21,21) == 0;
6776 UInt tt = INSN(4,0);
6777 if (toSys) {
6778 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6779 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6780 } else {
6781 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6782 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6783 }
6784 return True;
6785 }
sewardj6eb5ef82014-07-14 20:39:23 +00006786 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00006787 0xD51B44 001 Rt MSR fpsr, rT
6788      0xD53B44 001 Rt  MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00006789 The only part of this we model is FPSR.QC. All other bits
6790 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00006791 */
6792 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6793 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6794 Bool toSys = INSN(21,21) == 0;
6795 UInt tt = INSN(4,0);
6796 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00006797 /* Just deal with FPSR.QC. Make up a V128 value which is
6798 zero if Xt[27] is zero and any other value if Xt[27] is
6799 nonzero. */
6800 IRTemp qc64 = newTemp(Ity_I64);
6801 assign(qc64, binop(Iop_And64,
6802 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6803 mkU64(1)));
6804 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6805 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00006806 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6807 } else {
sewardja0645d52014-06-28 22:11:16 +00006808 /* Generate a value which is all zeroes except for bit 27,
6809 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00006810 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00006811 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6812 IRTemp qc64 = newTemp(Ity_I64);
6813 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6814 unop(Iop_V128to64, mkexpr(qcV128))));
6815 IRExpr* res = binop(Iop_Shl64,
6816 unop(Iop_1Uto64,
6817 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6818 mkU8(27));
6819 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00006820 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6821 }
6822 return True;
6823 }
sewardj6eb5ef82014-07-14 20:39:23 +00006824 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00006825 D51B42 000 Rt MSR nzcv, rT
6826 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00006827 The only parts of NZCV that actually exist are bits 31:28, which
6828 are the N Z C and V bits themselves. Hence the flags thunk provides
6829 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00006830 */
6831 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6832 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6833 Bool toSys = INSN(21,21) == 0;
6834 UInt tt = INSN(4,0);
6835 if (toSys) {
6836 IRTemp t = newTemp(Ity_I64);
6837 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6838 setFlags_COPY(t);
6839            DIP("msr nzcv, %s\n", nameIReg32orZR(tt));
6840 } else {
6841 IRTemp res = newTemp(Ity_I64);
6842 assign(res, mk_arm64g_calculate_flags_nzcv());
6843 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6844 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6845 }
6846 return True;
6847 }
sewardj6eb5ef82014-07-14 20:39:23 +00006848 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00006849 Don't support arbitrary reads and writes to this register. Just
6850 return the value 16, which indicates that the DC ZVA instruction
6851 is not permitted, so we don't have to emulate it.
6852 D5 3B 00 111 Rt MRS rT, dczid_el0
6853 */
6854 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6855 UInt tt = INSN(4,0);
6856 putIReg64orZR(tt, mkU64(1<<4));
6857 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6858 return True;
6859 }
sewardj6eb5ef82014-07-14 20:39:23 +00006860 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00006861 We just handle reads, and make up a value from the D and I line
6862 sizes in the VexArchInfo we are given, and patch in the following
6863 fields that the Foundation model gives ("natively"):
6864 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6865      D5 3B 00 001 Rt  MRS rT, ctr_el0
6866 */
6867 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6868 UInt tt = INSN(4,0);
6869 /* Need to generate a value from dMinLine_lg2_szB and
6870         iMinLine_lg2_szB. The value in the register is in 32-bit
6871 units, so need to subtract 2 from the values in the
6872 VexArchInfo. We can assume that the values here are valid --
6873 disInstr_ARM64 checks them -- so there's no need to deal with
6874 out-of-range cases. */
6875 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6876 && archinfo->arm64_dMinLine_lg2_szB <= 17
6877 && archinfo->arm64_iMinLine_lg2_szB >= 2
6878 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6879 UInt val
6880 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6881 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6882 putIReg64orZR(tt, mkU64(val));
6883 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6884 return True;
6885 }
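
   /* Worked example (illustrative only): with 64-byte D and I lines,
      both *MinLine_lg2_szB values are 6, so each 4-bit field holds
      6 - 2 = 4 (the line size in 32-bit words is 2^4), and the value
      returned is 0x8440c000 | (4 << 16) | 4 = 0x8444c004. */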
sewardj6eb5ef82014-07-14 20:39:23 +00006886 /* ---- Cases for CNTVCT_EL0 ----
6887      This is the generic timer's virtual count register. Support reads of it only
6888 by passing through to the host.
6889 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6890 */
6891 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6892 UInt tt = INSN(4,0);
6893 IRTemp val = newTemp(Ity_I64);
6894 IRExpr** args = mkIRExprVec_0();
6895 IRDirty* d = unsafeIRDirty_1_N (
6896 val,
6897 0/*regparms*/,
6898 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6899 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6900 args
6901 );
6902 /* execute the dirty call, dumping the result in val. */
6903 stmt( IRStmt_Dirty(d) );
6904 putIReg64orZR(tt, mkexpr(val));
6905 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6906 return True;
6907 }
sewardjbbcf1882014-01-12 12:49:10 +00006908
sewardj65902992014-05-03 21:20:56 +00006909 /* ------------------ IC_IVAU ------------------ */
6910 /* D5 0B 75 001 Rt ic ivau, rT
6911 */
6912 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6913 /* We will always be provided with a valid iMinLine value. */
6914 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6915 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6916 /* Round the requested address, in rT, down to the start of the
6917 containing block. */
6918 UInt tt = INSN(4,0);
6919 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6920 IRTemp addr = newTemp(Ity_I64);
6921 assign( addr, binop( Iop_And64,
6922 getIReg64orZR(tt),
6923 mkU64(~(lineszB - 1))) );
6924 /* Set the invalidation range, request exit-and-invalidate, with
6925 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006926 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6927 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006928 /* be paranoid ... */
6929 stmt( IRStmt_MBE(Imbe_Fence) );
6930 putPC(mkU64( guest_PC_curr_instr + 4 ));
6931 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00006932 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00006933 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6934 return True;
6935 }
6936
6937 /* ------------------ DC_CVAU ------------------ */
6938 /* D5 0B 7B 001 Rt dc cvau, rT
6939 */
6940 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6941 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00006942 dMinLine size, and request an Ijk_FlushDCache instead of
6943 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00006944 /* We will always be provided with a valid dMinLine value. */
6945 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6946 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6947 /* Round the requested address, in rT, down to the start of the
6948 containing block. */
6949 UInt tt = INSN(4,0);
6950 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6951 IRTemp addr = newTemp(Ity_I64);
6952 assign( addr, binop( Iop_And64,
6953 getIReg64orZR(tt),
6954 mkU64(~(lineszB - 1))) );
6955 /* Set the flush range, request exit-and-flush, with
6956 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006957 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6958 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006959 /* be paranoid ... */
6960 stmt( IRStmt_MBE(Imbe_Fence) );
6961 putPC(mkU64( guest_PC_curr_instr + 4 ));
6962 dres->whatNext = Dis_StopHere;
6963 dres->jk_StopHere = Ijk_FlushDCache;
6964 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6965 return True;
6966 }
6967
6968 /* ------------------ ISB, DMB, DSB ------------------ */
sewardj25842552014-10-31 10:25:19 +00006969 /* 31 21 11 7 6 4
6970 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6971 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6972 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6973 */
6974 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6975 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6976 && INSN(7,7) == 1
6977 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6978 UInt opc = INSN(6,5);
6979 UInt CRm = INSN(11,8);
6980 vassert(opc <= 2 && CRm <= 15);
sewardjd512d102014-02-21 14:49:44 +00006981 stmt(IRStmt_MBE(Imbe_Fence));
sewardj25842552014-10-31 10:25:19 +00006982 const HChar* opNames[3]
6983 = { "dsb", "dmb", "isb" };
6984 const HChar* howNames[16]
6985 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6986 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6987 DIP("%s %s\n", opNames[opc], howNames[CRm]);
sewardj65902992014-05-03 21:20:56 +00006988 return True;
6989 }
sewardjbbcf1882014-01-12 12:49:10 +00006990
sewardjdc9259c2014-02-27 11:10:19 +00006991 /* -------------------- NOP -------------------- */
6992 if (INSN(31,0) == 0xD503201F) {
6993 DIP("nop\n");
6994 return True;
6995 }
6996
sewardj39b51682014-11-25 12:17:53 +00006997 /* -------------------- BRK -------------------- */
6998 /* 31 23 20 4
6999 1101 0100 001 imm16 00000 BRK #imm16
7000 */
7001 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
7002 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
7003 UInt imm16 = INSN(20,5);
7004 /* Request SIGTRAP and then restart of this insn. */
7005 putPC(mkU64(guest_PC_curr_instr + 0));
7006 dres->whatNext = Dis_StopHere;
7007 dres->jk_StopHere = Ijk_SigTRAP;
7008 DIP("brk #%u\n", imm16);
7009 return True;
7010 }
7011
sewardje3115c12015-08-18 19:55:16 +00007012 /* ------------------- YIELD ------------------- */
7013 /* 31 23 15 7
7014 1101 0101 0000 0011 0010 0000 0011 1111
7015 */
7016 if (INSN(31,0) == 0xD503203F) {
7017 /* Request yield followed by continuation at the next insn. */
7018 putPC(mkU64(guest_PC_curr_instr + 4));
7019 dres->whatNext = Dis_StopHere;
7020 dres->jk_StopHere = Ijk_Yield;
7021 DIP("yield\n");
7022 return True;
7023 }
7024
Elliott Hughesa0664b92017-04-18 17:46:52 -07007025 /* ------------------- CLREX ------------------ */
7026 /* 31 23 15 11 7
7027 1101 0101 0000 0011 0011 m 0101 1111 CLREX CRm
7028 CRm is apparently ignored.
7029 */
7030 if ((INSN(31,0) & 0xFFFFF0FF) == 0xD503305F) {
7031 UInt mm = INSN(11,8);
7032 /* AFAICS, this simply cancels a (all?) reservations made by a
7033         (any?) preceding LDXR(s). Arrange to hand it through to
7034 the back end. */
7035 stmt( IRStmt_MBE(Imbe_CancelReservation) );
7036 DIP("clrex #%u\n", mm);
7037 return True;
7038 }
7039
sewardjbbcf1882014-01-12 12:49:10 +00007040 vex_printf("ARM64 front end: branch_etc\n");
7041 return False;
7042# undef INSN
7043}
7044
7045
7046/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00007047/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00007048/*------------------------------------------------------------*/
7049
sewardjd96daf62014-06-15 08:17:35 +00007050/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00007051
sewardjd96daf62014-06-15 08:17:35 +00007052static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7053 // returns a0 b0
7054 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
7055}
sewardje520bb32014-02-17 11:00:53 +00007056
sewardjd96daf62014-06-15 08:17:35 +00007057static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
7058 // returns a1 b1
7059 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
7060}
sewardje520bb32014-02-17 11:00:53 +00007061
sewardjd96daf62014-06-15 08:17:35 +00007062static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7063 // returns a2 a0 b2 b0
7064 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
7065}
7066
7067static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
7068 // returns a3 a1 b3 b1
7069 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
7070}
7071
7072static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
7073 // returns a1 b1 a0 b0
7074 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
7075}
7076
7077static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
7078 // returns a3 b3 a2 b2
7079 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
7080}
7081
7082static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7083 // returns a6 a4 a2 a0 b6 b4 b2 b0
7084 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7085}
7086
7087static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7088 // returns a7 a5 a3 a1 b7 b5 b3 b1
7089 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
7090}
7091
7092static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7093 // returns a3 b3 a2 b2 a1 b1 a0 b0
7094 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
7095}
7096
7097static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
7098 // returns a7 b7 a6 b6 a5 b5 a4 b4
7099 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
7100}
7101
7102static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7103 IRTemp bFEDCBA9876543210 ) {
7104 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7105 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
7106 mkexpr(bFEDCBA9876543210));
7107}
7108
7109static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7110 IRTemp bFEDCBA9876543210 ) {
7111 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7112 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
7113 mkexpr(bFEDCBA9876543210));
7114}
7115
7116static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7117 IRTemp bFEDCBA9876543210 ) {
7118 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7119 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
7120 mkexpr(bFEDCBA9876543210));
7121}
7122
7123static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7124 IRTemp bFEDCBA9876543210 ) {
7125 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7126 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
7127 mkexpr(bFEDCBA9876543210));
7128}
sewardjecde6972014-02-05 11:01:19 +00007129
sewardjbbcf1882014-01-12 12:49:10 +00007130/* Generate N copies of |bit| in the bottom of a ULong. */
7131static ULong Replicate ( ULong bit, Int N )
7132{
sewardj606c4ba2014-01-26 19:11:14 +00007133 vassert(bit <= 1 && N >= 1 && N < 64);
7134 if (bit == 0) {
7135 return 0;
7136 } else {
7137 /* Careful. This won't work for N == 64. */
7138 return (1ULL << N) - 1;
7139 }
sewardjbbcf1882014-01-12 12:49:10 +00007140}
7141
sewardjfab09142014-02-10 10:28:13 +00007142static ULong Replicate32x2 ( ULong bits32 )
7143{
7144 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
7145 return (bits32 << 32) | bits32;
7146}
7147
7148static ULong Replicate16x4 ( ULong bits16 )
7149{
7150 vassert(0 == (bits16 & ~0xFFFFULL));
7151 return Replicate32x2((bits16 << 16) | bits16);
7152}
7153
7154static ULong Replicate8x8 ( ULong bits8 )
7155{
7156 vassert(0 == (bits8 & ~0xFFULL));
7157 return Replicate16x4((bits8 << 8) | bits8);
7158}
7159
7160/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
7161 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
7162 is 64. In the former case, the upper 32 bits of the returned value
7163 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00007164static ULong VFPExpandImm ( ULong imm8, Int N )
7165{
sewardj606c4ba2014-01-26 19:11:14 +00007166 vassert(imm8 <= 0xFF);
7167 vassert(N == 32 || N == 64);
7168 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
7169 Int F = N - E - 1;
7170 ULong imm8_6 = (imm8 >> 6) & 1;
7171 /* sign: 1 bit */
7172 /* exp: E bits */
7173 /* frac: F bits */
7174 ULong sign = (imm8 >> 7) & 1;
7175 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
7176 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
7177 vassert(sign < (1ULL << 1));
7178 vassert(exp < (1ULL << E));
7179 vassert(frac < (1ULL << F));
7180 vassert(1 + E + F == N);
7181 ULong res = (sign << (E+F)) | (exp << F) | frac;
7182 return res;
sewardjbbcf1882014-01-12 12:49:10 +00007183}
7184
sewardjfab09142014-02-10 10:28:13 +00007185/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
7186 This might fail, as indicated by the returned Bool. Page 2530 of
7187 the manual. */
7188static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
7189 UInt op, UInt cmode, UInt imm8 )
7190{
7191 vassert(op <= 1);
7192 vassert(cmode <= 15);
7193 vassert(imm8 <= 255);
7194
7195 *res = 0; /* will overwrite iff returning True */
7196
7197 ULong imm64 = 0;
7198 Bool testimm8 = False;
7199
7200 switch (cmode >> 1) {
7201 case 0:
7202 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7203 case 1:
7204 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7205 case 2:
7206 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7207 case 3:
7208 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7209 case 4:
7210 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7211 case 5:
7212 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7213 case 6:
7214 testimm8 = True;
7215 if ((cmode & 1) == 0)
7216 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7217 else
7218 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7219 break;
7220 case 7:
7221 testimm8 = False;
7222 if ((cmode & 1) == 0 && op == 0)
7223 imm64 = Replicate8x8(imm8);
7224 if ((cmode & 1) == 0 && op == 1) {
7225 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7226 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7227 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7228 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7229 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7230 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7231 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7232 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7233 }
7234 if ((cmode & 1) == 1 && op == 0) {
7235 ULong imm8_7 = (imm8 >> 7) & 1;
7236 ULong imm8_6 = (imm8 >> 6) & 1;
7237 ULong imm8_50 = imm8 & 63;
7238 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7239 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7240 | (Replicate(imm8_6, 5) << (6 + 19))
7241 | (imm8_50 << 19);
7242 imm64 = Replicate32x2(imm32);
7243 }
7244 if ((cmode & 1) == 1 && op == 1) {
7245 // imm64 = imm8<7>:NOT(imm8<6>)
7246 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7247 ULong imm8_7 = (imm8 >> 7) & 1;
7248 ULong imm8_6 = (imm8 >> 6) & 1;
7249 ULong imm8_50 = imm8 & 63;
7250 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7251 | (Replicate(imm8_6, 8) << 54)
7252 | (imm8_50 << 48);
7253 }
7254 break;
7255 default:
7256 vassert(0);
7257 }
7258
7259 if (testimm8 && imm8 == 0)
7260 return False;
7261
7262 *res = imm64;
7263 return True;
7264}
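
/* Some example expansions (illustrative only; deliberately not
   compiled in): */
#if 0
static void selftest_AdvSIMDExpandImm ( void )
{
   ULong res = 0;
   /* cmode 1110, op 0: replicate the byte to all 8 byte lanes. */
   vassert(AdvSIMDExpandImm(&res, 0, 14, 0xAB)
           && res == 0xABABABABABABABABULL);
   /* cmode 1110, op 1: each imm8 bit selects an all-zeroes or
      all-ones byte. */
   vassert(AdvSIMDExpandImm(&res, 1, 14, 0xA5)
           && res == 0xFF00FF0000FF00FFULL);
   /* cmode 001x requires a nonzero imm8. */
   vassert(! AdvSIMDExpandImm(&res, 0, 2, 0));
}
#endif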
7265
sewardj606c4ba2014-01-26 19:11:14 +00007266/* Help a bit for decoding laneage for vector operations that can be
7267 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7268 and SZ bits, typically for vector floating point. */
7269static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7270 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7271 /*OUT*/const HChar** arrSpec,
7272 Bool bitQ, Bool bitSZ )
7273{
7274 vassert(bitQ == True || bitQ == False);
7275 vassert(bitSZ == True || bitSZ == False);
7276 if (bitQ && bitSZ) { // 2x64
7277 if (tyI) *tyI = Ity_I64;
7278 if (tyF) *tyF = Ity_F64;
7279 if (nLanes) *nLanes = 2;
7280 if (zeroUpper) *zeroUpper = False;
7281 if (arrSpec) *arrSpec = "2d";
7282 return True;
7283 }
7284 if (bitQ && !bitSZ) { // 4x32
7285 if (tyI) *tyI = Ity_I32;
7286 if (tyF) *tyF = Ity_F32;
7287 if (nLanes) *nLanes = 4;
7288 if (zeroUpper) *zeroUpper = False;
7289 if (arrSpec) *arrSpec = "4s";
7290 return True;
7291 }
7292 if (!bitQ && !bitSZ) { // 2x32
7293 if (tyI) *tyI = Ity_I32;
7294 if (tyF) *tyF = Ity_F32;
7295 if (nLanes) *nLanes = 2;
7296 if (zeroUpper) *zeroUpper = True;
7297 if (arrSpec) *arrSpec = "2s";
7298 return True;
7299 }
7300 // Else impliedly 1x64, which isn't allowed.
7301 return False;
7302}
7303
sewardje520bb32014-02-17 11:00:53 +00007304/* Helper for decoding laneage for shift-style vector operations
7305 that involve an immediate shift amount. */
7306static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7307 UInt immh, UInt immb )
7308{
7309 vassert(immh < (1<<4));
7310 vassert(immb < (1<<3));
7311 UInt immhb = (immh << 3) | immb;
7312 if (immh & 8) {
7313 if (shift) *shift = 128 - immhb;
7314 if (szBlg2) *szBlg2 = 3;
7315 return True;
7316 }
7317 if (immh & 4) {
7318 if (shift) *shift = 64 - immhb;
7319 if (szBlg2) *szBlg2 = 2;
7320 return True;
7321 }
7322 if (immh & 2) {
7323 if (shift) *shift = 32 - immhb;
7324 if (szBlg2) *szBlg2 = 1;
7325 return True;
7326 }
7327 if (immh & 1) {
7328 if (shift) *shift = 16 - immhb;
7329 if (szBlg2) *szBlg2 = 0;
7330 return True;
7331 }
7332 return False;
7333}
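
/* For example (illustrative only): immh:immb = 0001:011 gives
   immhb = 11, hence 8-bit lanes (szBlg2 = 0) and shift = 16 - 11 = 5,
   whereas immh:immb = 1001:010 gives immhb = 74, hence 64-bit lanes
   and shift = 128 - 74 = 54.  The leading set bit of immh determines
   the lane size; immh == 0 is rejected. */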
7334
sewardjecde6972014-02-05 11:01:19 +00007335/* Generate IR to fold all lanes of the V128 value in 'src' as
7336 characterised by the operator 'op', and return the result in the
7337 bottom bits of a V128, with all other bits set to zero. */
sewardjdf9d6d52014-06-27 10:43:22 +00007338static IRTemp math_FOLDV ( IRTemp src, IROp op )
sewardjecde6972014-02-05 11:01:19 +00007339{
7340 /* The basic idea is to use repeated applications of Iop_CatEven*
7341 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7342 a complete vector. Then fold all those vectors with 'op' and
7343 zero out all but the least significant lane. */
7344 switch (op) {
7345 case Iop_Min8Sx16: case Iop_Min8Ux16:
sewardjb9aff1e2014-06-15 21:55:33 +00007346 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
sewardjfab09142014-02-10 10:28:13 +00007347 /* NB: temp naming here is misleading -- the naming is for 8
7348 lanes of 16 bit, whereas what is being operated on is 16
7349 lanes of 8 bits. */
7350 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007351 IRTemp x76547654 = newTempV128();
7352 IRTemp x32103210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007353 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7354 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007355 IRTemp x76767676 = newTempV128();
7356 IRTemp x54545454 = newTempV128();
7357 IRTemp x32323232 = newTempV128();
7358 IRTemp x10101010 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007359 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7360 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7361 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7362 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007363 IRTemp x77777777 = newTempV128();
7364 IRTemp x66666666 = newTempV128();
7365 IRTemp x55555555 = newTempV128();
7366 IRTemp x44444444 = newTempV128();
7367 IRTemp x33333333 = newTempV128();
7368 IRTemp x22222222 = newTempV128();
7369 IRTemp x11111111 = newTempV128();
7370 IRTemp x00000000 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007371 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7372 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7373 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7374 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7375 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7376 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7377 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7378 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7379 /* Naming not misleading after here. */
sewardj8e91fd42014-07-11 12:05:47 +00007380 IRTemp xAllF = newTempV128();
7381 IRTemp xAllE = newTempV128();
7382 IRTemp xAllD = newTempV128();
7383 IRTemp xAllC = newTempV128();
7384 IRTemp xAllB = newTempV128();
7385 IRTemp xAllA = newTempV128();
7386 IRTemp xAll9 = newTempV128();
7387 IRTemp xAll8 = newTempV128();
7388 IRTemp xAll7 = newTempV128();
7389 IRTemp xAll6 = newTempV128();
7390 IRTemp xAll5 = newTempV128();
7391 IRTemp xAll4 = newTempV128();
7392 IRTemp xAll3 = newTempV128();
7393 IRTemp xAll2 = newTempV128();
7394 IRTemp xAll1 = newTempV128();
7395 IRTemp xAll0 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007396 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7397 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7398 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7399 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7400 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7401 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7402 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7403 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7404 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7405 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7406 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7407 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7408 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7409 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7410 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7411 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
sewardj8e91fd42014-07-11 12:05:47 +00007412 IRTemp maxFE = newTempV128();
7413 IRTemp maxDC = newTempV128();
7414 IRTemp maxBA = newTempV128();
7415 IRTemp max98 = newTempV128();
7416 IRTemp max76 = newTempV128();
7417 IRTemp max54 = newTempV128();
7418 IRTemp max32 = newTempV128();
7419 IRTemp max10 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007420 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7421 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7422 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7423 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7424 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7425 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7426 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7427 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
sewardj8e91fd42014-07-11 12:05:47 +00007428 IRTemp maxFEDC = newTempV128();
7429 IRTemp maxBA98 = newTempV128();
7430 IRTemp max7654 = newTempV128();
7431 IRTemp max3210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007432 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7433 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7434 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7435 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007436 IRTemp maxFEDCBA98 = newTempV128();
7437 IRTemp max76543210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007438 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7439 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007440 IRTemp maxAllLanes = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007441 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7442 mkexpr(max76543210)));
sewardj8e91fd42014-07-11 12:05:47 +00007443 IRTemp res = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007444 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7445 return res;
sewardjecde6972014-02-05 11:01:19 +00007446 }
7447 case Iop_Min16Sx8: case Iop_Min16Ux8:
sewardjb9aff1e2014-06-15 21:55:33 +00007448 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
sewardjecde6972014-02-05 11:01:19 +00007449 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007450 IRTemp x76547654 = newTempV128();
7451 IRTemp x32103210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007452 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7453 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007454 IRTemp x76767676 = newTempV128();
7455 IRTemp x54545454 = newTempV128();
7456 IRTemp x32323232 = newTempV128();
7457 IRTemp x10101010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007458 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7459 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7460 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7461 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007462 IRTemp x77777777 = newTempV128();
7463 IRTemp x66666666 = newTempV128();
7464 IRTemp x55555555 = newTempV128();
7465 IRTemp x44444444 = newTempV128();
7466 IRTemp x33333333 = newTempV128();
7467 IRTemp x22222222 = newTempV128();
7468 IRTemp x11111111 = newTempV128();
7469 IRTemp x00000000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007470 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7471 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7472 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7473 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7474 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7475 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7476 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7477 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
sewardj8e91fd42014-07-11 12:05:47 +00007478 IRTemp max76 = newTempV128();
7479 IRTemp max54 = newTempV128();
7480 IRTemp max32 = newTempV128();
7481 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007482 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7483 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7484 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7485 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
sewardj8e91fd42014-07-11 12:05:47 +00007486 IRTemp max7654 = newTempV128();
7487 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007488 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7489 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007490 IRTemp max76543210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007491 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007492 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007493 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7494 return res;
7495 }
sewardj5cb53e72015-02-08 12:08:56 +00007496 case Iop_Max32Fx4: case Iop_Min32Fx4:
sewardjecde6972014-02-05 11:01:19 +00007497 case Iop_Min32Sx4: case Iop_Min32Ux4:
sewardjb9aff1e2014-06-15 21:55:33 +00007498 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
sewardjecde6972014-02-05 11:01:19 +00007499 IRTemp x3210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007500 IRTemp x3232 = newTempV128();
7501 IRTemp x1010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007502 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7503 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
sewardj8e91fd42014-07-11 12:05:47 +00007504 IRTemp x3333 = newTempV128();
7505 IRTemp x2222 = newTempV128();
7506 IRTemp x1111 = newTempV128();
7507 IRTemp x0000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007508 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7509 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7510 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7511 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
sewardj8e91fd42014-07-11 12:05:47 +00007512 IRTemp max32 = newTempV128();
7513 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007514 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7515 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007516 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007517 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007518 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007519 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7520 return res;
7521 }
sewardja5a6b752014-06-30 07:33:56 +00007522 case Iop_Add64x2: {
7523 IRTemp x10 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007524 IRTemp x00 = newTempV128();
7525 IRTemp x11 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007526 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7527 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
sewardj8e91fd42014-07-11 12:05:47 +00007528 IRTemp max10 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007529 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
sewardj8e91fd42014-07-11 12:05:47 +00007530 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007531 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7532 return res;
7533 }
sewardjecde6972014-02-05 11:01:19 +00007534 default:
7535 vassert(0);
7536 }
7537}
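
/* To see the idea (illustrative only), consider the 32x4 case with
   lanes x3 x2 x1 x0.  The CatOdd/CatEven steps first build x3232 and
   x1010, then x3333, x2222, x1111 and x0000, each one lane cloned
   across the whole vector.  Folding with |op| then leaves
   op(op(x3,x2),op(x1,x0)) in every lane, and ZeroHI96ofV128 keeps
   just lane 0 of that. */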
7538
7539
sewardj92d0ae32014-04-03 13:48:54 +00007540/* Generate IR for TBL and TBX. This deals with the 128 bit case
7541 only. */
7542static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7543 IRTemp oor_values )
7544{
7545 vassert(len >= 0 && len <= 3);
7546
7547 /* Generate some useful constants as concisely as possible. */
7548 IRTemp half15 = newTemp(Ity_I64);
7549 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7550 IRTemp half16 = newTemp(Ity_I64);
7551 assign(half16, mkU64(0x1010101010101010ULL));
7552
7553 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00007554 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007555 assign(allZero, mkV128(0x0000));
7556 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007557 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007558 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7559 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007560 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007561 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7562 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007563 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007564 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7565 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007566 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007567 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7568 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007569 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007570 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7571
7572 /* Group the 16/32/48/64 vectors so as to be indexable. */
7573 IRTemp allXX[4] = { all16, all32, all48, all64 };
7574
7575 /* Compute the result for each table vector, with zeroes in places
7576 where the index values are out of range, and OR them into the
7577 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00007578 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007579 assign(running_result, mkV128(0));
7580
7581 UInt tabent;
7582 for (tabent = 0; tabent <= len; tabent++) {
7583 vassert(tabent >= 0 && tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00007584 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007585 assign(bias,
7586 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00007587 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007588 assign(biased_indices,
7589 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00007590 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007591 assign(valid_mask,
7592 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007593 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007594 assign(safe_biased_indices,
7595 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00007596 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007597 assign(results_or_junk,
7598 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7599 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007600 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007601 assign(results_or_zero,
7602 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7603 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00007604 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007605 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7606 mkexpr(running_result)));
7607 running_result = tmp;
7608 }
7609
7610 /* So now running_result holds the overall result where the indices
7611 are in range, and zero in out-of-range lanes. Now we need to
7612 compute an overall validity mask and use this to copy in the
7613 lanes in the oor_values for out of range indices. This is
7614 unnecessary for TBL but will get folded out by iropt, so we lean
7615 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00007616 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007617 assign(overall_valid_mask,
7618 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00007619 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007620 assign(result,
7621 binop(Iop_OrV128,
7622 mkexpr(running_result),
7623 binop(Iop_AndV128,
7624 mkexpr(oor_values),
7625 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7626 return result;
7627}
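
/* A concrete trace (illustrative only), for len == 1, i.e. two table
   registers: a source byte of 19 is biased by 16 when tab[1] is
   processed, giving index 3; CmpGT8Ux16(all16, 3) marks the lane
   valid, and Perm8x16 fetches byte 3 of tab[1].  A source byte of 40
   fails the < 16 test for both table registers (as 40 and 40 - 16),
   so it contributes zero to |running_result|; it also fails the final
   CmpGT8Ux16(allXX[len], src) test, so that lane is taken from
   |oor_values| -- zero for TBL, the old destination value for TBX. */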
7628
7629
sewardj31b5a952014-06-26 07:41:14 +00007630/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7631 an op which takes two I64s and produces a V128. That is, a widening
7632 operator. Generate IR which applies |opI64x2toV128| to either the
7633 lower (if |is2| is False) or upper (if |is2| is True) halves of
7634 |argL| and |argR|, and return the value in a new IRTemp.
7635*/
7636static
7637IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7638 IRExpr* argL, IRExpr* argR )
7639{
sewardj8e91fd42014-07-11 12:05:47 +00007640 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00007641 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7642 assign(res, binop(opI64x2toV128, unop(slice, argL),
7643 unop(slice, argR)));
7644 return res;
7645}
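
/* Callers typically pass a widening multiply here; for instance (a
   sketch -- see the MULL handling further below), a 32x32 -> 64
   multiply would supply Iop_Mull32Sx2 or Iop_Mull32Ux2 as
   |opI64x2toV128|. */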
7646
7647
sewardjdf9d6d52014-06-27 10:43:22 +00007648/* Generate signed/unsigned absolute difference vector IR. */
7649static
7650IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7651{
sewardj6f312d02014-06-28 12:21:37 +00007652 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00007653 IRTemp argL = newTempV128();
7654 IRTemp argR = newTempV128();
7655 IRTemp msk = newTempV128();
7656 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00007657 assign(argL, argLE);
7658 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00007659 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00007660 mkexpr(argL), mkexpr(argR)));
7661 assign(res,
7662 binop(Iop_OrV128,
7663 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007664 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00007665 mkexpr(msk)),
7666 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007667 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00007668 unop(Iop_NotV128, mkexpr(msk)))));
7669 return res;
7670}
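
/* Per-lane arithmetic (illustrative only): for unsigned lanes with
   argL = 5 and argR = 9, the compare produces an all-zeroes mask, so
   the result selects argR - argL = 4; with the operands swapped, the
   mask is all-ones and argL - argR = 4 is selected instead.  The
   wrapped-around value in the unselected difference is masked out,
   so it does no harm. */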
7671
7672
sewardj6f312d02014-06-28 12:21:37 +00007673/* Generate IR that takes a V128 and sign- or zero-widens
7674 either the lower or upper set of lanes to twice-as-wide,
7675 resulting in a new V128 value. */
7676static
sewardja5a6b752014-06-30 07:33:56 +00007677IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7678 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00007679{
sewardj8e91fd42014-07-11 12:05:47 +00007680 IRTemp src = newTempV128();
7681 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00007682 assign(src, srcE);
7683 switch (sizeNarrow) {
7684 case X10:
7685 assign(res,
7686 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7687 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7688 : Iop_InterleaveLO32x4,
7689 mkexpr(src),
7690 mkexpr(src)),
7691 mkU8(32)));
7692 break;
7693 case X01:
7694 assign(res,
7695 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7696 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7697 : Iop_InterleaveLO16x8,
7698 mkexpr(src),
7699 mkexpr(src)),
7700 mkU8(16)));
7701 break;
7702 case X00:
7703 assign(res,
7704 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7705 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7706 : Iop_InterleaveLO8x16,
7707 mkexpr(src),
7708 mkexpr(src)),
7709 mkU8(8)));
7710 break;
7711 default:
7712 vassert(0);
7713 }
7714 return res;
7715}
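
/* The trick (illustrative only), shown for the X10 case: for 32-bit
   lanes s3 s2 s1 s0, InterleaveLO32x4(src, src) gives s1 s1 s0 s0.
   Viewing that as two 64-bit lanes and shifting right by 32 -- Shr
   for zero widening, Sar for sign widening -- leaves ext(s1) ext(s0),
   the widened lower half. */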
7716
7717
sewardja5a6b752014-06-30 07:33:56 +00007718/* Generate IR that takes a V128 and sign- or zero-widens
7719 either the even or odd lanes to twice-as-wide,
7720 resulting in a new V128 value. */
7721static
7722IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7723 UInt sizeNarrow, IRExpr* srcE )
7724{
sewardj8e91fd42014-07-11 12:05:47 +00007725 IRTemp src = newTempV128();
7726 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007727 IROp opSAR = mkVecSARN(sizeNarrow+1);
7728 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7729 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7730 IROp opSxR = zWiden ? opSHR : opSAR;
7731 UInt amt = 0;
7732 switch (sizeNarrow) {
7733 case X10: amt = 32; break;
7734 case X01: amt = 16; break;
7735 case X00: amt = 8; break;
7736 default: vassert(0);
7737 }
7738 assign(src, srcE);
7739 if (fromOdd) {
7740 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7741 } else {
7742 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7743 mkU8(amt)));
7744 }
7745 return res;
7746}
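
/* Likewise (illustrative only), for the X01 case: 16-bit lanes
   h7 .. h0 pair up into 32-bit lanes h7:h6 .. h1:h0.  For the odd
   lanes, a single shift right by 16 leaves ext(h7) ext(h5) ext(h3)
   ext(h1); for the even lanes, the shift left by 16 first discards
   the odd halves, and the shift right then gives ext(h6) ext(h4)
   ext(h2) ext(h0). */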
7747
7748
7749/* Generate IR that takes two V128s and narrows (takes lower half)
7750 of each lane, producing a single V128 value. */
7751static
7752IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7753{
sewardj8e91fd42014-07-11 12:05:47 +00007754 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007755 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7756 mkexpr(argHi), mkexpr(argLo)));
7757 return res;
7758}
7759
7760
sewardj487559e2014-07-10 14:22:45 +00007761/* Return a temp which holds the vector dup of the lane of width
7762 (1 << size) obtained from src[laneNo]. */
7763static
7764IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7765{
7766 vassert(size <= 3);
7767 /* Normalise |laneNo| so it is of the form
7768 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7769 This puts the bits we want to inspect at constant offsets
7770 regardless of the value of |size|.
7771 */
7772 UInt ix = laneNo << size;
7773 vassert(ix <= 15);
7774 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7775 switch (size) {
7776 case 0: /* B */
7777 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7778 /* fallthrough */
7779 case 1: /* H */
7780 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7781 /* fallthrough */
7782 case 2: /* S */
7783 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7784 /* fallthrough */
7785 case 3: /* D */
7786 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7787 break;
7788 default:
7789 vassert(0);
7790 }
sewardj8e91fd42014-07-11 12:05:47 +00007791 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007792 assign(res, src);
7793 Int i;
7794 for (i = 3; i >= 0; i--) {
7795 if (ops[i] == Iop_INVALID)
7796 break;
sewardj8e91fd42014-07-11 12:05:47 +00007797 IRTemp tmp = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007798 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7799 res = tmp;
7800 }
7801 return res;
7802}
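
/* Worked example (illustrative only): size = 2 (S lanes) and
   laneNo = 3 give ix = 12, selecting only ops[3] = InterleaveHI64x2
   and ops[2] = CatOddLanes32x4.  The first maps s3 s2 s1 s0 to
   s3 s2 s3 s2, and the second then yields s3 s3 s3 s3. */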
7803
7804
7805/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7806 selector encoded as shown below. Return a new V128 holding the
7807 selected lane from |srcV| dup'd out to V128, and also return the
7808 lane number, log2 of the lane size in bytes, and width-character via
7809 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7810 is an invalid selector, in which case return
7811 IRTemp_INVALID, 0, 0 and '?' respectively.
7812
7813 imm5 = xxxx1 signifies .b[xxxx]
7814 = xxx10 .h[xxx]
7815 = xx100 .s[xx]
7816 = x1000 .d[x]
7817 otherwise invalid
7818*/
7819static
7820IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7821 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7822 IRExpr* srcV, UInt imm5 )
7823{
7824 *laneNo = 0;
7825 *laneSzLg2 = 0;
7826 *laneCh = '?';
7827
7828 if (imm5 & 1) {
7829 *laneNo = (imm5 >> 1) & 15;
7830 *laneSzLg2 = 0;
7831 *laneCh = 'b';
7832 }
7833 else if (imm5 & 2) {
7834 *laneNo = (imm5 >> 2) & 7;
7835 *laneSzLg2 = 1;
7836 *laneCh = 'h';
7837 }
7838 else if (imm5 & 4) {
7839 *laneNo = (imm5 >> 3) & 3;
7840 *laneSzLg2 = 2;
7841 *laneCh = 's';
7842 }
7843 else if (imm5 & 8) {
7844 *laneNo = (imm5 >> 4) & 1;
7845 *laneSzLg2 = 3;
7846 *laneCh = 'd';
7847 }
7848 else {
7849 /* invalid */
7850 return IRTemp_INVALID;
7851 }
7852
7853 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7854}
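
/* For instance (illustrative only): imm5 = 0b10110 selects .h[5],
   since bit 0 is clear and bit 1 is set, leaving bits 4:2 = 0b101 as
   the lane number. */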
7855
7856
7857/* Clone |imm| to every lane of a V128, with a lane size of (1 << |size|) bytes. */
7858static
7859IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7860{
7861 IRType ty = Ity_INVALID;
7862 IRTemp rcS = IRTemp_INVALID;
7863 switch (size) {
7864 case X01:
7865 vassert(imm <= 0xFFFFULL);
7866 ty = Ity_I16;
7867 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7868 break;
7869 case X10:
7870 vassert(imm <= 0xFFFFFFFFULL);
7871 ty = Ity_I32;
7872 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7873 break;
7874 case X11:
7875 ty = Ity_I64;
7876 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7877 default:
7878 vassert(0);
7879 }
7880 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7881 return rcV;
7882}
7883
7884
sewardj25523c42014-06-15 19:36:29 +00007885/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7886 and the upper can contain any value -- it is ignored. If |is2| is False,
7887 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7888 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7889 half of vector reg |dd| and leave the lower half unchanged. This
7890 simulates the behaviour of the "foo/foo2" instructions in which the
7891 destination is half the width of sources, for example addhn/addhn2.
7892*/
7893static
7894void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7895{
7896 if (is2) {
7897 /* Get the old contents of Vdd, zero the upper half, and replace
7898 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00007899 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007900 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00007901 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007902 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7903 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007904 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007905 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7906 mkexpr(t_newHI_zero)));
7907 putQReg128(dd, mkexpr(res));
7908 } else {
7909 /* This is simple. */
7910 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7911 }
7912}
7913
7914
sewardj8e91fd42014-07-11 12:05:47 +00007915/* Compute vector SQABS at lane size |size| for |srcE|, returning
7916 the q result in |*qabs| and the normal result in |*nabs|. */
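/* (The computation is a branch-free select: |mask| is all ones in lanes
   where src < 0, so each result lane is 0-src for negative lanes and src
   otherwise.  The q variant uses a saturating subtract, so that SQABS of
   the most negative value saturates.) */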
7917static
7918void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7919 IRExpr* srcE, UInt size )
7920{
7921 IRTemp src, mask, maskn, nsub, qsub;
7922 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7923 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7924 assign(src, srcE);
7925 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7926 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7927 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7928 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7929 assign(*nabs, binop(Iop_OrV128,
7930 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7931 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7932 assign(*qabs, binop(Iop_OrV128,
7933 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7934 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7935}
7936
7937
sewardj51d012a2014-07-21 09:19:50 +00007938/* Compute vector SQNEG at lane size |size| for |srcE|, returning
7939 the q result in |*qneg| and the normal result in |*nneg|. */
7940static
7941void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7942 IRExpr* srcE, UInt size )
7943{
7944 IRTemp src = IRTemp_INVALID;
7945 newTempsV128_3(&src, nneg, qneg);
7946 assign(src, srcE);
7947 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7948 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7949}
7950
7951
sewardjecedd982014-08-11 14:02:47 +00007952/* Zero all except the least significant lane of |srcE|, where |size|
7953 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00007954static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00007955{
7956 vassert(size < 4);
7957 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00007958 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00007959 return t;
7960}
7961
7962
sewardj51d012a2014-07-21 09:19:50 +00007963/* Generate IR to compute vector widening MULL from either the lower
7964 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7965 widening multiplies are unsigned when isU==True and signed when
7966 isU==False. |size| is the narrow lane size indication. Optionally,
7967 the product may be added to or subtracted from vecD, at the wide lane
7968 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7969 is 'm' (only multiply) then the accumulate part does not happen, and
7970 |vecD| is expected to == IRTemp_INVALID.
7971
7972 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7973   are allowed.  The result is returned in a new IRTemp in *res. */
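/* For example, UMLAL Vd.8h, Vn.8b, Vm.8b corresponds to is2==False,
   isU==True, size==0, mas=='a'; UMLAL2 Vd.8h, Vn.16b, Vm.16b is the
   same except is2==True, selecting the upper halves of the sources. */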
7975static
7976void math_MULL_ACC ( /*OUT*/IRTemp* res,
7977 Bool is2, Bool isU, UInt size, HChar mas,
7978 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7979{
7980 vassert(res && *res == IRTemp_INVALID);
7981 vassert(size <= 2);
7982 vassert(mas == 'm' || mas == 'a' || mas == 's');
7983 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7984 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7985 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7986 : (mas == 's' ? mkVecSUB(size+1)
7987 : Iop_INVALID);
7988 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7989 mkexpr(vecN), mkexpr(vecM));
7990 *res = newTempV128();
7991 assign(*res, mas == 'm' ? mkexpr(mul)
7992 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7993}
7994
7995
7996/* Same as math_MULL_ACC, except that the multiply is a signed widening
7997   multiply whose result is doubled before being added to or
7998   subtracted from the accumulated value.  And everything is
7999 saturated. In all cases, saturation residuals are returned
8000 via (sat1q, sat1n), and in the accumulate cases,
8001 via (sat2q, sat2n) too. All results are returned in new temporaries.
8002 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
8003 so the caller can tell this has happened. */
8004static
8005void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
8006 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8007 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
8008 Bool is2, UInt size, HChar mas,
8009 IRTemp vecN, IRTemp vecM, IRTemp vecD )
8010{
8011 vassert(size <= 2);
8012 vassert(mas == 'm' || mas == 'a' || mas == 's');
8013 /* Compute
8014        sat1q = vecN.d[is2] *sq vecM.d[is2] *q 2
8015        sat1n = vecN.d[is2] *s  vecM.d[is2] *  2
8016 IOW take either the low or high halves of vecN and vecM, signed widen,
8017 multiply, double that, and signedly saturate. Also compute the same
8018 but without saturation.
8019 */
8020 vassert(sat2q && *sat2q == IRTemp_INVALID);
8021 vassert(sat2n && *sat2n == IRTemp_INVALID);
8022 newTempsV128_3(sat1q, sat1n, res);
8023 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
8024 mkexpr(vecN), mkexpr(vecM));
8025 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
8026 mkexpr(vecN), mkexpr(vecM));
8027 assign(*sat1q, mkexpr(tq));
8028 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
8029
8030 /* If there is no accumulation, the final result is sat1q,
8031 and there's no assignment to sat2q or sat2n. */
8032 if (mas == 'm') {
8033 assign(*res, mkexpr(*sat1q));
8034 return;
8035 }
8036
8037 /* Compute
8038 sat2q = vecD +sq/-sq sat1q
8039 sat2n = vecD +/- sat1n
8040 result = sat2q
8041 */
8042 newTempsV128_2(sat2q, sat2n);
8043 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
8044 mkexpr(vecD), mkexpr(*sat1q)));
8045 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
8046 mkexpr(vecD), mkexpr(*sat1n)));
8047 assign(*res, mkexpr(*sat2q));
8048}
8049
8050
sewardj54ffa1d2014-07-22 09:27:49 +00008051/* Generate IR for widening signed vector multiplies.  The lanes are
8052   signedly widened and multiplied at the wider width; the products of
8053   the upper and lower 64-bit halves are returned in *resHI and *resLO. */
sewardja5a6b752014-06-30 07:33:56 +00008054static
sewardj54ffa1d2014-07-22 09:27:49 +00008055void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
8056 UInt sizeNarrow, IRTemp argL, IRTemp argR )
8057{
8058 vassert(sizeNarrow <= 2);
8059 newTempsV128_2(resHI, resLO);
8060 IRTemp argLhi = newTemp(Ity_I64);
8061 IRTemp argLlo = newTemp(Ity_I64);
8062 IRTemp argRhi = newTemp(Ity_I64);
8063 IRTemp argRlo = newTemp(Ity_I64);
8064 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
8065 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
8066 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
8067 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
8068 IROp opMulls = mkVecMULLS(sizeNarrow);
8069 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
8070 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
8071}
8072
8073
sewardj257e99f2014-08-03 12:45:19 +00008074/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
8075 double that, possibly add a rounding constant (R variants), and take
8076 the high half. */
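/* (Illustration for h lanes: each lane of the unsaturated result is
   (2*sN*sM + (isR ? (1 << 15) : 0)) >> 16, with the intermediate value
   computed at 32 bits; the saturated result differs only in that the
   doubled product is saturated.) */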
sewardj54ffa1d2014-07-22 09:27:49 +00008077static
8078void math_SQDMULH ( /*OUT*/IRTemp* res,
8079 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
8080 Bool isR, UInt size, IRTemp vN, IRTemp vM )
8081{
8082 vassert(size == X01 || size == X10); /* s or h only */
8083
8084 newTempsV128_3(res, sat1q, sat1n);
8085
8086 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
8087 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
8088
8089 IRTemp addWide = mkVecADD(size+1);
8090
8091 if (isR) {
8092 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8093
8094 Int rcShift = size == X01 ? 15 : 31;
8095 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
8096 assign(*sat1n,
8097 binop(mkVecCATODDLANES(size),
8098 binop(addWide,
8099 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8100 mkexpr(roundConst)),
8101 binop(addWide,
8102 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
8103 mkexpr(roundConst))));
8104 } else {
8105 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
8106
8107 assign(*sat1n,
8108 binop(mkVecCATODDLANES(size),
8109 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
8110 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
8111 }
8112
8113 assign(*res, mkexpr(*sat1q));
8114}
8115
8116
sewardja97dddf2014-08-14 22:26:52 +00008117/* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
8118 a new temp in *res, and the Q difference pair in new temps in
8119 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
8120 three operations it is. */
8121static
8122void math_QSHL_IMM ( /*OUT*/IRTemp* res,
8123 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
8124 IRTemp src, UInt size, UInt shift, const HChar* nm )
8125{
8126 vassert(size <= 3);
8127 UInt laneBits = 8 << size;
8128 vassert(shift < laneBits);
8129 newTempsV128_3(res, qDiff1, qDiff2);
8130 IRTemp z128 = newTempV128();
8131 assign(z128, mkV128(0x0000));
8132
8133 /* UQSHL */
8134 if (vex_streq(nm, "uqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00008135 IROp qop = mkVecQSHLNSATUU(size);
sewardja97dddf2014-08-14 22:26:52 +00008136 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8137 if (shift == 0) {
8138 /* No shift means no saturation. */
8139 assign(*qDiff1, mkexpr(z128));
8140 assign(*qDiff2, mkexpr(z128));
8141 } else {
8142 /* Saturation has occurred if any of the shifted-out bits are
8143 nonzero. We get the shifted-out bits by right-shifting the
8144 original value. */
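         /* (E.g. with b lanes (laneBits == 8) and shift == 3, the result
            saturates in exactly those lanes where (src >> 5) != 0.) */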
8145 UInt rshift = laneBits - shift;
8146 vassert(rshift >= 1 && rshift < laneBits);
8147 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8148 assign(*qDiff2, mkexpr(z128));
8149 }
8150 return;
8151 }
8152
8153 /* SQSHL */
8154 if (vex_streq(nm, "sqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00008155 IROp qop = mkVecQSHLNSATSS(size);
sewardja97dddf2014-08-14 22:26:52 +00008156 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
8157 if (shift == 0) {
8158 /* No shift means no saturation. */
8159 assign(*qDiff1, mkexpr(z128));
8160 assign(*qDiff2, mkexpr(z128));
8161 } else {
8162 /* Saturation has occurred if any of the shifted-out bits are
8163 different from the top bit of the original value. */
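         /* (E.g. with b lanes and shift == 3, src << 3 is representable
            exactly when bits [7:4] of src all equal bit 7.) */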
8164 UInt rshift = laneBits - 1 - shift;
8165 vassert(rshift >= 0 && rshift < laneBits-1);
8166 /* qDiff1 is the shifted out bits, and the top bit of the original
8167 value, preceded by zeroes. */
8168 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8169 /* qDiff2 is the top bit of the original value, cloned the
8170 correct number of times. */
8171 assign(*qDiff2, binop(mkVecSHRN(size),
8172 binop(mkVecSARN(size), mkexpr(src),
8173 mkU8(laneBits-1)),
8174 mkU8(rshift)));
8175 /* This also succeeds in comparing the top bit of the original
8176 value to itself, which is a bit stupid, but not wrong. */
8177 }
8178 return;
8179 }
8180
8181 /* SQSHLU */
8182 if (vex_streq(nm, "sqshlu")) {
sewardj1dd3ec12014-08-15 09:11:08 +00008183 IROp qop = mkVecQSHLNSATSU(size);
sewardja97dddf2014-08-14 22:26:52 +00008184 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
sewardjacc29642014-08-15 05:35:35 +00008185 if (shift == 0) {
8186 /* If there's no shift, saturation depends on the top bit
8187 of the source. */
8188 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
8189 assign(*qDiff2, mkexpr(z128));
8190 } else {
8191 /* Saturation has occurred if any of the shifted-out bits are
8192 nonzero. We get the shifted-out bits by right-shifting the
8193 original value. */
8194 UInt rshift = laneBits - shift;
8195 vassert(rshift >= 1 && rshift < laneBits);
8196 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
8197 assign(*qDiff2, mkexpr(z128));
8198 }
sewardja97dddf2014-08-14 22:26:52 +00008199 return;
8200 }
8201
8202 vassert(0);
8203}
8204
8205
sewardj62ece662014-08-17 19:59:09 +00008206/* Generate IR to do SRHADD and URHADD. */
8207static
8208IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
8209{
8210 /* Generate this:
8211 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
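      which equals (A + B + 1) >> 1 but cannot overflow the lane.  For
      example, for unsigned bytes with A == 0xFF and B == 0x01 this gives
      0x7F + 0x00 + ((1 + 1 + 1) >> 1) == 0x80, whereas A + B + 1 does
      not fit in 8 bits.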
8212 */
8213 vassert(size <= 3);
8214 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
8215 IROp opADD = mkVecADD(size);
8216 /* The only tricky bit is to generate the correct vector 1 constant. */
8217 const ULong ones64[4]
8218 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
8219 0x0000000100000001ULL, 0x0000000000000001ULL };
8220 IRTemp imm64 = newTemp(Ity_I64);
8221 assign(imm64, mkU64(ones64[size]));
8222 IRTemp vecOne = newTempV128();
8223 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
8224 IRTemp scaOne = newTemp(Ity_I8);
8225 assign(scaOne, mkU8(1));
8226 IRTemp res = newTempV128();
8227 assign(res,
8228 binop(opADD,
8229 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
8230 binop(opADD,
8231 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
8232 binop(opSHR,
8233 binop(opADD,
8234 binop(opADD,
8235 binop(Iop_AndV128, mkexpr(aa),
8236 mkexpr(vecOne)),
8237 binop(Iop_AndV128, mkexpr(bb),
8238 mkexpr(vecOne))
8239 ),
8240 mkexpr(vecOne)
8241 ),
8242 mkexpr(scaOne)
8243 )
8244 )
8245 )
8246 );
8247 return res;
8248}
8249
8250
sewardj54ffa1d2014-07-22 09:27:49 +00008251/* QCFLAG tracks the SIMD sticky saturation status. Update the status
8252 thusly: if, after application of |opZHI| to both |qres| and |nres|,
8253 they have the same value, leave QCFLAG unchanged. Otherwise, set it
8254 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
8255 operators, or Iop_INVALID, in which case |qres| and |nres| are used
8256   unmodified. The presence of |opZHI| means this function can be used to
8257 generate QCFLAG update code for both scalar and vector SIMD operations.
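   Callers typically pass the result of a saturating operation as |qres|
   and the result of the equivalent non-saturating operation as |nres|;
   any lane in which they differ makes QCFLAG nonzero.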
8258*/
8259static
8260void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
sewardja5a6b752014-06-30 07:33:56 +00008261{
sewardj8e91fd42014-07-11 12:05:47 +00008262 IRTemp diff = newTempV128();
8263 IRTemp oldQCFLAG = newTempV128();
8264 IRTemp newQCFLAG = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00008265 if (opZHI == Iop_INVALID) {
8266 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
8267 } else {
sewardj257e99f2014-08-03 12:45:19 +00008268 vassert(opZHI == Iop_ZeroHI64ofV128
8269 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
sewardj54ffa1d2014-07-22 09:27:49 +00008270 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
8271 }
sewardja5a6b752014-06-30 07:33:56 +00008272 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
8273 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
8274 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
8275}
8276
8277
sewardj54ffa1d2014-07-22 09:27:49 +00008278/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
8279 are used unmodified, hence suitable for QCFLAG updates for whole-vector
8280 operations. */
8281static
8282void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
8283{
8284 updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
8285}
8286
8287
sewardj76927e62014-11-17 11:21:21 +00008288/* Generate IR to rearrange two vector values in a way which is useful
8289 for doing S/D add-pair etc operations. There are 3 cases:
8290
8291 2d: [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]
8292
8293 4s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]
8294
8295      2s: [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]
8296
8297 The cases are distinguished as follows:
8298 isD == True, bitQ == 1 => 2d
8299 isD == False, bitQ == 1 => 4s
8300 isD == False, bitQ == 0 => 2s
8301*/
8302static
8303void math_REARRANGE_FOR_FLOATING_PAIRWISE (
8304 /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
8305 IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
8306 )
8307{
8308 vassert(rearrL && *rearrL == IRTemp_INVALID);
8309 vassert(rearrR && *rearrR == IRTemp_INVALID);
8310 *rearrL = newTempV128();
8311 *rearrR = newTempV128();
8312 if (isD) {
8313 // 2d case
8314 vassert(bitQ == 1);
8315 assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
8316 assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
8317 }
8318 else if (!isD && bitQ == 1) {
8319 // 4s case
8320 assign(*rearrL, binop(Iop_CatOddLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8321 assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
8322 } else {
8323 // 2s case
8324 vassert(!isD && bitQ == 0);
8325 IRTemp m1n1m0n0 = newTempV128();
8326 IRTemp m0n0m1n1 = newTempV128();
8327 assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
8328 mkexpr(vecM), mkexpr(vecN)));
8329 assign(m0n0m1n1, triop(Iop_SliceV128,
8330 mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
8331 assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
8332 assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
8333 }
8334}
8335
8336
sewardj1aff76b2014-11-20 10:14:06 +00008337/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
8338static Double two_to_the_minus ( Int n )
8339{
8340 if (n == 1) return 0.5;
8341 vassert(n >= 2 && n <= 64);
8342 Int half = n / 2;
8343 return two_to_the_minus(half) * two_to_the_minus(n - half);
8344}
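/* (Computed by recursive halving so that no step needs an integer shift
   like 1ULL << n, which would be undefined for n == 64; each intermediate
   is an exact power of two in double precision.) */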
8345
8346
sewardj2130b342015-04-06 14:49:05 +00008347/* Returns 2.0 ^ n for n in 1 .. 64 */
8348static Double two_to_the_plus ( Int n )
8349{
8350 if (n == 1) return 2.0;
8351 vassert(n >= 2 && n <= 64);
8352 Int half = n / 2;
8353 return two_to_the_plus(half) * two_to_the_plus(n - half);
8354}
8355
8356
sewardj8e91fd42014-07-11 12:05:47 +00008357/*------------------------------------------------------------*/
8358/*--- SIMD and FP instructions ---*/
8359/*------------------------------------------------------------*/
8360
sewardjdf1628c2014-06-10 22:52:05 +00008361static
8362Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjbbcf1882014-01-12 12:49:10 +00008363{
sewardjab33a7a2014-06-19 22:20:47 +00008364 /* 31 29 23 21 20 15 14 10 9 4
8365 0 q 101110 op2 0 m 0 imm4 0 n d
8366 Decode fields: op2
8367 */
sewardjbbcf1882014-01-12 12:49:10 +00008368# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjab33a7a2014-06-19 22:20:47 +00008369 if (INSN(31,31) != 0
8370 || INSN(29,24) != BITS6(1,0,1,1,1,0)
8371 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
8372 return False;
8373 }
8374 UInt bitQ = INSN(30,30);
8375 UInt op2 = INSN(23,22);
8376 UInt mm = INSN(20,16);
8377 UInt imm4 = INSN(14,11);
8378 UInt nn = INSN(9,5);
8379 UInt dd = INSN(4,0);
8380
8381 if (op2 == BITS2(0,0)) {
8382 /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
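      /* EXT extracts a 16-byte (Q=1) or 8-byte (Q=0) window starting at
         byte |imm4| of the concatenation Vm:Vn, with Vn supplying the
         least significant bytes. */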
sewardj8e91fd42014-07-11 12:05:47 +00008383 IRTemp sHi = newTempV128();
8384 IRTemp sLo = newTempV128();
8385 IRTemp res = newTempV128();
sewardjab33a7a2014-06-19 22:20:47 +00008386 assign(sHi, getQReg128(mm));
8387 assign(sLo, getQReg128(nn));
8388 if (bitQ == 1) {
8389 if (imm4 == 0) {
8390 assign(res, mkexpr(sLo));
8391 } else {
sewardj8def0492014-09-01 14:13:15 +00008392 vassert(imm4 >= 1 && imm4 <= 15);
8393 assign(res, triop(Iop_SliceV128,
8394 mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
sewardjab33a7a2014-06-19 22:20:47 +00008395 }
8396 putQReg128(dd, mkexpr(res));
8397 DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
8398 } else {
8399 if (imm4 >= 8) return False;
8400 if (imm4 == 0) {
8401 assign(res, mkexpr(sLo));
8402 } else {
sewardj8def0492014-09-01 14:13:15 +00008403 vassert(imm4 >= 1 && imm4 <= 7);
8404 IRTemp hi64lo64 = newTempV128();
8405 assign(hi64lo64, binop(Iop_InterleaveLO64x2,
8406 mkexpr(sHi), mkexpr(sLo)));
8407 assign(res, triop(Iop_SliceV128,
8408 mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
sewardjab33a7a2014-06-19 22:20:47 +00008409 }
8410 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
8411 DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
8412 }
8413 return True;
8414 }
8415
sewardjdf1628c2014-06-10 22:52:05 +00008416 return False;
8417# undef INSN
8418}
sewardjbbcf1882014-01-12 12:49:10 +00008419
sewardjbbcf1882014-01-12 12:49:10 +00008420
sewardjdf1628c2014-06-10 22:52:05 +00008421static
8422Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
8423{
8424 /* 31 29 23 21 20 15 14 12 11 9 4
8425 0 q 001110 op2 0 m 0 len op 00 n d
8426 Decode fields: op2,len,op
sewardjbbcf1882014-01-12 12:49:10 +00008427 */
sewardjdf1628c2014-06-10 22:52:05 +00008428# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8429 if (INSN(31,31) != 0
8430 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8431 || INSN(21,21) != 0
8432 || INSN(15,15) != 0
8433 || INSN(11,10) != BITS2(0,0)) {
8434 return False;
8435 }
8436 UInt bitQ = INSN(30,30);
8437 UInt op2 = INSN(23,22);
8438 UInt mm = INSN(20,16);
8439 UInt len = INSN(14,13);
8440 UInt bitOP = INSN(12,12);
8441 UInt nn = INSN(9,5);
8442 UInt dd = INSN(4,0);
8443
8444 if (op2 == X00) {
8445 /* -------- 00,xx,0 TBL, xx register table -------- */
8446 /* -------- 00,xx,1 TBX, xx register table -------- */
8447 /* 31 28 20 15 14 12 9 4
8448 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8449 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
8450 where Ta = 16b(q=1) or 8b(q=0)
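         Table indices in Vm that are out of range yield zero for TBL
         and leave the destination byte unchanged for TBX.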
8451 */
sewardjdf1628c2014-06-10 22:52:05 +00008452 Bool isTBX = bitOP == 1;
8453 /* The out-of-range values to use. */
sewardj8e91fd42014-07-11 12:05:47 +00008454 IRTemp oor_values = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008455 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
8456 /* src value */
sewardj8e91fd42014-07-11 12:05:47 +00008457 IRTemp src = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008458 assign(src, getQReg128(mm));
8459 /* The table values */
8460 IRTemp tab[4];
8461 UInt i;
8462 for (i = 0; i <= len; i++) {
8463 vassert(i < 4);
sewardj8e91fd42014-07-11 12:05:47 +00008464 tab[i] = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008465 assign(tab[i], getQReg128((nn + i) % 32));
8466 }
8467 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
sewardjdf9d6d52014-06-27 10:43:22 +00008468 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8469      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00008470 const HChar* nm = isTBX ? "tbx" : "tbl";
florianb1737742015-08-03 16:03:13 +00008471 DIP("%s %s.%s, {v%u.16b .. v%u.16b}, %s.%s\n",
sewardjdf1628c2014-06-10 22:52:05 +00008472 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
8473 return True;
8474 }
8475
8477 return False;
8478# undef INSN
8479}
8480
8481
8482static
8483Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
8484{
sewardjfc261d92014-08-24 20:36:14 +00008485 /* 31 29 23 21 20 15 14 11 9 4
8486 0 q 001110 size 0 m 0 opcode 10 n d
8487 Decode fields: opcode
8488 */
sewardjdf1628c2014-06-10 22:52:05 +00008489# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjfc261d92014-08-24 20:36:14 +00008490 if (INSN(31,31) != 0
8491 || INSN(29,24) != BITS6(0,0,1,1,1,0)
8492 || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
8493 return False;
8494 }
8495 UInt bitQ = INSN(30,30);
8496 UInt size = INSN(23,22);
8497 UInt mm = INSN(20,16);
8498 UInt opcode = INSN(14,12);
8499 UInt nn = INSN(9,5);
8500 UInt dd = INSN(4,0);
8501
8502 if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
8503 /* -------- 001 UZP1 std7_std7_std7 -------- */
8504 /* -------- 101 UZP2 std7_std7_std7 -------- */
8505 if (bitQ == 0 && size == X11) return False; // implied 1d case
8506 Bool isUZP1 = opcode == BITS3(0,0,1);
8507 IROp op = isUZP1 ? mkVecCATEVENLANES(size)
8508 : mkVecCATODDLANES(size);
8509 IRTemp preL = newTempV128();
8510 IRTemp preR = newTempV128();
8511 IRTemp res = newTempV128();
8512 if (bitQ == 0) {
8513 assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
8514 getQReg128(nn)));
8515 assign(preR, mkexpr(preL));
8516 } else {
8517 assign(preL, getQReg128(mm));
8518 assign(preR, getQReg128(nn));
8519 }
8520 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8521 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8522 const HChar* nm = isUZP1 ? "uzp1" : "uzp2";
8523 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8524 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8525 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8526 return True;
8527 }
8528
8529 if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
8530 /* -------- 010 TRN1 std7_std7_std7 -------- */
8531 /* -------- 110 TRN2 std7_std7_std7 -------- */
8532 if (bitQ == 0 && size == X11) return False; // implied 1d case
8533 Bool isTRN1 = opcode == BITS3(0,1,0);
8534 IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
8535 : mkVecCATODDLANES(size);
8536 IROp op2 = mkVecINTERLEAVEHI(size);
8537 IRTemp srcM = newTempV128();
8538 IRTemp srcN = newTempV128();
8539 IRTemp res = newTempV128();
8540 assign(srcM, getQReg128(mm));
8541 assign(srcN, getQReg128(nn));
8542 assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
8543 binop(op1, mkexpr(srcN), mkexpr(srcN))));
8544 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8545 const HChar* nm = isTRN1 ? "trn1" : "trn2";
8546 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8547 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8548 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8549 return True;
8550 }
8551
8552 if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
8553 /* -------- 011 ZIP1 std7_std7_std7 -------- */
8554 /* -------- 111 ZIP2 std7_std7_std7 -------- */
8555 if (bitQ == 0 && size == X11) return False; // implied 1d case
8556 Bool isZIP1 = opcode == BITS3(0,1,1);
8557 IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
8558 : mkVecINTERLEAVEHI(size);
8559 IRTemp preL = newTempV128();
8560 IRTemp preR = newTempV128();
8561 IRTemp res = newTempV128();
8562 if (bitQ == 0 && !isZIP1) {
sewardj8def0492014-09-01 14:13:15 +00008563 IRTemp z128 = newTempV128();
8564 assign(z128, mkV128(0x0000));
8565 // preL = Vm shifted left 32 bits
8566 // preR = Vn shifted left 32 bits
8567 assign(preL, triop(Iop_SliceV128,
8568 getQReg128(mm), mkexpr(z128), mkU8(12)));
8569 assign(preR, triop(Iop_SliceV128,
8570 getQReg128(nn), mkexpr(z128), mkU8(12)));
8571
sewardjfc261d92014-08-24 20:36:14 +00008572 } else {
8573 assign(preL, getQReg128(mm));
8574 assign(preR, getQReg128(nn));
8575 }
8576 assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
8577 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8578 const HChar* nm = isZIP1 ? "zip1" : "zip2";
8579 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8580 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
8581 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
8582 return True;
8583 }
8584
sewardjdf1628c2014-06-10 22:52:05 +00008585 return False;
8586# undef INSN
8587}
8588
8589
8590static
8591Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
8592{
8593 /* 31 28 23 21 16 11 9 4
8594 0 q u 01110 size 11000 opcode 10 n d
8595 Decode fields: u,size,opcode
8596 */
8597# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8598 if (INSN(31,31) != 0
8599 || INSN(28,24) != BITS5(0,1,1,1,0)
8600 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
8601 return False;
8602 }
8603 UInt bitQ = INSN(30,30);
8604 UInt bitU = INSN(29,29);
8605 UInt size = INSN(23,22);
8606 UInt opcode = INSN(16,12);
8607 UInt nn = INSN(9,5);
8608 UInt dd = INSN(4,0);
8609
sewardja5a6b752014-06-30 07:33:56 +00008610 if (opcode == BITS5(0,0,0,1,1)) {
8611 /* -------- 0,xx,00011 SADDLV -------- */
8612 /* -------- 1,xx,00011 UADDLV -------- */
8613 /* size is the narrow size */
8614 if (size == X11 || (size == X10 && bitQ == 0)) return False;
8615 Bool isU = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00008616 IRTemp src = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008617 assign(src, getQReg128(nn));
8618 /* The basic plan is to widen the lower half, and if Q = 1,
8619 the upper half too. Add them together (if Q = 1), and in
8620 either case fold with add at twice the lane width.
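         For example, for UADDLV h0, v1.16b, each 8-byte half of v1 is
         widened to eight 16-bit lanes, the two widened vectors are added,
         and the eight 16-bit lanes are then folded to a single 16-bit sum.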
8621 */
8622 IRExpr* widened
8623 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
8624 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
8625 if (bitQ == 1) {
8626 widened
8627 = binop(mkVecADD(size+1),
8628 widened,
8629 mkexpr(math_WIDEN_LO_OR_HI_LANES(
8630 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
8631 );
8632 }
8633 /* Now fold. */
sewardj8e91fd42014-07-11 12:05:47 +00008634 IRTemp tWi = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008635 assign(tWi, widened);
8636 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
8637 putQReg128(dd, mkexpr(res));
8638 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8639      const HChar ch = "bhsd"[size + 1];  /* dest is the widened size */
8640 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
8641 nameQReg128(dd), ch, nameQReg128(nn), arr);
8642 return True;
8643 }
8644
sewardjb9aff1e2014-06-15 21:55:33 +00008645 UInt ix = 0;
8646 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
8647 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
8648 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
8649 /**/
8650 if (ix != 0) {
8651 /* -------- 0,xx,01010: SMAXV -------- (1) */
8652 /* -------- 1,xx,01010: UMAXV -------- (2) */
8653 /* -------- 0,xx,11010: SMINV -------- (3) */
8654 /* -------- 1,xx,11010: UMINV -------- (4) */
8655 /* -------- 0,xx,11011: ADDV -------- (5) */
8656 vassert(ix >= 1 && ix <= 5);
sewardjdf1628c2014-06-10 22:52:05 +00008657 if (size == X11) return False; // 1d,2d cases not allowed
8658 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
sewardjdf1628c2014-06-10 22:52:05 +00008659 const IROp opMAXS[3]
8660 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
8661 const IROp opMAXU[3]
8662 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
sewardjb9aff1e2014-06-15 21:55:33 +00008663 const IROp opMINS[3]
8664 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
8665 const IROp opMINU[3]
8666 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
8667 const IROp opADD[3]
8668 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
sewardjdf1628c2014-06-10 22:52:05 +00008669 vassert(size < 3);
sewardjb9aff1e2014-06-15 21:55:33 +00008670 IROp op = Iop_INVALID;
8671 const HChar* nm = NULL;
8672 switch (ix) {
8673 case 1: op = opMAXS[size]; nm = "smaxv"; break;
8674 case 2: op = opMAXU[size]; nm = "umaxv"; break;
8675 case 3: op = opMINS[size]; nm = "sminv"; break;
8676 case 4: op = opMINU[size]; nm = "uminv"; break;
8677 case 5: op = opADD[size]; nm = "addv"; break;
8678 default: vassert(0);
8679 }
8680 vassert(op != Iop_INVALID && nm != NULL);
sewardj8e91fd42014-07-11 12:05:47 +00008681 IRTemp tN1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008682 assign(tN1, getQReg128(nn));
8683 /* If Q == 0, we're just folding lanes in the lower half of
8684 the value. In which case, copy the lower half of the
8685 source into the upper half, so we can then treat it the
sewardjb9aff1e2014-06-15 21:55:33 +00008686 same as the full width case. Except for the addition case,
8687 in which we have to zero out the upper half. */
sewardj8e91fd42014-07-11 12:05:47 +00008688 IRTemp tN2 = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00008689 assign(tN2, bitQ == 0
8690 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
8691 : mk_CatEvenLanes64x2(tN1,tN1))
8692 : mkexpr(tN1));
sewardjdf9d6d52014-06-27 10:43:22 +00008693 IRTemp res = math_FOLDV(tN2, op);
sewardjdf1628c2014-06-10 22:52:05 +00008694 if (res == IRTemp_INVALID)
sewardj5cb53e72015-02-08 12:08:56 +00008695 return False; /* means math_FOLDV
sewardjdf1628c2014-06-10 22:52:05 +00008696 doesn't handle this case yet */
8697 putQReg128(dd, mkexpr(res));
sewardjdf1628c2014-06-10 22:52:05 +00008698 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
8699 IRType laneTy = tys[size];
8700 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8701 DIP("%s %s, %s.%s\n", nm,
8702 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
8703 return True;
8704 }
8705
sewardj5cb53e72015-02-08 12:08:56 +00008706 if ((size == X00 || size == X10)
8707 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
8708      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
8709      /* -------- 0,10,01100: FMINNMV s_4s -------- */
8710 /* -------- 1,00,01111: FMAXV s_4s -------- */
8711 /* -------- 1,10,01111: FMINV s_4s -------- */
8712 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
8713 if (bitQ == 0) return False; // Only 4s is allowed
8714 Bool isMIN = (size & 2) == 2;
8715 Bool isNM = opcode == BITS5(0,1,1,0,0);
8716 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
8717 IRTemp src = newTempV128();
8718 assign(src, getQReg128(nn));
8719 IRTemp res = math_FOLDV(src, opMXX);
8720 putQReg128(dd, mkexpr(res));
8721      DIP("%s%sv s%u, v%u.4s\n",
8722          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
8723 return True;
8724 }
8725
sewardjdf1628c2014-06-10 22:52:05 +00008726# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8727 return False;
8728# undef INSN
8729}
8730
8731
8732static
8733Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
8734{
8735 /* 31 28 20 15 14 10 9 4
8736 0 q op 01110000 imm5 0 imm4 1 n d
8737 Decode fields: q,op,imm4
8738 */
8739# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8740 if (INSN(31,31) != 0
8741 || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
8742 || INSN(15,15) != 0 || INSN(10,10) != 1) {
8743 return False;
8744 }
8745 UInt bitQ = INSN(30,30);
8746 UInt bitOP = INSN(29,29);
8747 UInt imm5 = INSN(20,16);
8748 UInt imm4 = INSN(14,11);
8749 UInt nn = INSN(9,5);
8750 UInt dd = INSN(4,0);
8751
8752 /* -------- x,0,0000: DUP (element, vector) -------- */
8753 /* 31 28 20 15 9 4
8754 0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
8755 */
8756 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
sewardj487559e2014-07-10 14:22:45 +00008757 UInt laneNo = 0;
8758 UInt laneSzLg2 = 0;
8759 HChar laneCh = '?';
8760 IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
8761 getQReg128(nn), imm5);
8762 if (res == IRTemp_INVALID)
8763 return False;
8764 if (bitQ == 0 && laneSzLg2 == X11)
8765 return False; /* .1d case */
8766 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8767 const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
8768 DIP("dup %s.%s, %s.%c[%u]\n",
8769 nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
8770 return True;
sewardjdf1628c2014-06-10 22:52:05 +00008771 }
8772
8773 /* -------- x,0,0001: DUP (general, vector) -------- */
8774 /* 31 28 20 15 9 4
8775 0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
8776 Q=0 writes 64, Q=1 writes 128
8777      imm5: xxxx1 8B(q=0) or 16B(q=1), R=W
8778 xxx10 4H(q=0) or 8H(q=1), R=W
8779 xx100 2S(q=0) or 4S(q=1), R=W
8780 x1000 Invalid(q=0) or 2D(q=1), R=X
8781 x0000 Invalid(q=0) or Invalid(q=1)
8782 Require op=0, imm4=0001
8783 */
8784 if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
8785 Bool isQ = bitQ == 1;
8786 IRTemp w0 = newTemp(Ity_I64);
8787 const HChar* arT = "??";
8788 IRType laneTy = Ity_INVALID;
8789 if (imm5 & 1) {
8790 arT = isQ ? "16b" : "8b";
8791 laneTy = Ity_I8;
8792 assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
8793 }
8794 else if (imm5 & 2) {
8795 arT = isQ ? "8h" : "4h";
8796 laneTy = Ity_I16;
8797 assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
8798 }
8799 else if (imm5 & 4) {
8800 arT = isQ ? "4s" : "2s";
8801 laneTy = Ity_I32;
8802 assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
8803 }
8804 else if ((imm5 & 8) && isQ) {
8805 arT = "2d";
8806 laneTy = Ity_I64;
8807 assign(w0, getIReg64orZR(nn));
8808 }
8809 else {
8810 /* invalid; leave laneTy unchanged. */
8811 }
8812 /* */
8813 if (laneTy != Ity_INVALID) {
8814 IRTemp w1 = math_DUP_TO_64(w0, laneTy);
8815 putQReg128(dd, binop(Iop_64HLtoV128,
8816 isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
8817 DIP("dup %s.%s, %s\n",
8818 nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
8819 return True;
8820 }
sewardj787a67f2014-06-23 09:09:41 +00008821 /* invalid */
8822 return False;
sewardjdf1628c2014-06-10 22:52:05 +00008823 }
8824
8825 /* -------- 1,0,0011: INS (general) -------- */
8826 /* 31 28 20 15 9 4
8827 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
8828 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
8829 xxx10 -> H, xxx
8830 xx100 -> S, xx
8831 x1000 -> D, x
8832 */
8833 if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
8834 HChar ts = '?';
8835 UInt laneNo = 16;
8836 IRExpr* src = NULL;
8837 if (imm5 & 1) {
8838 src = unop(Iop_64to8, getIReg64orZR(nn));
8839 laneNo = (imm5 >> 1) & 15;
8840 ts = 'b';
8841 }
8842 else if (imm5 & 2) {
8843 src = unop(Iop_64to16, getIReg64orZR(nn));
8844 laneNo = (imm5 >> 2) & 7;
8845 ts = 'h';
8846 }
8847 else if (imm5 & 4) {
8848 src = unop(Iop_64to32, getIReg64orZR(nn));
8849 laneNo = (imm5 >> 3) & 3;
8850 ts = 's';
8851 }
8852 else if (imm5 & 8) {
8853 src = getIReg64orZR(nn);
8854 laneNo = (imm5 >> 4) & 1;
8855 ts = 'd';
8856 }
8857 /* */
8858 if (src) {
8859 vassert(laneNo < 16);
8860 putQRegLane(dd, laneNo, src);
8861 DIP("ins %s.%c[%u], %s\n",
8862 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
8863 return True;
8864 }
sewardj787a67f2014-06-23 09:09:41 +00008865 /* invalid */
8866 return False;
sewardjdf1628c2014-06-10 22:52:05 +00008867 }
8868
8869 /* -------- x,0,0101: SMOV -------- */
8870 /* -------- x,0,0111: UMOV -------- */
8871 /* 31 28 20 15 9 4
8872 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
8873 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
8874 dest is Xd when q==1, Wd when q==0
8875 UMOV:
8876 Ts,index,ops = case q:imm5 of
8877 0:xxxx1 -> B, xxxx, 8Uto64
8878 1:xxxx1 -> invalid
8879 0:xxx10 -> H, xxx, 16Uto64
8880 1:xxx10 -> invalid
8881 0:xx100 -> S, xx, 32Uto64
8882 1:xx100 -> invalid
8883 1:x1000 -> D, x, copy64
8884 other -> invalid
8885 SMOV:
8886 Ts,index,ops = case q:imm5 of
8887 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
8888 1:xxxx1 -> B, xxxx, 8Sto64
8889 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
8890 1:xxx10 -> H, xxx, 16Sto64
8891 0:xx100 -> invalid
8892 1:xx100 -> S, xx, 32Sto64
8893 1:x1000 -> invalid
8894 other -> invalid
8895 */
8896 if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
8897 Bool isU = (imm4 & 2) == 2;
8898 const HChar* arTs = "??";
8899 UInt laneNo = 16; /* invalid */
8900 // Setting 'res' to non-NULL determines valid/invalid
8901 IRExpr* res = NULL;
8902 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
8903 laneNo = (imm5 >> 1) & 15;
8904 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8905 res = isU ? unop(Iop_8Uto64, lane)
8906 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
8907 arTs = "b";
8908 }
8909 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
8910 laneNo = (imm5 >> 1) & 15;
8911 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
8912 res = isU ? NULL
8913 : unop(Iop_8Sto64, lane);
8914 arTs = "b";
8915 }
8916 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
8917 laneNo = (imm5 >> 2) & 7;
8918 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8919 res = isU ? unop(Iop_16Uto64, lane)
8920 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
8921 arTs = "h";
8922 }
8923 else if (bitQ && (imm5 & 2)) { // 1:xxx10
8924 laneNo = (imm5 >> 2) & 7;
8925 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
8926 res = isU ? NULL
8927 : unop(Iop_16Sto64, lane);
8928 arTs = "h";
8929 }
8930 else if (!bitQ && (imm5 & 4)) { // 0:xx100
8931 laneNo = (imm5 >> 3) & 3;
8932 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8933 res = isU ? unop(Iop_32Uto64, lane)
8934 : NULL;
8935 arTs = "s";
8936 }
8937      else if (bitQ && (imm5 & 4)) { // 1:xx100
8938 laneNo = (imm5 >> 3) & 3;
8939 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
8940 res = isU ? NULL
8941 : unop(Iop_32Sto64, lane);
8942 arTs = "s";
8943 }
8944 else if (bitQ && (imm5 & 8)) { // 1:x1000
8945 laneNo = (imm5 >> 4) & 1;
8946 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
8947 res = isU ? lane
8948 : NULL;
8949 arTs = "d";
8950 }
8951 /* */
8952 if (res) {
8953 vassert(laneNo < 16);
8954 putIReg64orZR(dd, res);
8955 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
8956 nameIRegOrZR(bitQ == 1, dd),
8957 nameQReg128(nn), arTs, laneNo);
8958 return True;
8959 }
sewardj787a67f2014-06-23 09:09:41 +00008960 /* invalid */
8961 return False;
8962 }
8963
8964 /* -------- 1,1,xxxx: INS (element) -------- */
8965 /* 31 28 20 14 9 4
8966 011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
8967 where Ts,ix1,ix2
8968 = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
8969 xxx10 -> H, xxx, imm4[3:1]
8970 xx100 -> S, xx, imm4[3:2]
8971 x1000 -> D, x, imm4[3:3]
8972 */
8973 if (bitQ == 1 && bitOP == 1) {
8974 HChar ts = '?';
8975 IRType ity = Ity_INVALID;
8976 UInt ix1 = 16;
8977 UInt ix2 = 16;
8978 if (imm5 & 1) {
8979 ts = 'b';
8980 ity = Ity_I8;
8981 ix1 = (imm5 >> 1) & 15;
8982 ix2 = (imm4 >> 0) & 15;
8983 }
8984 else if (imm5 & 2) {
8985 ts = 'h';
8986 ity = Ity_I16;
8987 ix1 = (imm5 >> 2) & 7;
8988 ix2 = (imm4 >> 1) & 7;
8989 }
8990 else if (imm5 & 4) {
8991 ts = 's';
8992 ity = Ity_I32;
8993 ix1 = (imm5 >> 3) & 3;
8994 ix2 = (imm4 >> 2) & 3;
8995 }
8996 else if (imm5 & 8) {
8997 ts = 'd';
8998 ity = Ity_I64;
8999 ix1 = (imm5 >> 4) & 1;
9000 ix2 = (imm4 >> 3) & 1;
9001 }
9002 /* */
9003 if (ity != Ity_INVALID) {
9004 vassert(ix1 < 16);
9005 vassert(ix2 < 16);
9006 putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
9007 DIP("ins %s.%c[%u], %s.%c[%u]\n",
9008 nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
9009 return True;
9010 }
9011 /* invalid */
9012 return False;
sewardjdf1628c2014-06-10 22:52:05 +00009013 }
9014
9015 return False;
9016# undef INSN
9017}
9018
9019
9020static
9021Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
9022{
9023 /* 31 28 18 15 11 9 4
9024 0q op 01111 00000 abc cmode 01 defgh d
sewardj2b6fd5e2014-06-19 14:21:37 +00009025 Decode fields: q,op,cmode
9026 Bit 11 is really "o2", but it is always zero.
sewardjdf1628c2014-06-10 22:52:05 +00009027 */
9028# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9029 if (INSN(31,31) != 0
9030 || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
9031 || INSN(11,10) != BITS2(0,1)) {
9032 return False;
9033 }
9034 UInt bitQ = INSN(30,30);
9035 UInt bitOP = INSN(29,29);
9036 UInt cmode = INSN(15,12);
9037 UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
9038 UInt dd = INSN(4,0);
9039
sewardjdf1628c2014-06-10 22:52:05 +00009040 ULong imm64lo = 0;
9041 UInt op_cmode = (bitOP << 4) | cmode;
9042 Bool ok = False;
sewardj2b6fd5e2014-06-19 14:21:37 +00009043 Bool isORR = False;
9044 Bool isBIC = False;
sewardj787a67f2014-06-23 09:09:41 +00009045 Bool isMOV = False;
9046 Bool isMVN = False;
9047 Bool isFMOV = False;
sewardjdf1628c2014-06-10 22:52:05 +00009048 switch (op_cmode) {
sewardj2b6fd5e2014-06-19 14:21:37 +00009049 /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
sewardj2b6fd5e2014-06-19 14:21:37 +00009050 /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
sewardj787a67f2014-06-23 09:09:41 +00009051 /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
9052 /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
9053 case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
9054 case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
9055 ok = True; isMOV = True; break;
sewardj2b6fd5e2014-06-19 14:21:37 +00009056
9057 /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
9058 /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
9059 /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
9060 /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
9061 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
9062 case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
9063 ok = True; isORR = True; break;
9064
sewardj787a67f2014-06-23 09:09:41 +00009065 /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
9066 /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
9067 case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
9068 ok = True; isMOV = True; break;
9069
9070 /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
9071 /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
9072 case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
9073 ok = True; isORR = True; break;
9074
9075 /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
9076 /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
9077 case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
9078 ok = True; isMOV = True; break;
9079
9080 /* -------- x,0,1110 MOVI 8-bit -------- */
9081 case BITS5(0,1,1,1,0):
9082 ok = True; isMOV = True; break;
9083
sewardj6a785df2015-02-09 09:07:47 +00009084 /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
9085 case BITS5(0,1,1,1,1): // 0:1111
9086 ok = True; isFMOV = True; break;
sewardj787a67f2014-06-23 09:09:41 +00009087
9088 /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
9089 /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
9090 /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
9091 /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
9092 case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
9093 case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
9094 ok = True; isMVN = True; break;
9095
sewardj2b6fd5e2014-06-19 14:21:37 +00009096 /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
9097 /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
9098 /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
9099 /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
9100 case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
9101 case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
9102 ok = True; isBIC = True; break;
9103
sewardj787a67f2014-06-23 09:09:41 +00009104 /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
9105 /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
9106 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
9107 ok = True; isMVN = True; break;
9108
9109 /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
9110 /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
9111 case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
9112 ok = True; isBIC = True; break;
9113
9114 /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
9115 /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
9116 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
9117 ok = True; isMVN = True; break;
9118
9119 /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
9120 /* -------- 1,1,1110 MOVI 64-bit vector -------- */
9121 case BITS5(1,1,1,1,0):
9122 ok = True; isMOV = True; break;
9123
sewardj6a785df2015-02-09 09:07:47 +00009124 /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
sewardj787a67f2014-06-23 09:09:41 +00009125 case BITS5(1,1,1,1,1): // 1:1111
9126 ok = bitQ == 1; isFMOV = True; break;
9127
sewardjdf1628c2014-06-10 22:52:05 +00009128 default:
9129 break;
9130 }
9131 if (ok) {
sewardj787a67f2014-06-23 09:09:41 +00009132 vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
9133 + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
sewardjdf1628c2014-06-10 22:52:05 +00009134 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
9135 }
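   /* (For instance, MOVI Vd.4s, #0xFF, LSL #8 has op == 0, cmode == 0010
      and abcdefgh == 0xFF, for which AdvSIMDExpandImm produces
      imm64lo == 0x0000FF000000FF00.) */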
9136 if (ok) {
sewardj2b6fd5e2014-06-19 14:21:37 +00009137 if (isORR || isBIC) {
9138 ULong inv
9139 = isORR ? 0ULL : ~0ULL;
9140 IRExpr* immV128
9141 = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
9142 IRExpr* res
9143 = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
sewardj2b6fd5e2014-06-19 14:21:37 +00009144 const HChar* nm = isORR ? "orr" : "bic";
9145 if (bitQ == 0) {
9146 putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
9147         DIP("%s %s.1d, #0x%016llx\n", nm, nameQReg128(dd), imm64lo);
9148 } else {
9149 putQReg128(dd, res);
9150 DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
9151 nameQReg128(dd), imm64lo, imm64lo);
9152 }
sewardj787a67f2014-06-23 09:09:41 +00009153 }
9154 else if (isMOV || isMVN || isFMOV) {
9155 if (isMVN) imm64lo = ~imm64lo;
9156 ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
sewardj8e91fd42014-07-11 12:05:47 +00009157 IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
9158 mkU64(imm64lo));
sewardj2b6fd5e2014-06-19 14:21:37 +00009159 putQReg128(dd, immV128);
9160 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
9161 }
sewardjdf1628c2014-06-10 22:52:05 +00009162 return True;
9163 }
9164 /* else fall through */
9165
9166 return False;
9167# undef INSN
9168}
9169
9170
9171static
9172Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
9173{
sewardjab33a7a2014-06-19 22:20:47 +00009174 /* 31 28 20 15 14 10 9 4
9175 01 op 11110000 imm5 0 imm4 1 n d
9176 Decode fields: op,imm4
9177 */
sewardjdf1628c2014-06-10 22:52:05 +00009178# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjab33a7a2014-06-19 22:20:47 +00009179 if (INSN(31,30) != BITS2(0,1)
9180 || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
9181 || INSN(15,15) != 0 || INSN(10,10) != 1) {
9182 return False;
9183 }
9184 UInt bitOP = INSN(29,29);
9185 UInt imm5 = INSN(20,16);
9186 UInt imm4 = INSN(14,11);
9187 UInt nn = INSN(9,5);
9188 UInt dd = INSN(4,0);
9189
9190 if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
9191 /* -------- 0,0000 DUP (element, scalar) -------- */
9192 IRTemp w0 = newTemp(Ity_I64);
9193 const HChar* arTs = "??";
9194 IRType laneTy = Ity_INVALID;
9195 UInt laneNo = 16; /* invalid */
9196 if (imm5 & 1) {
9197 arTs = "b";
9198 laneNo = (imm5 >> 1) & 15;
9199 laneTy = Ity_I8;
9200 assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
9201 }
9202 else if (imm5 & 2) {
9203 arTs = "h";
9204 laneNo = (imm5 >> 2) & 7;
9205 laneTy = Ity_I16;
9206 assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
9207 }
9208 else if (imm5 & 4) {
9209 arTs = "s";
9210 laneNo = (imm5 >> 3) & 3;
9211 laneTy = Ity_I32;
9212 assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
9213 }
9214 else if (imm5 & 8) {
9215 arTs = "d";
9216 laneNo = (imm5 >> 4) & 1;
9217 laneTy = Ity_I64;
9218 assign(w0, getQRegLane(nn, laneNo, laneTy));
9219 }
9220 else {
9221 /* invalid; leave laneTy unchanged. */
9222 }
9223 /* */
9224 if (laneTy != Ity_INVALID) {
9225 vassert(laneNo < 16);
9226 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
9227 DIP("dup %s, %s.%s[%u]\n",
9228 nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
9229 return True;
9230 }
9231 /* else fall through */
9232 }
9233
sewardjdf1628c2014-06-10 22:52:05 +00009234 return False;
9235# undef INSN
9236}
9237
sewardjfc83d2c2014-06-12 10:15:46 +00009238
sewardjdf1628c2014-06-10 22:52:05 +00009239static
9240Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
9241{
sewardjb9aff1e2014-06-15 21:55:33 +00009242 /* 31 28 23 21 16 11 9 4
9243 01 u 11110 sz 11000 opcode 10 n d
9244 Decode fields: u,sz,opcode
9245 */
sewardjdf1628c2014-06-10 22:52:05 +00009246# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjb9aff1e2014-06-15 21:55:33 +00009247 if (INSN(31,30) != BITS2(0,1)
9248 || INSN(28,24) != BITS5(1,1,1,1,0)
9249 || INSN(21,17) != BITS5(1,1,0,0,0)
9250 || INSN(11,10) != BITS2(1,0)) {
9251 return False;
9252 }
9253 UInt bitU = INSN(29,29);
9254 UInt sz = INSN(23,22);
9255 UInt opcode = INSN(16,12);
9256 UInt nn = INSN(9,5);
9257 UInt dd = INSN(4,0);
9258
9259 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
9260 /* -------- 0,11,11011 ADDP d_2d -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009261 IRTemp xy = newTempV128();
9262 IRTemp xx = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00009263 assign(xy, getQReg128(nn));
9264 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
9265 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
9266 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
9267 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
9268 return True;
9269 }
9270
sewardj76927e62014-11-17 11:21:21 +00009271 if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
9272      /* -------- 1,00,01101 FADDP s_2s -------- */
9273      /* -------- 1,01,01101 FADDP d_2d -------- */
9274 Bool isD = sz == X01;
9275 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9276 IROp opADD = mkVecADDF(isD ? 3 : 2);
9277 IRTemp src = newTempV128();
9278 IRTemp argL = newTempV128();
9279 IRTemp argR = newTempV128();
9280 assign(src, getQReg128(nn));
9281 assign(argL, unop(opZHI, mkexpr(src)));
9282 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9283 mkU8(isD ? 8 : 4))));
9284 putQReg128(dd, unop(opZHI,
9285 triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
9286 mkexpr(argL), mkexpr(argR))));
9287 DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
9288 return True;
9289 }
9290
sewardj5cb53e72015-02-08 12:08:56 +00009291 if (bitU == 1
9292 && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
9293 /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
9294 /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
9295 /* -------- 1,0x,01111 FMAXP d_2d, s_2s -------- */
9296 /* -------- 1,1x,01111 FMINP d_2d, s_2s -------- */
9297 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
9298 Bool isD = (sz & 1) == 1;
9299 Bool isMIN = (sz & 2) == 2;
9300 Bool isNM = opcode == BITS5(0,1,1,0,0);
9301 IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
9302 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
9303 IRTemp src = newTempV128();
9304 IRTemp argL = newTempV128();
9305 IRTemp argR = newTempV128();
9306 assign(src, getQReg128(nn));
9307 assign(argL, unop(opZHI, mkexpr(src)));
9308 assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
9309 mkU8(isD ? 8 : 4))));
9310 putQReg128(dd, unop(opZHI,
9311 binop(opMXX, mkexpr(argL), mkexpr(argR))));
9312 HChar c = isD ? 'd' : 's';
9313 DIP("%s%sp %c%u, v%u.2%c\n",
9314 isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
9315 return True;
9316 }
9317
sewardjdf1628c2014-06-10 22:52:05 +00009318 return False;
9319# undef INSN
9320}
9321
sewardjfc83d2c2014-06-12 10:15:46 +00009322
sewardjdf1628c2014-06-10 22:52:05 +00009323static
9324Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
9325{
9326 /* 31 28 22 18 15 10 9 4
9327 01 u 111110 immh immb opcode 1 n d
9328 Decode fields: u,immh,opcode
9329 */
9330# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9331 if (INSN(31,30) != BITS2(0,1)
9332 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
9333 return False;
9334 }
9335 UInt bitU = INSN(29,29);
9336 UInt immh = INSN(22,19);
9337 UInt immb = INSN(18,16);
9338 UInt opcode = INSN(15,11);
9339 UInt nn = INSN(9,5);
9340 UInt dd = INSN(4,0);
9341 UInt immhb = (immh << 3) | immb;
9342
sewardja6b61f02014-08-17 18:32:14 +00009343 if ((immh & 8) == 8
9344 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
9345 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
9346 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
9347 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
9348 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
9349 Bool isU = bitU == 1;
9350 Bool isAcc = opcode == BITS5(0,0,0,1,0);
9351 UInt sh = 128 - immhb;
sewardjfc83d2c2014-06-12 10:15:46 +00009352 vassert(sh >= 1 && sh <= 64);
sewardja6b61f02014-08-17 18:32:14 +00009353 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
9354 IRExpr* src = getQReg128(nn);
9355 IRTemp shf = newTempV128();
9356 IRTemp res = newTempV128();
9357 if (sh == 64 && isU) {
9358 assign(shf, mkV128(0x0000));
9359 } else {
9360 UInt nudge = 0;
9361 if (sh == 64) {
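            /* A signed shift right by 64 gives the same result as one by
               63 (every result bit is a copy of the sign bit), and 63 is
               representable as an IR shift amount. */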
9362 vassert(!isU);
9363 nudge = 1;
9364 }
9365 assign(shf, binop(op, src, mkU8(sh - nudge)));
9366 }
9367 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9368 : mkexpr(shf));
9369 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9370 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
9371 : (isU ? "ushr" : "sshr");
9372 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
9373 return True;
9374 }
9375
9376 if ((immh & 8) == 8
9377 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
9378 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
9379 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
9380 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
9381 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
9382 Bool isU = bitU == 1;
9383 Bool isAcc = opcode == BITS5(0,0,1,1,0);
9384 UInt sh = 128 - immhb;
9385 vassert(sh >= 1 && sh <= 64);
9386 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
9388 IRExpr* src = getQReg128(nn);
9389 IRTemp imm8 = newTemp(Ity_I8);
9390 assign(imm8, mkU8((UChar)(-sh)));
9391 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
9392 IRTemp shf = newTempV128();
9393 IRTemp res = newTempV128();
9394 assign(shf, binop(op, src, amt));
9395 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
9396 : mkexpr(shf));
9397 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9398 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
9399 : (isU ? "urshr" : "srshr");
9400 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
sewardjfc83d2c2014-06-12 10:15:46 +00009401 return True;
9402 }
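/* Reference semantics for the rounding right shifts above (a sketch,
   not used by the decoder): conceptually, for 1 <= sh <= 64,
      urshr(x, sh) = (x + (1 << (sh-1))) >> sh
   with the addition done at more than 64 bits so that it cannot wrap;
   srshr is the same with an arithmetic final shift.  The Iop_Rsh* ops
   follow the SRSHL/URSHL convention: the shift amount is a per-lane
   signed byte, and a negative amount means "shift right, rounding" --
   hence the DUPed -sh byte vector above. */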
9403
sewardj8e91fd42014-07-11 12:05:47 +00009404 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
9405 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
9406 UInt sh = 128 - immhb;
9407 vassert(sh >= 1 && sh <= 64);
9408 if (sh == 64) {
9409 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
9410 } else {
9411 /* sh is in range 1 .. 63 */
9412 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
9413 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9414 IRTemp res = newTempV128();
9415 assign(res, binop(Iop_OrV128,
9416 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9417 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
9418 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9419 }
9420 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
9421 return True;
9422 }
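/* Worked example for the SRI mask above: with sh == 8,
   nmask = (Long)0x8000000000000000 >> 7 = 0xFF00000000000000, so the
   top 8 bits of Dd are kept and the lower 56 bits come from Dn >> 8.
   In general SRI keeps the top sh bits of the destination and inserts
   the shifted source underneath them. */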
9423
sewardjacc29642014-08-15 05:35:35 +00009424 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9425 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
9426 UInt sh = immhb - 64;
9427 vassert(sh >= 0 && sh < 64);
9428 putQReg128(dd,
9429 unop(Iop_ZeroHI64ofV128,
9430 sh == 0 ? getQReg128(nn)
9431 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9432 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
9433 return True;
9434 }
9435
sewardj8e91fd42014-07-11 12:05:47 +00009436 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
9437 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
9438 UInt sh = immhb - 64;
9439 vassert(sh >= 0 && sh < 64);
9440 if (sh == 0) {
9441 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
9442 } else {
9443 /* sh is in range 1 .. 63 */
9444 ULong nmask = (1ULL << sh) - 1;
9445 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
9446 IRTemp res = newTempV128();
9447 assign(res, binop(Iop_OrV128,
9448 binop(Iop_AndV128, getQReg128(dd), nmaskV),
9449 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
9450 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9451 }
9452 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
9453 return True;
9454 }
9455
sewardjacc29642014-08-15 05:35:35 +00009456 if (opcode == BITS5(0,1,1,1,0)
9457 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
9458 /* -------- 0,01110 SQSHL #imm -------- */
9459 /* -------- 1,01110 UQSHL #imm -------- */
9460 /* -------- 1,01100 SQSHLU #imm -------- */
9461 UInt size = 0;
9462 UInt shift = 0;
9463 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9464 if (!ok) return False;
9465 vassert(size >= 0 && size <= 3);
9466 /* The shift encoding has opposite sign for the leftwards case.
9467 Adjust shift to compensate. */
9468 UInt lanebits = 8 << size;
9469 shift = lanebits - shift;
9470 vassert(shift >= 0 && shift < lanebits);
9471 const HChar* nm = NULL;
9472 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
9473 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
9474 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
9475 else vassert(0);
9476 IRTemp qDiff1 = IRTemp_INVALID;
9477 IRTemp qDiff2 = IRTemp_INVALID;
9478 IRTemp res = IRTemp_INVALID;
9479 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
9480 /* This relies on the fact that the zeroed out lanes generate zeroed
9481 result lanes and don't saturate, so there's no point in trimming
9482 the resulting res, qDiff1 or qDiff2 values. */
9483 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
9484 putQReg128(dd, mkexpr(res));
9485 updateQCFLAGwithDifference(qDiff1, qDiff2);
9486 const HChar arr = "bhsd"[size];
9487 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
9488 return True;
9489 }
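/* Worked example for the shift re-adjustment above: a d-sized SQSHL #8
   is encoded with immh:immb = 1001:000 (immhb = 72).
   getLaneInfo_IMMH_IMMB returns the right-shift view,
   shift = 128 - 72 = 56, and the adjustment recovers the left-shift
   amount: lanebits - 56 = 64 - 56 = 8. */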
9490
sewardje741d162014-08-13 13:10:47 +00009491 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
9492 || (bitU == 1
9493 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
9494 /* -------- 0,10010 SQSHRN #imm -------- */
9495 /* -------- 1,10010 UQSHRN #imm -------- */
9496 /* -------- 0,10011 SQRSHRN #imm -------- */
9497 /* -------- 1,10011 UQRSHRN #imm -------- */
9498 /* -------- 1,10000 SQSHRUN #imm -------- */
9499 /* -------- 1,10001 SQRSHRUN #imm -------- */
9500 UInt size = 0;
9501 UInt shift = 0;
9502 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
9503 if (!ok || size == X11) return False;
9504 vassert(size >= X00 && size <= X10);
9505 vassert(shift >= 1 && shift <= (8 << size));
9506 const HChar* nm = "??";
9507 IROp op = Iop_INVALID;
9508 /* Decide on the name and the operation. */
9509 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
9510 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
9511 }
9512 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
9513 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
9514 }
9515 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
9516 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
9517 }
9518 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
9519 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
9520 }
9521 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
9522 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
9523 }
9524 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
9525 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
9526 }
9527 else vassert(0);
9528 /* Compute the result (Q, shifted value) pair. */
9529 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
9530 IRTemp pair = newTempV128();
9531 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
9532 /* Update the result reg */
9533 IRTemp res64in128 = newTempV128();
9534 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
9535 putQReg128(dd, mkexpr(res64in128));
9536 /* Update the Q flag. */
9537 IRTemp q64q64 = newTempV128();
9538 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
9539 IRTemp z128 = newTempV128();
9540 assign(z128, mkV128(0x0000));
9541 updateQCFLAGwithDifference(q64q64, z128);
9542 /* */
9543 const HChar arrNarrow = "bhsd"[size];
9544 const HChar arrWide = "bhsd"[size+1];
9545 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
9546 return True;
9547 }
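/* Note on the Q computation above: the QANDq*NARROW ops return a
   (Q, result) pair in a single V128 -- the narrowed value in the lower
   64 bits and a "did any lane saturate" mask in the upper 64 bits.
   InterleaveHI64x2(pair, pair) copies that upper half into both
   halves, so comparing against an all-zero V128 sets QCFLAG exactly
   when saturation occurred. */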
9548
sewardj2130b342015-04-06 14:49:05 +00009549 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,0,0)) {
9550 /* -------- 0,!=00xx,11100 SCVTF d_d_imm, s_s_imm -------- */
9551 /* -------- 1,!=00xx,11100 UCVTF d_d_imm, s_s_imm -------- */
9552 UInt size = 0;
9553 UInt fbits = 0;
9554 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9555 /* The following holds because immh is never zero. */
9556 vassert(ok);
9557 /* The following holds because immh >= 0100. */
9558 vassert(size == X10 || size == X11);
9559 Bool isD = size == X11;
9560 Bool isU = bitU == 1;
9561 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9562 Double scale = two_to_the_minus(fbits);
9563 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9564 : IRExpr_Const(IRConst_F32( (Float)scale ));
9565 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9566 IROp opCVT = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
9567 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
9568 IRType tyF = isD ? Ity_F64 : Ity_F32;
9569 IRType tyI = isD ? Ity_I64 : Ity_I32;
9570 IRTemp src = newTemp(tyI);
9571 IRTemp res = newTemp(tyF);
9572 IRTemp rm = mk_get_IR_rounding_mode();
9573 assign(src, getQRegLane(nn, 0, tyI));
9574 assign(res, triop(opMUL, mkexpr(rm),
9575 binop(opCVT, mkexpr(rm), mkexpr(src)), scaleE));
9576 putQRegLane(dd, 0, mkexpr(res));
9577 if (!isD) {
9578 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9579 }
9580 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
9581 const HChar ch = isD ? 'd' : 's';
9582 DIP("%s %c%u, %c%u, #%u\n", isU ? "ucvtf" : "scvtf",
9583 ch, dd, ch, nn, fbits);
9584 return True;
9585 }
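/* Worked example (illustrative only): ucvtf s0, s1, #8 interprets the
   32 bits of s1 as unsigned fixed-point with 8 fraction bits, so
   s1 = 0x00000280 (640) converts to 640 * 2^-8 = 2.5. */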
9586
9587 if (immh >= BITS4(0,1,0,0) && opcode == BITS5(1,1,1,1,1)) {
9588 /* -------- 0,!=00xx,11111 FCVTZS d_d_imm, s_s_imm -------- */
9589 /* -------- 1,!=00xx,11111 FCVTZU d_d_imm, s_s_imm -------- */
9590 UInt size = 0;
9591 UInt fbits = 0;
9592 Bool ok = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
9593 /* The following holds because immh is never zero. */
9594 vassert(ok);
9595 /* The following holds because immh >= 0100. */
9596 vassert(size == X10 || size == X11);
9597 Bool isD = size == X11;
9598 Bool isU = bitU == 1;
9599 vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
9600 Double scale = two_to_the_plus(fbits);
9601 IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
9602 : IRExpr_Const(IRConst_F32( (Float)scale ));
9603 IROp opMUL = isD ? Iop_MulF64 : Iop_MulF32;
9604 IROp opCVT = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
9605 : (isD ? Iop_F64toI64S : Iop_F32toI32S);
9606 IRType tyF = isD ? Ity_F64 : Ity_F32;
9607 IRType tyI = isD ? Ity_I64 : Ity_I32;
9608 IRTemp src = newTemp(tyF);
9609 IRTemp res = newTemp(tyI);
9610 IRTemp rm = newTemp(Ity_I32);
9611 assign(src, getQRegLane(nn, 0, tyF));
9612 assign(rm, mkU32(Irrm_ZERO));
9613 assign(res, binop(opCVT, mkexpr(rm),
9614 triop(opMUL, mkexpr(rm), mkexpr(src), scaleE)));
9615 putQRegLane(dd, 0, mkexpr(res));
9616 if (!isD) {
9617 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9618 }
9619 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
9620 const HChar ch = isD ? 'd' : 's';
9621 DIP("%s %c%u, %c%u, #%u\n", isU ? "fcvtzu" : "fcvtzs",
9622 ch, dd, ch, nn, fbits);
9623 return True;
9624 }
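/* Worked example (illustrative only): fcvtzs s0, s1, #8 computes
   round_towards_zero(s1 * 2^8) -- the inverse of the scvtf example
   above -- so 2.5 becomes 640 (0x280). */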
9625
sewardjdf1628c2014-06-10 22:52:05 +00009626# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9627 return False;
9628# undef INSN
9629}
9630
sewardjfc83d2c2014-06-12 10:15:46 +00009631
sewardjdf1628c2014-06-10 22:52:05 +00009632static
9633Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9634{
sewardj54ffa1d2014-07-22 09:27:49 +00009635 /* 31 29 28 23 21 20 15 11 9 4
9636 01 U 11110 size 1 m opcode 00 n d
9637 Decode fields: u,opcode
9638 */
sewardjdf1628c2014-06-10 22:52:05 +00009639# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00009640 if (INSN(31,30) != BITS2(0,1)
9641 || INSN(28,24) != BITS5(1,1,1,1,0)
9642 || INSN(21,21) != 1
9643 || INSN(11,10) != BITS2(0,0)) {
9644 return False;
9645 }
9646 UInt bitU = INSN(29,29);
9647 UInt size = INSN(23,22);
9648 UInt mm = INSN(20,16);
9649 UInt opcode = INSN(15,12);
9650 UInt nn = INSN(9,5);
9651 UInt dd = INSN(4,0);
9652 vassert(size < 4);
9653
9654 if (bitU == 0
9655 && (opcode == BITS4(1,1,0,1)
9656 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9657 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9658 /* -------- 0,1001 SQDMLAL -------- */ // 1
9659 /* -------- 0,1011 SQDMLSL -------- */ // 2
9660 /* Widens, and size refers to the narrowed lanes. */
9661 UInt ks = 3;
9662 switch (opcode) {
9663 case BITS4(1,1,0,1): ks = 0; break;
9664 case BITS4(1,0,0,1): ks = 1; break;
9665 case BITS4(1,0,1,1): ks = 2; break;
9666 default: vassert(0);
9667 }
9668 vassert(ks >= 0 && ks <= 2);
9669 if (size == X00 || size == X11) return False;
9670 vassert(size <= 2);
9671 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9672 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9673 newTempsV128_3(&vecN, &vecM, &vecD);
9674 assign(vecN, getQReg128(nn));
9675 assign(vecM, getQReg128(mm));
9676 assign(vecD, getQReg128(dd));
9677 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9678 False/*!is2*/, size, "mas"[ks],
9679 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9680 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
9681 putQReg128(dd, unop(opZHI, mkexpr(res)));
9682 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9683 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9684 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9685 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
9686 }
9687 const HChar* nm = ks == 0 ? "sqdmull"
9688 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9689 const HChar arrNarrow = "bhsd"[size];
9690 const HChar arrWide = "bhsd"[size+1];
florianb1737742015-08-03 16:03:13 +00009691 DIP("%s %c%u, %c%u, %c%u\n",
sewardj54ffa1d2014-07-22 09:27:49 +00009692 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
9693 return True;
9694 }
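/* Note on saturation (illustrative only): SQDMULL computes
   sat(2 * n * m) at twice the input width.  The doubling multiply
   itself can only saturate for n == m == most-negative value, e.g.
   0x8000 * 0x8000 * 2 in the h variant, which exceeds the 32-bit
   signed range and yields 0x7FFFFFFF with QCFLAG set.  The
   accumulating forms can additionally saturate on the add/sub. */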
9695
sewardjdf1628c2014-06-10 22:52:05 +00009696 return False;
9697# undef INSN
9698}
9699
9700
9701static
9702Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9703{
9704 /* 31 29 28 23 21 20 15 10 9 4
9705 01 U 11110 size 1 m opcode 1 n d
sewardj51d012a2014-07-21 09:19:50 +00009706 Decode fields: u,size,opcode
sewardjdf1628c2014-06-10 22:52:05 +00009707 */
9708# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9709 if (INSN(31,30) != BITS2(0,1)
9710 || INSN(28,24) != BITS5(1,1,1,1,0)
9711 || INSN(21,21) != 1
9712 || INSN(10,10) != 1) {
9713 return False;
9714 }
9715 UInt bitU = INSN(29,29);
9716 UInt size = INSN(23,22);
9717 UInt mm = INSN(20,16);
9718 UInt opcode = INSN(15,11);
9719 UInt nn = INSN(9,5);
9720 UInt dd = INSN(4,0);
9721 vassert(size < 4);
9722
sewardj51d012a2014-07-21 09:19:50 +00009723 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9724 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9725 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9726 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9727 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9728 Bool isADD = opcode == BITS5(0,0,0,0,1);
9729 Bool isU = bitU == 1;
9730 IROp qop = Iop_INVALID;
9731 IROp nop = Iop_INVALID;
9732 if (isADD) {
9733 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9734 nop = mkVecADD(size);
9735 } else {
9736 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9737 nop = mkVecSUB(size);
9738 }
9739 IRTemp argL = newTempV128();
9740 IRTemp argR = newTempV128();
9741 IRTemp qres = newTempV128();
9742 IRTemp nres = newTempV128();
9743 assign(argL, getQReg128(nn));
9744 assign(argR, getQReg128(mm));
9745 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
sewardj257e99f2014-08-03 12:45:19 +00009746 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
sewardj51d012a2014-07-21 09:19:50 +00009747 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
sewardj257e99f2014-08-03 12:45:19 +00009748 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
sewardj51d012a2014-07-21 09:19:50 +00009749 putQReg128(dd, mkexpr(qres));
9750 updateQCFLAGwithDifference(qres, nres);
9751 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9752 : (isU ? "uqsub" : "sqsub");
9753 const HChar arr = "bhsd"[size];
sewardj12972182014-08-04 08:09:47 +00009754 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
sewardj51d012a2014-07-21 09:19:50 +00009755 return True;
9756 }
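/* Note on the QC update above (illustrative only): the saturating and
   wrapping results are compared and QCFLAG is set if they differ.
   E.g. uqadd b0, b1, b2 with b1 = 0xFF, b2 = 0x01 saturates to 0xFF,
   while the plain add wraps to 0x00, so the difference is nonzero and
   QCFLAG gets set. */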
9757
sewardj2b6fd5e2014-06-19 14:21:37 +00009758 if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
9759 /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
9760 /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
9761 Bool isGT = bitU == 0;
9762 IRExpr* argL = getQReg128(nn);
9763 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009764 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +00009765 assign(res,
9766 isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
9767 : binop(Iop_CmpGT64Ux2, argL, argR));
9768 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9769 DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
9770 nameQRegLO(dd, Ity_I64),
9771 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9772 return True;
9773 }
9774
9775 if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
9776 /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
9777 /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
9778 Bool isGE = bitU == 0;
9779 IRExpr* argL = getQReg128(nn);
9780 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009781 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +00009782 assign(res,
9783 isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
9784 : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
9785 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9786 DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
9787 nameQRegLO(dd, Ity_I64),
9788 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9789 return True;
9790 }
9791
sewardja6b61f02014-08-17 18:32:14 +00009792 if (size == X11 && (opcode == BITS5(0,1,0,0,0)
9793 || opcode == BITS5(0,1,0,1,0))) {
9794 /* -------- 0,xx,01000 SSHL d_d_d -------- */
9795 /* -------- 0,xx,01010 SRSHL d_d_d -------- */
9796 /* -------- 1,xx,01000 USHL d_d_d -------- */
9797 /* -------- 1,xx,01010 URSHL d_d_d -------- */
9798 Bool isU = bitU == 1;
9799 Bool isR = opcode == BITS5(0,1,0,1,0);
9800 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9801 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9802 IRTemp res = newTempV128();
9803 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9804 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9805 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9806 : (isU ? "ushl" : "sshl");
9807 DIP("%s %s, %s, %s\n", nm,
9808 nameQRegLO(dd, Ity_I64),
9809 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9810 return True;
9811 }
9812
sewardj12972182014-08-04 08:09:47 +00009813 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9814 /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
9815 /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
9816 /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
9817 /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
9818 Bool isU = bitU == 1;
9819 Bool isR = opcode == BITS5(0,1,0,1,1);
9820 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9821 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9822 /* This is a bit tricky. Since we're only interested in the lowest
9823 lane of the result, we zero out all the rest in the operands, so
9824 as to ensure that other lanes don't pollute the returned Q value.
9825 This works because it means, for the lanes we don't care about, we
9826 are shifting zero by zero, which can never saturate. */
9827 IRTemp res256 = newTemp(Ity_V256);
9828 IRTemp resSH = newTempV128();
9829 IRTemp resQ = newTempV128();
9830 IRTemp zero = newTempV128();
9831 assign(
9832 res256,
9833 binop(op,
9834 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
9835 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
9836 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9837 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
9838 assign(zero, mkV128(0x0000));
9839 putQReg128(dd, mkexpr(resSH));
9840 updateQCFLAGwithDifference(resQ, zero);
9841 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
9842 : (isU ? "uqshl" : "sqshl");
9843 const HChar arr = "bhsd"[size];
9844 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
9845 return True;
9846 }
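/* Note: unlike most saturating cases here, the QANDxQxSH ops return a
   V256 pair: V256toV128_0 is the shifted result and V256toV128_1 is a
   per-lane saturation mask, so the QC update just compares that mask
   against zero instead of recomputing an unsaturated shift. */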
9847
sewardjdf1628c2014-06-10 22:52:05 +00009848 if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
9849 /* -------- 0,11,10000 ADD d_d_d -------- */
9850 /* -------- 1,11,10000 SUB d_d_d -------- */
9851 Bool isSUB = bitU == 1;
9852 IRTemp res = newTemp(Ity_I64);
9853 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
9854 getQRegLane(nn, 0, Ity_I64),
9855 getQRegLane(mm, 0, Ity_I64)));
9856 putQRegLane(dd, 0, mkexpr(res));
9857 putQRegLane(dd, 1, mkU64(0));
9858 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
9859 nameQRegLO(dd, Ity_I64),
9860 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9861 return True;
9862 }
9863
sewardj2b6fd5e2014-06-19 14:21:37 +00009864 if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
9865 /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
9866 /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
9867 Bool isEQ = bitU == 1;
9868 IRExpr* argL = getQReg128(nn);
9869 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009870 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +00009871 assign(res,
9872 isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
9873 : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
9874 binop(Iop_AndV128, argL, argR),
9875 mkV128(0x0000))));
9876 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
9877 DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
9878 nameQRegLO(dd, Ity_I64),
9879 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
9880 return True;
9881 }
9882
sewardj257e99f2014-08-03 12:45:19 +00009883 if (opcode == BITS5(1,0,1,1,0)) {
9884 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9885 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9886 if (size == X00 || size == X11) return False;
9887 Bool isR = bitU == 1;
9888 IRTemp res, sat1q, sat1n, vN, vM;
9889 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9890 newTempsV128_2(&vN, &vM);
9891 assign(vN, getQReg128(nn));
9892 assign(vM, getQReg128(mm));
9893 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9894 putQReg128(dd,
9895 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
9896 updateQCFLAGwithDifference(
9897 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
9898 math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
9899 const HChar arr = "bhsd"[size];
9900 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
florianb1737742015-08-03 16:03:13 +00009901 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
sewardj257e99f2014-08-03 12:45:19 +00009902 return True;
9903 }
9904
sewardjdf1628c2014-06-10 22:52:05 +00009905 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9906 /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
9907 IRType ity = size == X11 ? Ity_F64 : Ity_F32;
9908 IRTemp res = newTemp(ity);
9909 assign(res, unop(mkABSF(ity),
9910 triop(mkSUBF(ity),
9911 mkexpr(mk_get_IR_rounding_mode()),
9912 getQRegLO(nn,ity), getQRegLO(mm,ity))));
9913 putQReg128(dd, mkV128(0x0000));
9914 putQRegLO(dd, mkexpr(res));
9915 DIP("fabd %s, %s, %s\n",
9916 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9917 return True;
9918 }
9919
sewardjee3db332015-02-08 18:24:38 +00009920 if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9921 /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
9922 // KLUDGE: FMULX is treated the same way as FMUL.  That is wrong for
// the special cases FMULX(+/-0, +/-Inf) and FMULX(+/-Inf, +/-0), which
// must return +/-2.0 (sign = XOR of the operand signs) rather than the
// default NaN that FMUL produces.
9923 IRType ity = size == X01 ? Ity_F64 : Ity_F32;
9924 IRTemp res = newTemp(ity);
9925 assign(res, triop(mkMULF(ity),
9926 mkexpr(mk_get_IR_rounding_mode()),
9927 getQRegLO(nn,ity), getQRegLO(mm,ity)));
9928 putQReg128(dd, mkV128(0x0000));
9929 putQRegLO(dd, mkexpr(res));
9930 DIP("fmulx %s, %s, %s\n",
9931 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9932 return True;
9933 }
9934
sewardj13830dc2015-02-07 21:09:47 +00009935 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9936 /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
9937 /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
9938 Bool isD = size == X01;
9939 IRType ity = isD ? Ity_F64 : Ity_F32;
9940 Bool isGE = bitU == 1;
9941 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9942 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
9943 IRTemp res = newTempV128();
9944 assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9945 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
9946 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9947 mkexpr(res))));
9948 DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
9949 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9950 return True;
9951 }
9952
9953 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9954 /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
9955 Bool isD = size == X11;
9956 IRType ity = isD ? Ity_F64 : Ity_F32;
9957 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
9958 IRTemp res = newTempV128();
9959 assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
9960 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9961 mkexpr(res))));
9962 DIP("%s %s, %s, %s\n", "fcmgt",
9963 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9964 return True;
9965 }
9966
9967 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9968 /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
9969 /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
9970 Bool isD = (size & 1) == 1;
9971 IRType ity = isD ? Ity_F64 : Ity_F32;
9972 Bool isGT = (size & 2) == 2;
9973 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9974 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9975 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9976 IRTemp res = newTempV128();
9977 assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
9978 unop(opABS, getQReg128(nn)))); // swapd
9979 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9980 mkexpr(res))));
9981 DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
9982 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
9983 return True;
9984 }
9985
sewardj89cefe42015-02-24 12:21:01 +00009986 if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
9987 /* -------- 0,0x,11111: FRECPS d_d_d, s_s_s -------- */
9988 /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
9989 Bool isSQRT = (size & 2) == 2;
9990 Bool isD = (size & 1) == 1;
9991 IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
9992 : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
9993 IRTemp res = newTempV128();
9994 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9995 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9996 mkexpr(res))));
9997 HChar c = isD ? 'd' : 's';
9998 DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
9999 c, dd, c, nn, c, mm);
10000 return True;
10001 }
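/* Background (illustrative only): these are the Newton-Raphson step
   operations.  FRECPS(a, b) computes 2.0 - a*b and FRSQRTS(a, b)
   computes (3.0 - a*b) / 2.0, so a reciprocal estimate x from FRECPE
   can be refined as x' = x * FRECPS(a, x), and similarly for
   reciprocal square roots. */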
10002
sewardjdf1628c2014-06-10 22:52:05 +000010003 return False;
10004# undef INSN
10005}
10006
10007
10008static
10009Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
10010{
10011 /* 31 29 28 23 21 16 11 9 4
10012 01 U 11110 size 10000 opcode 10 n d
sewardj8e91fd42014-07-11 12:05:47 +000010013 Decode fields: u,size,opcode
sewardjdf1628c2014-06-10 22:52:05 +000010014 */
10015# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10016 if (INSN(31,30) != BITS2(0,1)
10017 || INSN(28,24) != BITS5(1,1,1,1,0)
10018 || INSN(21,17) != BITS5(1,0,0,0,0)
10019 || INSN(11,10) != BITS2(1,0)) {
10020 return False;
10021 }
10022 UInt bitU = INSN(29,29);
10023 UInt size = INSN(23,22);
10024 UInt opcode = INSN(16,12);
10025 UInt nn = INSN(9,5);
10026 UInt dd = INSN(4,0);
10027 vassert(size < 4);
10028
sewardjf7003bc2014-08-18 12:28:02 +000010029 if (opcode == BITS5(0,0,0,1,1)) {
10030 /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
10031 /* -------- 1,xx,00011: USQADD std4_std4 -------- */
10032 /* These are a bit tricky (to say the least). See comments on
10033 the vector variants (in dis_AdvSIMD_two_reg_misc) below for
10034 details. */
10035 Bool isUSQADD = bitU == 1;
10036 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
10037 : mkVecQADDEXTUSSATSS(size);
10038 IROp nop = mkVecADD(size);
10039 IRTemp argL = newTempV128();
10040 IRTemp argR = newTempV128();
10041 assign(argL, getQReg128(nn));
10042 assign(argR, getQReg128(dd));
10043 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10044 size, binop(qop, mkexpr(argL), mkexpr(argR)));
10045 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10046 size, binop(nop, mkexpr(argL), mkexpr(argR)));
10047 putQReg128(dd, mkexpr(qres));
10048 updateQCFLAGwithDifference(qres, nres);
10049 const HChar arr = "bhsd"[size];
10050 DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
10051 return True;
10052 }
10053
sewardj51d012a2014-07-21 09:19:50 +000010054 if (opcode == BITS5(0,0,1,1,1)) {
sewardj8e91fd42014-07-11 12:05:47 +000010055 /* -------- 0,xx,00111 SQABS std4_std4 -------- */
sewardj51d012a2014-07-21 09:19:50 +000010056 /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
10057 Bool isNEG = bitU == 1;
10058 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
10059 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
10060 getQReg128(nn), size );
sewardj257e99f2014-08-03 12:45:19 +000010061 IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
10062 IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
sewardj8e91fd42014-07-11 12:05:47 +000010063 putQReg128(dd, mkexpr(qres));
10064 updateQCFLAGwithDifference(qres, nres);
10065 const HChar arr = "bhsd"[size];
sewardj51d012a2014-07-21 09:19:50 +000010066 DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
sewardj8e91fd42014-07-11 12:05:47 +000010067 return True;
10068 }
10069
sewardj2b6fd5e2014-06-19 14:21:37 +000010070 if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
10071 /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
10072 /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
10073 Bool isGT = bitU == 0;
10074 IRExpr* argL = getQReg128(nn);
10075 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010076 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +000010077 assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
10078 : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
10079 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10080 DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
10081 return True;
10082 }
10083
10084 if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
10085 /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
10086 /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
10087 Bool isEQ = bitU == 0;
10088 IRExpr* argL = getQReg128(nn);
10089 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010090 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +000010091 assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
10092 : unop(Iop_NotV128,
10093 binop(Iop_CmpGT64Sx2, argL, argR)));
10094 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
10095 DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
10096 return True;
10097 }
10098
10099 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
10100 /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
sewardjdf1628c2014-06-10 22:52:05 +000010101 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
sewardj2b6fd5e2014-06-19 14:21:37 +000010102 binop(Iop_CmpGT64Sx2, mkV128(0x0000),
10103 getQReg128(nn))));
10104 DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
sewardjdf1628c2014-06-10 22:52:05 +000010105 return True;
10106 }
10107
sewardj25523c42014-06-15 19:36:29 +000010108 if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10109 /* -------- 0,11,01011 ABS d_d -------- */
10110 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10111 unop(Iop_Abs64x2, getQReg128(nn))));
10112 DIP("abs d%u, d%u\n", dd, nn);
10113 return True;
10114 }
10115
10116 if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
10117 /* -------- 1,11,01011 NEG d_d -------- */
10118 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
10119 binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
10120 DIP("neg d%u, d%u\n", dd, nn);
10121 return True;
10122 }
10123
sewardj13830dc2015-02-07 21:09:47 +000010124 UInt ix = 0; /*INVALID*/
10125 if (size >= X10) {
10126 switch (opcode) {
10127 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
10128 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
10129 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
10130 default: break;
10131 }
10132 }
10133 if (ix > 0) {
10134 /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
10135 /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
10136 /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
10137 /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
10138 /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
10139 Bool isD = size == X11;
10140 IRType ity = isD ? Ity_F64 : Ity_F32;
10141 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
10142 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
10143 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
10144 IROp opCmp = Iop_INVALID;
10145 Bool swap = False;
10146 const HChar* nm = "??";
10147 switch (ix) {
10148 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
10149 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
10150 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
10151 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
10152 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
10153 default: vassert(0);
10154 }
10155 IRExpr* zero = mkV128(0x0000);
10156 IRTemp res = newTempV128();
10157 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
10158 : binop(opCmp, getQReg128(nn), zero));
10159 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10160 mkexpr(res))));
10161
10162 DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10163 return True;
10164 }
10165
sewardjecedd982014-08-11 14:02:47 +000010166 if (opcode == BITS5(1,0,1,0,0)
10167 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10168 /* -------- 0,xx,10100: SQXTN -------- */
10169 /* -------- 1,xx,10100: UQXTN -------- */
10170 /* -------- 1,xx,10010: SQXTUN -------- */
10171 if (size == X11) return False;
10172 vassert(size < 3);
10173 IROp opN = Iop_INVALID;
10174 Bool zWiden = True;
10175 const HChar* nm = "??";
10176 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10177 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
10178 }
10179 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10180 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
10181 }
10182 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10183 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10184 }
10185 else vassert(0);
10186 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10187 size+1, getQReg128(nn));
10188 IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
10189 size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10190 putQReg128(dd, mkexpr(resN));
10191 /* This widens zero lanes to zero, and compares it against zero, so all
10192 of the non-participating lanes make no contribution to the
10193 Q flag state. */
10194 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10195 size, mkexpr(resN));
10196 updateQCFLAGwithDifference(src, resW);
10197 const HChar arrNarrow = "bhsd"[size];
10198 const HChar arrWide = "bhsd"[size+1];
10199 DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
10200 return True;
10201 }
10202
sewardj2130b342015-04-06 14:49:05 +000010203 if (opcode == BITS5(1,0,1,1,0) && bitU == 1 && size == X01) {
10204 /* -------- 1,01,10110 FCVTXN s_d -------- */
10205 /* Using Irrm_NEAREST here isn't right.  The docs require "round to
10206 odd" (von Neumann rounding): if the conversion is inexact, the
   least significant mantissa bit of the result is forced to 1.  That
   makes a later narrowing of the F32 result immune to double rounding.
   No Irrm_* value expresses it, so round-to-nearest is used as an
   approximation. */
10207 putQRegLO(dd,
10208 binop(Iop_F64toF32, mkU32(Irrm_NEAREST),
10209 getQRegLO(nn, Ity_F64)));
10210 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10211 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10212 DIP("fcvtxn s%u, d%u\n", dd, nn);
10213 return True;
10214 }
10215
sewardj400d6b92015-03-30 09:01:51 +000010216 ix = 0; /*INVALID*/
10217 switch (opcode) {
10218 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
10219 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
10220 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
10221 default: break;
10222 }
10223 if (ix > 0) {
10224 /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10225 /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10226 /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10227 /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10228 /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
10229 /* -------- 1,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
10230 /* -------- 1,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
10231 /* -------- 1,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
10232 /* -------- 1,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
10233 /* -------- 1,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
sewardjbc0b7222015-03-30 18:49:38 +000010234 Bool isD = (size & 1) == 1;
10235 IRType tyF = isD ? Ity_F64 : Ity_F32;
10236 IRType tyI = isD ? Ity_I64 : Ity_I32;
sewardj400d6b92015-03-30 09:01:51 +000010237 IRRoundingMode irrm = 8; /*impossible*/
10238 HChar ch = '?';
10239 switch (ix) {
10240 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
10241 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
10242 case 3: ch = 'a'; irrm = Irrm_NEAREST; break;
   /* kludge: FCVTA* requires ties-away-from-zero, but Irrm_NEAREST
      is ties-to-even; the two differ on exact ties, e.g. 0.5. */
10243 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
10244 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
10245 default: vassert(0);
10246 }
10247 IROp cvt = Iop_INVALID;
10248 if (bitU == 1) {
sewardjbc0b7222015-03-30 18:49:38 +000010249 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
sewardj400d6b92015-03-30 09:01:51 +000010250 } else {
sewardjbc0b7222015-03-30 18:49:38 +000010251 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
sewardj400d6b92015-03-30 09:01:51 +000010252 }
10253 IRTemp src = newTemp(tyF);
10254 IRTemp res = newTemp(tyI);
10255 assign(src, getQRegLane(nn, 0, tyF));
10256 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
10257 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
sewardjbc0b7222015-03-30 18:49:38 +000010258 if (!isD) {
sewardj400d6b92015-03-30 09:01:51 +000010259 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10260 }
10261 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
sewardjbc0b7222015-03-30 18:49:38 +000010262 HChar sOrD = isD ? 'd' : 's';
sewardj400d6b92015-03-30 09:01:51 +000010263 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
10264 sOrD, dd, sOrD, nn);
10265 return True;
10266 }
10267
sewardj2130b342015-04-06 14:49:05 +000010268 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10269 /* -------- 0,0x,11101: SCVTF d_d, s_s -------- */
10270 /* -------- 1,0x,11101: UCVTF d_d, s_s -------- */
10271 Bool isU = bitU == 1;
10272 Bool isD = (size & 1) == 1;
10273 IRType tyI = isD ? Ity_I64 : Ity_I32;
10274 IROp iop = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
10275 : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
10276 IRTemp rm = mk_get_IR_rounding_mode();
10277 putQRegLO(dd, binop(iop, mkexpr(rm), getQRegLO(nn, tyI)));
10278 if (!isD) {
10279 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
10280 }
10281 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
10282 HChar c = isD ? 'd' : 's';
10283 DIP("%ccvtf %c%u, %c%u\n", isU ? 'u' : 's', c, dd, c, nn);
10284 return True;
10285 }
10286
sewardj89cefe42015-02-24 12:21:01 +000010287 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
10288 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
10289 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
10290 Bool isSQRT = bitU == 1;
10291 Bool isD = (size & 1) == 1;
10292 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
10293 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
10294 IRTemp resV = newTempV128();
10295 assign(resV, unop(op, getQReg128(nn)));
10296 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
10297 mkexpr(resV))));
10298 HChar c = isD ? 'd' : 's';
10299 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
10300 return True;
10301 }
10302
10303 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
10304 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
10305 Bool isD = (size & 1) == 1;
10306 IRType ty = isD ? Ity_F64 : Ity_F32;
10307 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
10308 IRTemp res = newTemp(ty);
10309 IRTemp rm = mk_get_IR_rounding_mode();
10310 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
10311 putQReg128(dd, mkV128(0x0000));
10312 putQRegLane(dd, 0, mkexpr(res));
10313 HChar c = isD ? 'd' : 's';
10314 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10315 return True;
10316 }
10317
sewardjdf1628c2014-06-10 22:52:05 +000010318 return False;
10319# undef INSN
10320}
10321
sewardjfc83d2c2014-06-12 10:15:46 +000010322
sewardjdf1628c2014-06-10 22:52:05 +000010323static
10324Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10325{
sewardj54ffa1d2014-07-22 09:27:49 +000010326 /* 31 28 23 21 20 19 15 11 9 4
10327 01 U 11111 size L M m opcode H 0 n d
10328 Decode fields are: u,size,opcode
10329 M is really part of the mm register number. Individual
10330 cases need to inspect L and H though.
10331 */
sewardjdf1628c2014-06-10 22:52:05 +000010332# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +000010333 if (INSN(31,30) != BITS2(0,1)
10334 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
10335 return False;
10336 }
10337 UInt bitU = INSN(29,29);
10338 UInt size = INSN(23,22);
10339 UInt bitL = INSN(21,21);
10340 UInt bitM = INSN(20,20);
10341 UInt mmLO4 = INSN(19,16);
10342 UInt opcode = INSN(15,12);
10343 UInt bitH = INSN(11,11);
10344 UInt nn = INSN(9,5);
10345 UInt dd = INSN(4,0);
10346 vassert(size < 4);
10347 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10348
sewardjee3db332015-02-08 18:24:38 +000010349 if (bitU == 0 && size >= X10
10350 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10351 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10352 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10353 Bool isD = (size & 1) == 1;
10354 Bool isSUB = opcode == BITS4(0,1,0,1);
10355 UInt index;
10356 if (!isD) index = (bitH << 1) | bitL;
10357 else if (isD && bitL == 0) index = bitH;
10358 else return False; // sz:L == x11 => unallocated encoding
10359 vassert(index < (isD ? 2 : 4));
10360 IRType ity = isD ? Ity_F64 : Ity_F32;
10361 IRTemp elem = newTemp(ity);
10362 UInt mm = (bitM << 4) | mmLO4;
10363 assign(elem, getQRegLane(mm, index, ity));
10364 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10365 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10366 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10367 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10368 IRTemp rm = mk_get_IR_rounding_mode();
10369 IRTemp t1 = newTempV128();
10370 IRTemp t2 = newTempV128();
10371 // FIXME: double rounding; the separate multiply and add each round
// once.  VEX's fused Iop_MAddF64/Iop_MSubF64 (and the F32 variants)
// would round just once, matching the hardware.
10372 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10373 assign(t2, triop(isSUB ? opSUB : opADD,
10374 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
10375 putQReg128(dd,
10376 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10377 mkexpr(t2))));
10378 const HChar c = isD ? 'd' : 's';
10379 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
10380 c, dd, c, nn, nameQReg128(mm), c, index);
10381 return True;
10382 }
10383
10384 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
10385 /* -------- 0,1x,1001 FMUL d_d_d[], s_s_s[] -------- */
10386 /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
10387 Bool isD = (size & 1) == 1;
10388 Bool isMULX = bitU == 1;
10389 UInt index;
10390 if (!isD) index = (bitH << 1) | bitL;
10391 else if (isD && bitL == 0) index = bitH;
10392 else return False; // sz:L == x11 => unallocated encoding
10393 vassert(index < (isD ? 2 : 4));
10394 IRType ity = isD ? Ity_F64 : Ity_F32;
10395 IRTemp elem = newTemp(ity);
10396 UInt mm = (bitM << 4) | mmLO4;
10397 assign(elem, getQRegLane(mm, index, ity));
10398 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10399 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10400 IRTemp rm = mk_get_IR_rounding_mode();
10401 IRTemp t1 = newTempV128();
10402 // KLUDGE: FMULX is treated the same way as FMUL; as noted above,
// that is wrong for the +/-0 * +/-Inf special cases, for which FMULX
// must return +/-2.0.
10403 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
10404 putQReg128(dd,
10405 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
10406 mkexpr(t1))));
10407 const HChar c = isD ? 'd' : 's';
10408 DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
10409 c, dd, c, nn, nameQReg128(mm), c, index);
10410 return True;
10411 }
10412
sewardj54ffa1d2014-07-22 09:27:49 +000010413 if (bitU == 0
10414 && (opcode == BITS4(1,0,1,1)
10415 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10416 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10417 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10418 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10419 /* Widens, and size refers to the narrowed lanes. */
10420 UInt ks = 3;
10421 switch (opcode) {
10422 case BITS4(1,0,1,1): ks = 0; break;
10423 case BITS4(0,0,1,1): ks = 1; break;
10424 case BITS4(0,1,1,1): ks = 2; break;
10425 default: vassert(0);
10426 }
10427 vassert(ks >= 0 && ks <= 2);
10428 UInt mm = 32; // invalid
10429 UInt ix = 16; // invalid
10430 switch (size) {
10431 case X00:
10432 return False; // h_b_b[] case is not allowed
10433 case X01:
10434 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10435 case X10:
10436 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10437 case X11:
10438 return False; // q_d_d[] case is not allowed
10439 default:
10440 vassert(0);
10441 }
10442 vassert(mm < 32 && ix < 16);
10443 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10444 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10445 newTempsV128_2(&vecN, &vecD);
10446 assign(vecN, getQReg128(nn));
10447 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10448 assign(vecD, getQReg128(dd));
10449 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10450 False/*!is2*/, size, "mas"[ks],
10451 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10452 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
10453 putQReg128(dd, unop(opZHI, mkexpr(res)));
10454 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10455 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10456 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10457 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
10458 }
10459 const HChar* nm = ks == 0 ? "sqdmull"
10460 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10461 const HChar arrNarrow = "bhsd"[size];
10462 const HChar arrWide = "bhsd"[size+1];
florianb1737742015-08-03 16:03:13 +000010463 DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
sewardj54ffa1d2014-07-22 09:27:49 +000010464 nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
10465 return True;
10466 }
10467
sewardj257e99f2014-08-03 12:45:19 +000010468 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10469 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10470 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10471 UInt mm = 32; // invalid
10472 UInt ix = 16; // invalid
10473 switch (size) {
10474 case X00:
10475 return False; // b case is not allowed
10476 case X01:
10477 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10478 case X10:
10479 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10480 case X11:
10481 return False; // q case is not allowed
10482 default:
10483 vassert(0);
10484 }
10485 vassert(mm < 32 && ix < 16);
10486 Bool isR = opcode == BITS4(1,1,0,1);
10487 IRTemp res, sat1q, sat1n, vN, vM;
10488 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10489 vN = newTempV128();
10490 assign(vN, getQReg128(nn));
10491 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10492 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10493 IROp opZHI = mkVecZEROHIxxOFV128(size);
10494 putQReg128(dd, unop(opZHI, mkexpr(res)));
10495 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10496 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10497 HChar ch = size == X01 ? 'h' : 's';
florianb1737742015-08-03 16:03:13 +000010498 DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
sewardj257e99f2014-08-03 12:45:19 +000010499 return True;
10500 }
10501
sewardjdf1628c2014-06-10 22:52:05 +000010502 return False;
10503# undef INSN
10504}
10505
sewardjfc83d2c2014-06-12 10:15:46 +000010506
sewardjdf1628c2014-06-10 22:52:05 +000010507static
10508Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10509{
10510 /* 31 28 22 18 15 10 9 4
10511 0 q u 011110 immh immb opcode 1 n d
10512 Decode fields: u,opcode
10513 */
10514# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10515 if (INSN(31,31) != 0
10516 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
10517 return False;
10518 }
10519 UInt bitQ = INSN(30,30);
10520 UInt bitU = INSN(29,29);
10521 UInt immh = INSN(22,19);
10522 UInt immb = INSN(18,16);
10523 UInt opcode = INSN(15,11);
10524 UInt nn = INSN(9,5);
10525 UInt dd = INSN(4,0);
10526
sewardja6b61f02014-08-17 18:32:14 +000010527 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +000010528 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
10529 /* -------- 1,00000 USHR std7_std7_#imm -------- */
sewardja6b61f02014-08-17 18:32:14 +000010530 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
10531 /* -------- 1,00010 USRA std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +000010532 /* laneTy, shift = case immh:immb of
10533 0001:xxx -> B, SHR:8-xxx
10534 001x:xxx -> H, SHR:16-xxxx
10535 01xx:xxx -> S, SHR:32-xxxxx
10536 1xxx:xxx -> D, SHR:64-xxxxxx
10537 other -> invalid
10538 */
sewardjdf1628c2014-06-10 22:52:05 +000010539 UInt size = 0;
10540 UInt shift = 0;
10541 Bool isQ = bitQ == 1;
10542 Bool isU = bitU == 1;
sewardja6b61f02014-08-17 18:32:14 +000010543 Bool isAcc = opcode == BITS5(0,0,0,1,0);
sewardjdf1628c2014-06-10 22:52:05 +000010544 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +000010545 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +000010546 vassert(size >= 0 && size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +000010547 UInt lanebits = 8 << size;
10548 vassert(shift >= 1 && shift <= lanebits);
10549 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
10550 IRExpr* src = getQReg128(nn);
sewardja6b61f02014-08-17 18:32:14 +000010551 IRTemp shf = newTempV128();
sewardj8e91fd42014-07-11 12:05:47 +000010552 IRTemp res = newTempV128();
10553 if (shift == lanebits && isU) {
sewardja6b61f02014-08-17 18:32:14 +000010554 assign(shf, mkV128(0x0000));
sewardj8e91fd42014-07-11 12:05:47 +000010555 } else {
10556 UInt nudge = 0;
10557 if (shift == lanebits) {
10558 vassert(!isU);
10559 nudge = 1;
10560 }
sewardja6b61f02014-08-17 18:32:14 +000010561 assign(shf, binop(op, src, mkU8(shift - nudge)));
sewardjdf1628c2014-06-10 22:52:05 +000010562 }
sewardja6b61f02014-08-17 18:32:14 +000010563 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10564 : mkexpr(shf));
sewardj8e91fd42014-07-11 12:05:47 +000010565 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10566 HChar laneCh = "bhsd"[size];
10567 UInt nLanes = (isQ ? 128 : 64) / lanebits;
sewardja6b61f02014-08-17 18:32:14 +000010568 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
10569 : (isU ? "ushr" : "sshr");
10570 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10571 nameQReg128(dd), nLanes, laneCh,
10572 nameQReg128(nn), nLanes, laneCh, shift);
10573 return True;
10574 }
10575
10576 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
10577 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
10578 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
10579 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
10580 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
10581 /* laneTy, shift = case immh:immb of
10582 0001:xxx -> B, SHR:8-xxx
10583 001x:xxx -> H, SHR:16-xxxx
10584 01xx:xxx -> S, SHR:32-xxxxx
10585 1xxx:xxx -> D, SHR:64-xxxxxx
10586 other -> invalid
10587 */
10588 UInt size = 0;
10589 UInt shift = 0;
10590 Bool isQ = bitQ == 1;
10591 Bool isU = bitU == 1;
10592 Bool isAcc = opcode == BITS5(0,0,1,1,0);
10593 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10594 if (!ok || (bitQ == 0 && size == X11)) return False;
10595 vassert(size >= 0 && size <= 3);
10596 UInt lanebits = 8 << size;
10597 vassert(shift >= 1 && shift <= lanebits);
10598 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
10599 IRExpr* src = getQReg128(nn);
10600 IRTemp imm8 = newTemp(Ity_I8);
10601 assign(imm8, mkU8((UChar)(-shift)));
10602 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
10603 IRTemp shf = newTempV128();
10604 IRTemp res = newTempV128();
10605 assign(shf, binop(op, src, amt));
10606 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
10607 : mkexpr(shf));
10608 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10609 HChar laneCh = "bhsd"[size];
10610 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10611 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
10612 : (isU ? "urshr" : "srshr");
sewardj8e91fd42014-07-11 12:05:47 +000010613 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10614 nameQReg128(dd), nLanes, laneCh,
10615 nameQReg128(nn), nLanes, laneCh, shift);
10616 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010617 }
10618
sewardj8e91fd42014-07-11 12:05:47 +000010619 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
10620 /* -------- 1,01000 SRI std7_std7_#imm -------- */
10621 /* laneTy, shift = case immh:immb of
10622 0001:xxx -> B, SHR:8-xxx
10623 001x:xxx -> H, SHR:16-xxxx
10624 01xx:xxx -> S, SHR:32-xxxxx
10625 1xxx:xxx -> D, SHR:64-xxxxxx
10626 other -> invalid
10627 */
10628 UInt size = 0;
10629 UInt shift = 0;
10630 Bool isQ = bitQ == 1;
10631 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10632 if (!ok || (bitQ == 0 && size == X11)) return False;
10633 vassert(size >= 0 && size <= 3);
10634 UInt lanebits = 8 << size;
10635 vassert(shift >= 1 && shift <= lanebits);
10636 IRExpr* src = getQReg128(nn);
10637 IRTemp res = newTempV128();
10638 if (shift == lanebits) {
10639 assign(res, getQReg128(dd));
10640 } else {
10641 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
10642 IRExpr* nmask = binop(mkVecSHLN(size),
10643 mkV128(0xFFFF), mkU8(lanebits - shift));
10644 IRTemp tmp = newTempV128();
10645 assign(tmp, binop(Iop_OrV128,
10646 mkexpr(res),
10647 binop(Iop_AndV128, getQReg128(dd), nmask)));
10648 res = tmp;
10649 }
10650 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10651 HChar laneCh = "bhsd"[size];
10652 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10653 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
10654 nameQReg128(dd), nLanes, laneCh,
10655 nameQReg128(nn), nLanes, laneCh, shift);
10656 return True;
10657 }
10658
10659 if (opcode == BITS5(0,1,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +000010660 /* -------- 0,01010 SHL std7_std7_#imm -------- */
sewardj8e91fd42014-07-11 12:05:47 +000010661 /* -------- 1,01010 SLI std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +000010662 /* laneTy, shift = case immh:immb of
10663 0001:xxx -> B, xxx
10664 001x:xxx -> H, xxxx
10665 01xx:xxx -> S, xxxxx
10666 1xxx:xxx -> D, xxxxxx
10667 other -> invalid
10668 */
sewardjdf1628c2014-06-10 22:52:05 +000010669 UInt size = 0;
10670 UInt shift = 0;
sewardj8e91fd42014-07-11 12:05:47 +000010671 Bool isSLI = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +000010672 Bool isQ = bitQ == 1;
10673 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +000010674 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +000010675 vassert(size >= 0 && size <= 3);
10676 /* The shift encoding has opposite sign for the leftwards case.
10677 Adjust shift to compensate. */
sewardj8e91fd42014-07-11 12:05:47 +000010678 UInt lanebits = 8 << size;
10679 shift = lanebits - shift;
10680 vassert(shift >= 0 && shift < lanebits);
10681 IROp op = mkVecSHLN(size);
10682 IRExpr* src = getQReg128(nn);
10683 IRTemp res = newTempV128();
10684 if (shift == 0) {
10685 assign(res, src);
10686 } else {
sewardjdf9d6d52014-06-27 10:43:22 +000010687 assign(res, binop(op, src, mkU8(shift)));
sewardj8e91fd42014-07-11 12:05:47 +000010688 if (isSLI) {
10689 IRExpr* nmask = binop(mkVecSHRN(size),
10690 mkV128(0xFFFF), mkU8(lanebits - shift));
10691 IRTemp tmp = newTempV128();
10692 assign(tmp, binop(Iop_OrV128,
10693 mkexpr(res),
10694 binop(Iop_AndV128, getQReg128(dd), nmask)));
10695 res = tmp;
10696 }
sewardjdf1628c2014-06-10 22:52:05 +000010697 }
sewardj8e91fd42014-07-11 12:05:47 +000010698 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10699 HChar laneCh = "bhsd"[size];
10700 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10701 const HChar* nm = isSLI ? "sli" : "shl";
10702 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10703 nameQReg128(dd), nLanes, laneCh,
10704 nameQReg128(nn), nLanes, laneCh, shift);
10705 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010706 }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110 SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110 UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000 SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
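      /* For the rounding variant, half of the weight of the bits
         about to be discarded -- 1 << (shift-1) -- is added in the
         wide lanes before shifting, giving round-to-nearest-upwards.
         E.g. with shift == 4, rshrn maps 0x0B to (0x0B + 8) >> 4 == 1,
         where plain shrn gives 0. */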
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN{,2} #imm -------- */
      /* -------- 1,10010 UQSHRN{,2} #imm -------- */
      /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";   op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";   op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn";  op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn";  op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun";  op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
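      /* Each of these IROps yields a 128-bit pair: the shifted,
         saturated, narrowed lanes in the lower 64 bits, and -- as the
         code below relies on -- an upper 64 bits that is nonzero
         exactly when some lane saturated.  Comparing that upper half
         against zero is what drives the QC flag update. */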
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool    isQ   = bitQ == 1;
      Bool    isU   = bitU == 1;
      UInt    immhb = (immh << 3) | immb;
      IRTemp  src   = newTempV128();
      IRTemp  zero  = newTempV128();
      IRExpr* res   = NULL;
      UInt    sh    = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
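      /* The widening scheme used below: interleaving with zero parks
         each source lane in the top half of a double-width lane,
         i.e. computes v << 32 (resp. << 16, << 8).  The subsequent
         signed (sshll) or unsigned (ushll) right shift by
         lanebits - sh then leaves the sign- or zero-extended value
         shifted left by sh.  E.g. for 32-bit lanes with sh == 3:
         (v << 32) >>signed 29  ==  sext64(v) << 3. */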
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%u\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,11100 SCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11100 UCVTF {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
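      /* #fbits selects a fixed-point reading of the integer source:
         each lane is converted as (FP)srcLane * 2^-fbits.  So, for
         example, ucvtf v0.2s, v1.2s, #8 turns the lane value 0x180
         (384) into 1.5. */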
      IROp   opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT  = isU ? (isD ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isD ? Iop_I64StoF64 : Iop_I32StoF32);
      IRType tyF    = isD ? Ity_F64 : Ity_F32;
      IRType tyI    = isD ? Ity_I64 : Ity_I32;
      UInt   nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyI);
         IRTemp res = newTemp(tyF);
         IRTemp rm  = mk_get_IR_rounding_mode();
         assign(src, getQRegLane(nn, i, tyI));
         assign(res, triop(opMUL, mkexpr(rm),
                           binop(opCVT, mkexpr(rm), mkexpr(src)),
                           scaleE));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "ucvtf" : "scvtf",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

   if (opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,11111 FCVTZS {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* -------- 1,11111 FCVTZU {2d_2d,4s_4s,2s_2s}_imm -------- */
      /* If immh is of the form 00xx, the insn is invalid. */
      if (immh < BITS4(0,1,0,0)) return False;
      UInt size  = 0;
      UInt fbits = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&fbits, &size, immh, immb);
      /* The following holds because immh is never zero. */
      vassert(ok);
      /* The following holds because immh >= 0100. */
      vassert(size == X10 || size == X11);
      Bool isD = size == X11;
      Bool isU = bitU == 1;
      Bool isQ = bitQ == 1;
      if (isD && !isQ) return False; /* reject .1d case */
      vassert(fbits >= 1 && fbits <= (isD ? 64 : 32));
      Double  scale  = two_to_the_plus(fbits);
      IRExpr* scaleE = isD ? IRExpr_Const(IRConst_F64(scale))
                           : IRExpr_Const(IRConst_F32( (Float)scale ));
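      /* The inverse fixed-point reading: each lane is scaled up by
         2^fbits and then truncated towards zero, so fcvtzs with #8
         turns 1.5 back into 0x180. */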
      IROp   opMUL  = isD ? Iop_MulF64 : Iop_MulF32;
      IROp   opCVT  = isU ? (isD ? Iop_F64toI64U : Iop_F32toI32U)
                          : (isD ? Iop_F64toI64S : Iop_F32toI32S);
      IRType tyF    = isD ? Ity_F64 : Ity_F32;
      IRType tyI    = isD ? Ity_I64 : Ity_I32;
      UInt   nLanes = (isQ ? 2 : 1) * (isD ? 1 : 2);
      vassert(nLanes == 2 || nLanes == 4);
      for (UInt i = 0; i < nLanes; i++) {
         IRTemp src = newTemp(tyF);
         IRTemp res = newTemp(tyI);
         IRTemp rm  = newTemp(Ity_I32);
         assign(src, getQRegLane(nn, i, tyF));
         assign(rm,  mkU32(Irrm_ZERO));
         assign(res, binop(opCVT, mkexpr(rm),
                           triop(opMUL, mkexpr(rm),
                                 mkexpr(src), scaleE)));
         putQRegLane(dd, i, mkexpr(res));
      }
      if (!isQ) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", isU ? "fcvtzu" : "fcvtzs",
          nameQReg128(dd), arr, nameQReg128(nn), arr, fbits);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100 ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110 SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
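      /* The rounding forms add half of the weight of the discarded
         low half -- 1 << (shift-1), i.e. 0x80 / 0x8000 / 0x80000000 --
         into the wide sum or difference before its top half is
         taken. */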
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
                              : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100 UMULL{2} -------- */ // 0
      /* -------- 0,1000 SMLAL{2} -------- */ // 1
      /* -------- 1,1000 UMLAL{2} -------- */ // 1
      /* -------- 0,1010 SMLSL{2} -------- */ // 2
      /* -------- 1,1010 UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrow lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
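      /* Two saturation points feed the QC flag here, going by the
         names math_SQDMULL_ACC hands back: the sat1 pair tracks the
         doubling multiply itself and, for the accumulating forms
         only, the sat2 pair tracks the subsequent add or subtract. */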
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110 PMULL{2} -------- */
      /* Widens, and size refers to the narrow lanes. */
      if (size != X00 && size != X11) return False;
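      /* This is polynomial, i.e. carry-less, multiplication over
         GF(2): partial products are combined with XOR rather than
         ADD, so for instance 0b11 * 0b11 == 0b101. */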
      IRTemp  res  = IRTemp_INVALID;
      IRExpr* srcN = getQReg128(nn);
      IRExpr* srcM = getQReg128(mm);
      const HChar* arrNarrow = NULL;
      const HChar* arrWide   = NULL;
      if (size == X00) {
         res = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                         srcN, srcM);
         arrNarrow = nameArr_Q_SZ(bitQ, size);
         arrWide   = nameArr_Q_SZ(1,    size+1);
      } else {
         /* The same thing as the X00 case, except we have to call
            a helper to do it. */
         vassert(size == X11);
         res = newTemp(Ity_V128);
         IROp slice
            = is2 ? Iop_V128HIto64 : Iop_V128to64;
         IRExpr** args
            = mkIRExprVec_3( IRExpr_VECRET(),
                             unop(slice, srcN), unop(slice, srcM));
         IRDirty* di
            = unsafeIRDirty_1_N( res, 0/*regparms*/,
                                 "arm64g_dirtyhelper_PMULLQ",
                                 &arm64g_dirtyhelper_PMULLQ, args);
         stmt(IRStmt_Dirty(di));
         /* We can't use nameArr_Q_SZ for this because it can't deal with
            Q-sized (128 bit) results.  Hence do it by hand. */
         arrNarrow = bitQ == 0 ? "1d" : "2d";
         arrWide   = "1q";
      }
      putQReg128(dd, mkexpr(res));
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
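      /* Doing the arithmetic at double width means the intermediate
         add/sub cannot wrap, so halving it yields the exact
         (a+b)>>1 resp. (a-b)>>1.  E.g. shadd on byte lanes maps
         0x7F,0x7F to (127+127)>>1 == 127, where an 8-bit add would
         have wrapped. */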
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
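      /* Compute both the saturating and the plain wrapping result,
         and set QC if they differ in any lane -- saturation occurred
         exactly where the two disagree. */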
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx   = (size & 2) == 2;
      Bool   invert  = (size & 1) == 1;
      IRTemp res     = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
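      /* Apart from EOR, these are all bitwise multiplexers, built on
         the identity  x ^ ((x ^ y) & sel) == (sel & y) | (~sel & x).
         BSL selects between N (where D is one) and M (where D is
         zero); BIT and BIF copy N bits into D where M is one,
         respectively zero. */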
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGE = bitU == 0;
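      /* There is no >= vector primop, so compute NOT(argR > argL)
         instead, with the signed or unsigned > as appropriate. */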
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isSUB = bitU == 1;
      IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t     = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS    = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res    = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
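      /* The pairwise op becomes a vertical one once the operands are
         rearranged: catting the even lanes of M:N against the odd
         lanes of M:N lines each pair up in the same lane position. */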
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
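      /* Both forms return the high half of 2*n*m, saturating the one
         possible overflow case (-2^(w-1) * -2^(w-1) for lane width w);
         the rounding form also adds 1 << (w-1) before the high half
         is taken. */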
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm  = mk_get_IR_rounding_mode();
      IRTemp t1  = newTempV128();
      IRTemp t2  = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm  = mk_get_IR_rounding_mode();
      IRTemp t1  = newTempV128();
      IRTemp t2  = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE  = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1  = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1    = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

11891 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
11892 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11893 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11894 Bool isD = (size & 1) == 1;
11895 Bool isGT = (size & 2) == 2;
11896 if (bitQ == 0 && isD) return False; // implied 1d case
11897 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
11898 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
11899 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
sewardj8e91fd42014-07-11 12:05:47 +000011900 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +000011901 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
11902 unop(opABS, getQReg128(nn)))); // swapd
sewardjdf9d6d52014-06-27 10:43:22 +000011903 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +000011904 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
11905 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
11906 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
11907 return True;
11908 }
11909
sewardj5cb53e72015-02-08 12:08:56 +000011910 if (bitU == 1
11911 && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
11912 /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11913 /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11914 /* -------- 1,0x,11110 FMAXP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11915 /* -------- 1,1x,11110 FMINP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
11916 /* FMAXNM, FMINNM: FIXME -- KLUDGED */
11917 Bool isD = (size & 1) == 1;
11918 if (bitQ == 0 && isD) return False; // implied 1d case
11919 Bool isMIN = (size & 2) == 2;
11920 Bool isNM = opcode == BITS5(1,1,0,0,0);
11921 IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
11922 IRTemp srcN = newTempV128();
11923 IRTemp srcM = newTempV128();
11924 IRTemp preL = IRTemp_INVALID;
11925 IRTemp preR = IRTemp_INVALID;
11926 assign(srcN, getQReg128(nn));
11927 assign(srcM, getQReg128(mm));
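      /* math_REARRANGE_FOR_FLOATING_PAIRWISE deals the lanes of the
         concatenation M:N into two new vectors, roughly the even-numbered
         lanes in one and the odd-numbered lanes in the other, so that the
         vertical binop below computes the architected pairwise result. */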
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src   = newTempV128();
      IRTemp sum   = newTempV128();
      IRTemp res   = newTempV128();
      assign(src, getQReg128(nn));
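      /* The pairwise widening sum is synthesised from vertical ops:
         widen the odd-numbered lanes, widen the even-numbered lanes,
         then add the two widened vectors.  Each wide lane then holds
         the sum of one pair of adjacent narrow lanes. */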
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
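      /* FPSR.QC is a cumulative ("sticky") saturation flag.
         updateQCFLAGwithDifference compares the saturating result
         against the plain wraparound result and, in effect, ORs any
         lane difference into the guest's QC state. */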
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101  RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
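      /* Only a signed "greater than" primop is available at each lane
         size, so x >=s 0 is computed as NOT(0 >s x). */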
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD = size == X11;
      IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp opCmp   = Iop_INVALID;
      Bool swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp res = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, #0.0\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isFNEG = bitU == 1;
      IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                             : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res    = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
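      /* Saturation detection: re-widen the narrowed result and compare
         it with the original source.  Any lane that saturated (or, for
         sqxtun, was negative) re-widens to a different value, and that
         difference is accumulated into FPSR.QC. */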
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
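      /* Interleaving the source with itself puts two copies of each
         narrow lane into each wide lane.  The left shift by the narrow
         lane width then discards the upper copy and moves the lower copy
         into the top half -- exactly the zero-extended element shifted
         left by #lane-width, which is what SHLL produces. */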
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("shll%s %s.%s, %s.%s, #%d\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F32 : Ity_F64;
      IROp   opCvt  = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
      IRTemp rm     = mk_get_IR_rounding_mode();
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, nLanes * bitQ + i,
                         binop(opCvt, mkexpr(rm), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 1,01,10110: FCVTXN 2s/4s_2d -------- */
      /* FCVTXN is specified to use "round to odd" (von Neumann rounding):
         if the conversion is inexact, the least significant mantissa bit
         of the result is forced to 1.  That makes a later, second rounding
         to a narrower format safe (no double-rounding error).  There is no
         IR rounding mode for it, so Irrm_NEAREST is used instead, which is
         not correct.  KLUDGE. */
      IRType srcTy = Ity_F64;
      IROp   opCvt = Iop_F64toF32;
      IRTemp src[2];
      for (UInt i = 0; i < 2; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, i, srcTy));
      }
      for (UInt i = 0; i < 2; i++) {
         putQRegLane(dd, 2 * bitQ + i,
                         binop(opCvt, mkU32(Irrm_NEAREST), mkexpr(src[i])));
      }
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtxn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
      UInt   nLanes = size == X00 ? 4 : 2;
      IRType srcTy  = size == X00 ? Ity_F16 : Ity_F32;
      IROp   opCvt  = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
      IRTemp src[nLanes];
      for (UInt i = 0; i < nLanes; i++) {
         src[i] = newTemp(srcTy);
         assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
      }
      for (UInt i = 0; i < nLanes; i++) {
         putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    1+size+1);
      DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }
   ix = 0;
   if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
      ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
      // = 1 + bitU[0]:size[1]:opcode[0]
      vassert(ix >= 1 && ix <= 8);
      if (ix == 7) ix = 0;
   }
   if (ix > 0) {
      /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
      /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
      /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
      /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
      /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
      /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
      /* -------- 1,1x,11000 (apparently unassigned)    (7) -------- */
      /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
      /* rm plan:
         FRINTN: tieeven -- !! FIXME KLUDGED !!
         FRINTM: -inf
         FRINTP: +inf
         FRINTZ: zero
         FRINTA: tieaway -- !! FIXME KLUDGED !!
         FRINTX: per FPCR + "exact = TRUE"
         FRINTI: per FPCR
      */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRTemp irrmRM = mk_get_IR_rounding_mode();

      UChar ch = '?';
      IRTemp irrm = newTemp(Ity_I32);
      switch (ix) {
         case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
         case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
         case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
         // FRINTX is "round to integral, exact": it rounds per FPCR and
         // additionally raises the Inexact exception if the result differs
         // from the operand.  Since this model does not track FP exception
         // flags, rounding per FPCR is the closest available approximation.
         case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
         case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
         default: vassert(0);
      }

      IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
                                            getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("frint%c %s.%s, %s.%s\n", ch,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case

      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge: should be
                                                          ties-away */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
      IROp cvt = Iop_INVALID;
      if (bitU == 1) {
         cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
      } else {
         cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
      }
      if (isD) {
         for (UInt i = 0; i < 2; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                          getQRegLane(nn, i, Ity_F64)));
         }
      } else {
         UInt n = bitQ==1 ? 4 : 2;
         for (UInt i = 0; i < n; i++) {
            putQRegLane(dd, i, binop(cvt, mkU32(irrm),
                                          getQRegLane(nn, i, Ity_F32)));
         }
         if (bitQ == 0)
            putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
      }
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
      Bool isREC = bitU == 0;
      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28        22 21     15     9 4
         0q0 01110 0 sz 1 00001 110110 n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1 00001 110110 n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                       isQ, isF64 );
         IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm  = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,1x,11101: FRECPE  2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isSQRT = bitU == 1;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
                           : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,1x,11111: FSQRT 2d_2d, 4s_4s, 2s_2s -------- */
      Bool isD = (size & 1) == 1;
      IROp op  = isD ? Iop_Sqrt64Fx2 : Iop_Sqrt32Fx4;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp resV = newTempV128();
      assign(resV, binop(op, mkexpr(mk_get_IR_rounding_mode()),
                             getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", "fsqrt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21 20 19 15     11   9 4
      0 Q U 01111 size L  M  m  opcode H 0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
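      // (The multiply below rounds once and the add/sub rounds again,
      // whereas real FMLA/FMLS round only once at the end, so the result
      // can differ from hardware in the last mantissa bit.  The IR's
      // fused scalar ops, e.g. Iop_MAddF64, applied per lane, would
      // presumably avoid this.)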
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          isD ? 'd' : 's', index);
      return True;
   }

   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRTemp res  = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
          isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm    = 32; // invalid
      UInt ix    = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                          : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
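      /* SQDMULH computes (2 * n * m) >> lane-width, with saturation; the
         'R' variant adds a rounding constant before the shift.  The only
         inputs that can saturate are both operands equal to the most
         negative value (e.g. 0x8000 for the 'h' case), since only then
         does the doubled product overflow the wide intermediate. */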
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23   21    16     11 9 4
      0100 1110 size 10100 opcode 10 n d
      Decode fields are: size,opcode
      Size is always 00 in ARMv8, it appears.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,0,1))) {
      /* -------- 00,00100: AESE Vd.16b, Vn.16b -------- */
      /* -------- 00,00101: AESD Vd.16b, Vn.16b -------- */
      Bool   isD  = opcode == BITS5(0,0,1,0,1);
      IRTemp op1  = newTemp(Ity_V128);
      IRTemp op2  = newTemp(Ity_V128);
      IRTemp xord = newTemp(Ity_V128);
      IRTemp res  = newTemp(Ity_V128);
      void*        helper = isD ? &arm64g_dirtyhelper_AESD
                                : &arm64g_dirtyhelper_AESE;
      const HChar* hname  = isD ? "arm64g_dirtyhelper_AESD"
                                : "arm64g_dirtyhelper_AESE";
      assign(op1, getQReg128(dd));
      assign(op2, getQReg128(nn));
      assign(xord, binop(Iop_XorV128, mkexpr(op1), mkexpr(op2)));
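      /* The XOR above is the AddRoundKey step; the remaining steps
         (SubBytes/ShiftRows for AESE, their inverses for AESD) are done
         in a dirty helper, since expressing the AES S-box in IR would be
         impractical.  The helper takes the 128-bit value as two 64-bit
         halves and writes the result through the V128* return pointer. */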
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(xord)),
                                 unop(Iop_V128to64, mkexpr(xord)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%c %s.16b, %s.16b\n", isD ? 'd' : 'e',
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   if (size == BITS2(0,0)
       && (opcode == BITS5(0,0,1,1,0) || opcode == BITS5(0,0,1,1,1))) {
      /* -------- 00,00110: AESMC  Vd.16b, Vn.16b -------- */
      /* -------- 00,00111: AESIMC Vd.16b, Vn.16b -------- */
      Bool   isI = opcode == BITS5(0,0,1,1,1);
      IRTemp src = newTemp(Ity_V128);
      IRTemp res = newTemp(Ity_V128);
      void*        helper = isI ? &arm64g_dirtyhelper_AESIMC
                                : &arm64g_dirtyhelper_AESMC;
      const HChar* hname  = isI ? "arm64g_dirtyhelper_AESIMC"
                                : "arm64g_dirtyhelper_AESMC";
      assign(src, getQReg128(nn));
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
                              mkIRExprVec_3(
                                 IRExpr_VECRET(),
                                 unop(Iop_V128HIto64, mkexpr(src)),
                                 unop(Iop_V128to64, mkexpr(src)) ) );
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      DIP("aes%s %s.16b, %s.16b\n", isI ? "imc" : "mc",
          nameQReg128(dd), nameQReg128(nn));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31        23 21 20 15 14  11 9 4
      0101 1110 sz 0  m  0  opc 00 n d
      Decode fields are: sz,opc
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0) || INSN(21,21) != 0
       || INSN(15,15) != 0 || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt sz  = INSN(23,22);
   UInt mm  = INSN(20,16);
   UInt opc = INSN(14,12);
   UInt nn  = INSN(9,5);
   UInt dd  = INSN(4,0);
   if (sz == BITS2(0,0) && opc <= BITS3(1,1,0)) {
      /* -------- 00,000 SHA1C     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,001 SHA1P     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,010 SHA1M     Qd,    Sn,    Vm.4S -------- */
      /* -------- 00,011 SHA1SU0   Vd.4S, Vn.4S, Vm.4S -------- */
      /* -------- 00,100 SHA256H   Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,101 SHA256H2  Qd,    Qn,    Vm.4S -------- */
      /* -------- 00,110 SHA256SU1 Vd.4S, Vn.4S, Vm.4S -------- */
      vassert(opc < 7);
      const HChar* inames[7]
         = { "sha1c", "sha1p", "sha1m", "sha1su0",
             "sha256h", "sha256h2", "sha256su1" };
      void(*helpers[7])(V128*,ULong,ULong,ULong,ULong,ULong,ULong)
         = { &arm64g_dirtyhelper_SHA1C,    &arm64g_dirtyhelper_SHA1P,
             &arm64g_dirtyhelper_SHA1M,    &arm64g_dirtyhelper_SHA1SU0,
             &arm64g_dirtyhelper_SHA256H,  &arm64g_dirtyhelper_SHA256H2,
             &arm64g_dirtyhelper_SHA256SU1 };
      const HChar* hnames[7]
         = { "arm64g_dirtyhelper_SHA1C",    "arm64g_dirtyhelper_SHA1P",
             "arm64g_dirtyhelper_SHA1M",    "arm64g_dirtyhelper_SHA1SU0",
             "arm64g_dirtyhelper_SHA256H",  "arm64g_dirtyhelper_SHA256H2",
             "arm64g_dirtyhelper_SHA256SU1" };
      IRTemp vD      = newTemp(Ity_V128);
      IRTemp vN      = newTemp(Ity_V128);
      IRTemp vM      = newTemp(Ity_V128);
      IRTemp vDhi    = newTemp(Ity_I64);
      IRTemp vDlo    = newTemp(Ity_I64);
      IRTemp vNhiPre = newTemp(Ity_I64);
      IRTemp vNloPre = newTemp(Ity_I64);
      IRTemp vNhi    = newTemp(Ity_I64);
      IRTemp vNlo    = newTemp(Ity_I64);
      IRTemp vMhi    = newTemp(Ity_I64);
      IRTemp vMlo    = newTemp(Ity_I64);
      assign(vD,      getQReg128(dd));
      assign(vN,      getQReg128(nn));
      assign(vM,      getQReg128(mm));
      assign(vDhi,    unop(Iop_V128HIto64, mkexpr(vD)));
      assign(vDlo,    unop(Iop_V128to64,   mkexpr(vD)));
      assign(vNhiPre, unop(Iop_V128HIto64, mkexpr(vN)));
      assign(vNloPre, unop(Iop_V128to64,   mkexpr(vN)));
      assign(vMhi,    unop(Iop_V128HIto64, mkexpr(vM)));
      assign(vMlo,    unop(Iop_V128to64,   mkexpr(vM)));
      /* Mask off any bits of the N register operand that aren't actually
         needed, so that Memcheck doesn't complain unnecessarily. */
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            assign(vNhi, mkU64(0));
            assign(vNlo, unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(vNloPre))));
            break;
         case BITS3(0,1,1): case BITS3(1,0,0):
         case BITS3(1,0,1): case BITS3(1,1,0):
            assign(vNhi, mkexpr(vNhiPre));
            assign(vNlo, mkexpr(vNloPre));
            break;
         default:
            vassert(0);
      }
      IRTemp res = newTemp(Ity_V128);
      IRDirty* di
         = unsafeIRDirty_1_N( res, 0/*regparms*/, hnames[opc], helpers[opc],
                              mkIRExprVec_7(
                                 IRExpr_VECRET(),
                                 mkexpr(vDhi), mkexpr(vDlo), mkexpr(vNhi),
                                 mkexpr(vNlo), mkexpr(vMhi), mkexpr(vMlo)));
      stmt(IRStmt_Dirty(di));
      putQReg128(dd, mkexpr(res));
      switch (opc) {
         case BITS3(0,0,0): case BITS3(0,0,1): case BITS3(0,1,0):
            DIP("%s q%u, s%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(0,1,1): case BITS3(1,1,0):
            DIP("%s v%u.4s, v%u.4s, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         case BITS3(1,0,0): case BITS3(1,0,1):
            DIP("%s q%u, q%u, v%u.4s\n", inames[opc], dd, nn, mm);
            break;
         default:
            vassert(0);
      }
      return True;
   }

   return False;
#  undef INSN
}
13148
sewardjfc83d2c2014-06-12 10:15:46 +000013149
sewardjdf1628c2014-06-10 22:52:05 +000013150static
13151Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
13152{
Elliott Hughesa0664b92017-04-18 17:46:52 -070013153 /* 31 28 23 21 16 11 9 4
13154 0101 1110 sz 10100 opc 10 n d
13155 Decode fields are: sz,opc
13156 */
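   /* Worked example (computed by hand, illustrative only): 0x5E280820
      has sz=00, opc=00000, n=1, d=0, which should decode as
      SHA1H S0, S1. */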
sewardjdf1628c2014-06-10 22:52:05 +000013157# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
Elliott Hughesa0664b92017-04-18 17:46:52 -070013158 if (INSN(31,24) != BITS8(0,1,0,1,1,1,1,0)
13159 || INSN(21,17) != BITS5(1,0,1,0,0) || INSN(11,10) != BITS2(1,0)) {
13160 return False;
13161 }
13162 UInt sz = INSN(23,22);
13163 UInt opc = INSN(16,12);
13164 UInt nn = INSN(9,5);
13165 UInt dd = INSN(4,0);
13166 if (sz == BITS2(0,0) && opc <= BITS5(0,0,0,1,0)) {
13167 /* -------- 00,00000 SHA1H Sd, Sn -------- */
13168 /* -------- 00,00001 SHA1SU1 Vd.4S, Vn.4S -------- */
13169 /* -------- 00,00010 SHA256SU0 Vd.4S, Vn.4S -------- */
13170 vassert(opc < 3);
13171 const HChar* inames[3] = { "sha1h", "sha1su1", "sha256su0" };
13172 IRTemp vD = newTemp(Ity_V128);
13173 IRTemp vN = newTemp(Ity_V128);
13174 IRTemp vDhi = newTemp(Ity_I64);
13175 IRTemp vDlo = newTemp(Ity_I64);
13176 IRTemp vNhi = newTemp(Ity_I64);
13177 IRTemp vNlo = newTemp(Ity_I64);
13178 assign(vD, getQReg128(dd));
13179 assign(vN, getQReg128(nn));
13180 assign(vDhi, unop(Iop_V128HIto64, mkexpr(vD)));
13181 assign(vDlo, unop(Iop_V128to64, mkexpr(vD)));
13182 assign(vNhi, unop(Iop_V128HIto64, mkexpr(vN)));
13183 assign(vNlo, unop(Iop_V128to64, mkexpr(vN)));
13184 /* Mask off any bits of the N register operand that aren't actually
13185 needed, so that Memcheck doesn't complain unnecessarily. Also
13186 construct the calls, given that the helper functions don't take
13187 the same number of arguments. */
13188 IRDirty* di = NULL;
13189 IRTemp res = newTemp(Ity_V128);
13190 switch (opc) {
13191 case BITS5(0,0,0,0,0): {
13192 IRExpr* vNloMasked = unop(Iop_32Uto64,
13193 unop(Iop_64to32, mkexpr(vNlo)));
13194 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13195 "arm64g_dirtyhelper_SHA1H",
13196 &arm64g_dirtyhelper_SHA1H,
13197 mkIRExprVec_3(
13198 IRExpr_VECRET(),
13199 mkU64(0), vNloMasked) );
13200 break;
13201 }
13202 case BITS5(0,0,0,0,1):
13203 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13204 "arm64g_dirtyhelper_SHA1SU1",
13205 &arm64g_dirtyhelper_SHA1SU1,
13206 mkIRExprVec_5(
13207 IRExpr_VECRET(),
13208 mkexpr(vDhi), mkexpr(vDlo),
13209 mkexpr(vNhi), mkexpr(vNlo)) );
13210 break;
13211 case BITS5(0,0,0,1,0):
13212 di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13213 "arm64g_dirtyhelper_SHA256SU0",
13214 &arm64g_dirtyhelper_SHA256SU0,
13215 mkIRExprVec_5(
13216 IRExpr_VECRET(),
13217 mkexpr(vDhi), mkexpr(vDlo),
13218 mkexpr(vNhi), mkexpr(vNlo)) );
13219 break;
13220 default:
13221 vassert(0);
13222 }
13223 stmt(IRStmt_Dirty(di));
13224 putQReg128(dd, mkexpr(res));
13225 switch (opc) {
13226 case BITS5(0,0,0,0,0):
13227 DIP("%s s%u, s%u\n", inames[opc], dd, nn);
13228 break;
13229 case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,0):
13230 DIP("%s v%u.4s, v%u.4s\n", inames[opc], dd, nn);
13231 break;
13232 default:
13233 vassert(0);
13234 }
13235 return True;
13236 }
13237
sewardjdf1628c2014-06-10 22:52:05 +000013238 return False;
13239# undef INSN
13240}
13241
sewardj5747c4a2014-06-11 20:57:23 +000013242
sewardjdf1628c2014-06-10 22:52:05 +000013243static
13244Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13245{
sewardj5747c4a2014-06-11 20:57:23 +000013246 /* 31 28 23 21 20 15 13 9 4
13247 000 11110 ty 1 m op 1000 n opcode2
13248 The first 3 bits are really "M 0 S", but M and S are always zero.
13249 Decode fields are: ty,op,opcode2
13250 */
sewardjdf1628c2014-06-10 22:52:05 +000013251# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000013252 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13253 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
13254 return False;
13255 }
13256 UInt ty = INSN(23,22);
13257 UInt mm = INSN(20,16);
13258 UInt op = INSN(15,14);
13259 UInt nn = INSN(9,5);
13260 UInt opcode2 = INSN(4,0);
13261 vassert(ty < 4);
13262
13263 if (ty <= X01 && op == X00
13264 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
13265 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
13266 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
13267 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
13268 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
13269 /* 31 23 20 15 9 4
13270 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
13271 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
13272 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
13273 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
13274
13275 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
13276 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
13277 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
13278 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
13279
13280 FCMPE generates Invalid Operation exn if either arg is any kind
13281 of NaN. FCMP generates Invalid Operation exn if either arg is a
13282 signalling NaN. We ignore this detail here and produce the same
13283 IR for both.
13284 */
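      /* Per the ARM ARM, the comparison result nibble is:
            equal     -> 0110    less than -> 1000
            greater   -> 0010    unordered -> 0011
         The shift by 28 below places that nibble in bits 31:28 of the
         flags word, which is where setFlags_COPY expects NZCV. */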
13285 Bool isD = (ty & 1) == 1;
13286 Bool isCMPE = (opcode2 & 16) == 16;
13287 Bool cmpZero = (opcode2 & 8) == 8;
13288 IRType ity = isD ? Ity_F64 : Ity_F32;
13289 Bool valid = True;
13290 if (cmpZero && mm != 0) valid = False;
13291 if (valid) {
13292 IRTemp argL = newTemp(ity);
13293 IRTemp argR = newTemp(ity);
13294 IRTemp irRes = newTemp(Ity_I32);
13295 assign(argL, getQRegLO(nn, ity));
13296 assign(argR,
13297 cmpZero
13298 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
13299 : getQRegLO(mm, ity));
13300 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13301 mkexpr(argL), mkexpr(argR)));
13302 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13303 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13304 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
13305 setFlags_COPY(nzcv_28x0);
13306 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
13307 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
13308 return True;
13309 }
13310 return False;
13311 }
13312
sewardjdf1628c2014-06-10 22:52:05 +000013313 return False;
13314# undef INSN
13315}
13316
sewardj5747c4a2014-06-11 20:57:23 +000013317
sewardjdf1628c2014-06-10 22:52:05 +000013318static
13319Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
13320{
sewardj13830dc2015-02-07 21:09:47 +000013321 /* 31 28 23 21 20 15 11 9 4 3
13322 000 11110 ty 1 m cond 01 n op nzcv
13323 The first 3 bits are really "M 0 S", but M and S are always zero.
13324 Decode fields are: ty,op
13325 */
sewardjdf1628c2014-06-10 22:52:05 +000013326# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj13830dc2015-02-07 21:09:47 +000013327 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13328 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
13329 return False;
13330 }
13331 UInt ty = INSN(23,22);
13332 UInt mm = INSN(20,16);
13333 UInt cond = INSN(15,12);
13334 UInt nn = INSN(9,5);
13335 UInt op = INSN(4,4);
13336 UInt nzcv = INSN(3,0);
13337 vassert(ty < 4 && op <= 1);
13338
13339 if (ty <= BITS2(0,1)) {
13340 /* -------- 00,0 FCCMP s_s -------- */
13341 /* -------- 00,1 FCCMPE s_s -------- */
13342 /* -------- 01,0 FCCMP d_d -------- */
13343 /* -------- 01,1 FCCMPE d_d -------- */
13344
13345 /* FCCMPE generates Invalid Operation exn if either arg is any kind
13346 of NaN. FCCMP generates Invalid Operation exn if either arg is a
13347 signalling NaN. We ignore this detail here and produce the same
13348 IR for both.
13349 */
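      /* Illustrative example (hypothetical operands): for
         "fccmp d0, d1, #4, eq" -- if Z is currently set, NZCV is taken
         from comparing d0 with d1; otherwise NZCV := 0100. */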
13350 Bool isD = (ty & 1) == 1;
13351 Bool isCMPE = op == 1;
13352 IRType ity = isD ? Ity_F64 : Ity_F32;
13353 IRTemp argL = newTemp(ity);
13354 IRTemp argR = newTemp(ity);
13355 IRTemp irRes = newTemp(Ity_I32);
13356 assign(argL, getQRegLO(nn, ity));
13357 assign(argR, getQRegLO(mm, ity));
13358 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
13359 mkexpr(argL), mkexpr(argR)));
13360 IRTemp condT = newTemp(Ity_I1);
13361 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
13362 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
13363
13364 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
13365 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
13366
13367 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
13368
13369 IRTemp nzcv_28x0 = newTemp(Ity_I64);
13370 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
13371 mkexpr(nzcvT_28x0), nzcvF_28x0));
13372 setFlags_COPY(nzcv_28x0);
13373 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
13374 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
13375 return True;
13376 }
13377
sewardjdf1628c2014-06-10 22:52:05 +000013378 return False;
13379# undef INSN
13380}
13381
sewardjfc83d2c2014-06-12 10:15:46 +000013382
sewardjdf1628c2014-06-10 22:52:05 +000013383static
13384Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
13385{
sewardje23ec112014-11-15 16:07:14 +000013386   /* 31 23 21 20 15 11 9 4
13387 000 11110 ty 1 m cond 11 n d
13388 The first 3 bits are really "M 0 S", but M and S are always zero.
13389 Decode fields: ty
13390 */
sewardjdf1628c2014-06-10 22:52:05 +000013391# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardje23ec112014-11-15 16:07:14 +000013392 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
13393 || INSN(11,10) != BITS2(1,1)) {
13394 return False;
13395 }
13396 UInt ty = INSN(23,22);
13397 UInt mm = INSN(20,16);
13398 UInt cond = INSN(15,12);
13399 UInt nn = INSN(9,5);
13400 UInt dd = INSN(4,0);
13401 if (ty <= X01) {
13402 /* -------- 00: FCSEL s_s -------- */
13403      /* -------- 01: FCSEL d_d -------- */
13404 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13405 IRTemp srcT = newTemp(ity);
13406 IRTemp srcF = newTemp(ity);
13407 IRTemp res = newTemp(ity);
13408 assign(srcT, getQRegLO(nn, ity));
13409 assign(srcF, getQRegLO(mm, ity));
13410 assign(res, IRExpr_ITE(
13411 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
13412 mkexpr(srcT), mkexpr(srcF)));
13413 putQReg128(dd, mkV128(0x0000));
13414 putQRegLO(dd, mkexpr(res));
13415 DIP("fcsel %s, %s, %s, %s\n",
13416 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
13417 nameCC(cond));
13418 return True;
13419 }
sewardjdf1628c2014-06-10 22:52:05 +000013420 return False;
13421# undef INSN
13422}
13423
sewardj5747c4a2014-06-11 20:57:23 +000013424
sewardjdf1628c2014-06-10 22:52:05 +000013425static
13426Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
13427{
13428 /* 31 28 23 21 20 14 9 4
13429 000 11110 ty 1 opcode 10000 n d
13430 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000013431 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000013432 */
13433# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13434 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13435 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
13436 return False;
13437 }
13438 UInt ty = INSN(23,22);
13439 UInt opcode = INSN(20,15);
13440 UInt nn = INSN(9,5);
13441 UInt dd = INSN(4,0);
13442
13443 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
13444 /* -------- 0x,000000: FMOV d_d, s_s -------- */
13445 /* -------- 0x,000001: FABS d_d, s_s -------- */
13446 /* -------- 0x,000010: FNEG d_d, s_s -------- */
13447 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
13448 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
13449 IRTemp src = newTemp(ity);
13450 IRTemp res = newTemp(ity);
13451 const HChar* nm = "??";
13452 assign(src, getQRegLO(nn, ity));
13453 switch (opcode) {
13454 case BITS6(0,0,0,0,0,0):
13455 nm = "fmov"; assign(res, mkexpr(src)); break;
13456 case BITS6(0,0,0,0,0,1):
13457 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
13458 case BITS6(0,0,0,0,1,0):
13459            nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
13460 case BITS6(0,0,0,0,1,1):
13461 nm = "fsqrt";
13462 assign(res, binop(mkSQRTF(ity),
13463 mkexpr(mk_get_IR_rounding_mode()),
13464 mkexpr(src))); break;
13465 default:
13466 vassert(0);
13467 }
13468 putQReg128(dd, mkV128(0x0000));
13469 putQRegLO(dd, mkexpr(res));
13470 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13471 return True;
13472 }
13473
sewardj5747c4a2014-06-11 20:57:23 +000013474 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
13475 || opcode == BITS6(0,0,0,1,0,1)))
13476 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
13477 || opcode == BITS6(0,0,0,1,0,1)))
13478 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
13479 || opcode == BITS6(0,0,0,1,0,0)))) {
13480 /* -------- 11,000100: FCVT s_h -------- */
13481 /* -------- 11,000101: FCVT d_h -------- */
13482 /* -------- 00,000111: FCVT h_s -------- */
13483 /* -------- 00,000101: FCVT d_s -------- */
13484 /* -------- 01,000111: FCVT h_d -------- */
13485 /* -------- 01,000100: FCVT s_d -------- */
13486 /* 31 23 21 16 14 9 4
sewardj400d6b92015-03-30 09:01:51 +000013487 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
13488 --------- 11 ----- 01 --------- FCVT Dd, Hn
13489 --------- 00 ----- 11 --------- FCVT Hd, Sn
sewardj5747c4a2014-06-11 20:57:23 +000013490 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardj400d6b92015-03-30 09:01:51 +000013491 --------- 01 ----- 11 --------- FCVT Hd, Dn
sewardj5747c4a2014-06-11 20:57:23 +000013492 --------- 01 ----- 00 --------- FCVT Sd, Dn
13493 Rounding, when dst is smaller than src, is per the FPCR.
13494 */
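      /* A worked dispatch example: FCVT Sd, Hn has ty=11 and
         opcode[1:0]=00, so the key below is (11 << 2) | 00 = 1100,
         selecting the "H -> S" case. */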
13495 UInt b2322 = ty;
13496 UInt b1615 = opcode & BITS2(1,1);
sewardj400d6b92015-03-30 09:01:51 +000013497 switch ((b2322 << 2) | b1615) {
13498 case BITS4(0,0,0,1): // S -> D
13499 case BITS4(1,1,0,1): { // H -> D
13500 Bool srcIsH = b2322 == BITS2(1,1);
13501 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
13502 IRTemp res = newTemp(Ity_F64);
13503 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
13504 getQRegLO(nn, srcTy)));
13505 putQReg128(dd, mkV128(0x0000));
13506 putQRegLO(dd, mkexpr(res));
13507 DIP("fcvt %s, %s\n",
13508 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
13509 return True;
13510 }
13511 case BITS4(0,1,0,0): // D -> S
13512 case BITS4(0,1,1,1): { // D -> H
13513 Bool dstIsH = b1615 == BITS2(1,1);
13514 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
13515 IRTemp res = newTemp(dstTy);
13516 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
13517 mkexpr(mk_get_IR_rounding_mode()),
13518 getQRegLO(nn, Ity_F64)));
13519 putQReg128(dd, mkV128(0x0000));
13520 putQRegLO(dd, mkexpr(res));
13521 DIP("fcvt %s, %s\n",
13522 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
13523 return True;
13524 }
13525 case BITS4(0,0,1,1): // S -> H
13526 case BITS4(1,1,0,0): { // H -> S
13527 Bool toH = b1615 == BITS2(1,1);
13528 IRType srcTy = toH ? Ity_F32 : Ity_F16;
13529 IRType dstTy = toH ? Ity_F16 : Ity_F32;
13530 IRTemp res = newTemp(dstTy);
13531 if (toH) {
13532 assign(res, binop(Iop_F32toF16,
13533 mkexpr(mk_get_IR_rounding_mode()),
13534 getQRegLO(nn, srcTy)));
13535
13536 } else {
13537 assign(res, unop(Iop_F16toF32,
13538 getQRegLO(nn, srcTy)));
13539 }
13540 putQReg128(dd, mkV128(0x0000));
13541 putQRegLO(dd, mkexpr(res));
13542 DIP("fcvt %s, %s\n",
13543 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
13544 return True;
13545 }
13546 default:
13547 break;
sewardj5747c4a2014-06-11 20:57:23 +000013548 }
13549 /* else unhandled */
13550 return False;
13551 }
13552
13553 if (ty <= X01
13554 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
13555 && opcode != BITS6(0,0,1,1,0,1)) {
13556 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
13557 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
13558 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
13559 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
13560 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
13561 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
13562 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
13563 /* 31 23 21 17 14 9 4
13564         000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fn (round per FPCR)
13565 rm
13566 x==0 => S-registers, x==1 => D-registers
13567 rm (17:15) encodings:
13568 111 per FPCR (FRINTI)
13569 001 +inf (FRINTP)
13570 010 -inf (FRINTM)
13571 011 zero (FRINTZ)
sewardj6a785df2015-02-09 09:07:47 +000013572 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
sewardj5747c4a2014-06-11 20:57:23 +000013573 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjd8ad76a2014-10-30 15:37:16 +000013574 110 per FPCR + "exact = TRUE" (FRINTX)
sewardj5747c4a2014-06-11 20:57:23 +000013575 101 unallocated
13576 */
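      /* Worked examples of the directed roundings (illustrative):
         FRINTZ takes 2.7 to 2.0 and -2.7 to -2.0; FRINTM takes
         -2.3 to -3.0; FRINTP takes 2.3 to 3.0. */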
13577 Bool isD = (ty & 1) == 1;
13578 UInt rm = opcode & BITS6(0,0,0,1,1,1);
13579 IRType ity = isD ? Ity_F64 : Ity_F32;
13580 IRExpr* irrmE = NULL;
13581 UChar ch = '?';
13582 switch (rm) {
13583 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
13584 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
13585 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
13586 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
13587 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjd8ad76a2014-10-30 15:37:16 +000013588         // FRINTX ("integral exact"): rounds per FPCR and also raises the
sewardj9e1c2b02014-11-25 17:42:52 +000013589         // Inexact exception if the result differs from the source (not modelled).
sewardjd8ad76a2014-10-30 15:37:16 +000013590 case BITS3(1,1,0):
13591 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj9e1c2b02014-11-25 17:42:52 +000013592 case BITS3(1,1,1):
13593 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj6a785df2015-02-09 09:07:47 +000013594 // The following is a kludge. There's no Irrm_ value to represent
13595 // this ("to nearest, with ties to even")
13596 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
sewardj5747c4a2014-06-11 20:57:23 +000013597 default: break;
13598 }
13599 if (irrmE) {
13600 IRTemp src = newTemp(ity);
13601 IRTemp dst = newTemp(ity);
13602 assign(src, getQRegLO(nn, ity));
13603 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13604 irrmE, mkexpr(src)));
13605 putQReg128(dd, mkV128(0x0000));
13606 putQRegLO(dd, mkexpr(dst));
13607 DIP("frint%c %s, %s\n",
13608 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
13609 return True;
13610 }
13611 return False;
13612 }
13613
sewardjdf1628c2014-06-10 22:52:05 +000013614 return False;
13615# undef INSN
13616}
13617
13618
13619static
13620Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
13621{
13622 /* 31 28 23 21 20 15 11 9 4
13623 000 11110 ty 1 m opcode 10 n d
13624 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj76927e62014-11-17 11:21:21 +000013625 Decode fields: ty, opcode
sewardjdf1628c2014-06-10 22:52:05 +000013626 */
13627# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13628 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13629 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
13630 return False;
13631 }
13632 UInt ty = INSN(23,22);
13633 UInt mm = INSN(20,16);
13634 UInt opcode = INSN(15,12);
13635 UInt nn = INSN(9,5);
13636 UInt dd = INSN(4,0);
13637
sewardj76927e62014-11-17 11:21:21 +000013638 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
sewardjdf1628c2014-06-10 22:52:05 +000013639 /* ------- 0x,0000: FMUL d_d, s_s ------- */
13640 /* ------- 0x,0001: FDIV d_d, s_s ------- */
13641 /* ------- 0x,0010: FADD d_d, s_s ------- */
13642 /* ------- 0x,0011: FSUB d_d, s_s ------- */
sewardj76927e62014-11-17 11:21:21 +000013643 /* ------- 0x,0100: FMAX d_d, s_s ------- */
13644 /* ------- 0x,0101: FMIN d_d, s_s ------- */
13645 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
13646 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
sewardjdf1628c2014-06-10 22:52:05 +000013647 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13648 IROp iop = Iop_INVALID;
13649 const HChar* nm = "???";
13650 switch (opcode) {
sewardj76927e62014-11-17 11:21:21 +000013651 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
13652 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
13653 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
13654 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
13655 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
13656 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
13657 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
13658 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
sewardjdf1628c2014-06-10 22:52:05 +000013659 default: vassert(0);
13660 }
sewardj76927e62014-11-17 11:21:21 +000013661 if (opcode <= BITS4(0,0,1,1)) {
13662 // This is really not good code. TODO: avoid width-changing
sewardjb963eef2014-11-17 14:16:56 +000013663 IRTemp res = newTemp(ity);
13664 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13665 getQRegLO(nn, ity), getQRegLO(mm, ity)));
sewardj76927e62014-11-17 11:21:21 +000013666 putQReg128(dd, mkV128(0));
sewardjb963eef2014-11-17 14:16:56 +000013667 putQRegLO(dd, mkexpr(res));
sewardj76927e62014-11-17 11:21:21 +000013668 } else {
13669 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
13670 binop(iop, getQReg128(nn), getQReg128(mm))));
13671 }
sewardjdf1628c2014-06-10 22:52:05 +000013672 DIP("%s %s, %s, %s\n",
13673 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13674 return True;
13675 }
13676
13677 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
13678 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
13679 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
13680 IROp iop = mkMULF(ity);
13681 IROp iopn = mkNEGF(ity);
13682 const HChar* nm = "fnmul";
13683 IRExpr* resE = unop(iopn,
13684 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
13685 getQRegLO(nn, ity), getQRegLO(mm, ity)));
13686 IRTemp res = newTemp(ity);
13687 assign(res, resE);
13688 putQReg128(dd, mkV128(0));
13689 putQRegLO(dd, mkexpr(res));
13690 DIP("%s %s, %s, %s\n",
13691 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
13692 return True;
13693 }
13694
sewardjdf1628c2014-06-10 22:52:05 +000013695 return False;
13696# undef INSN
13697}
13698
13699
13700static
13701Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
13702{
sewardj5747c4a2014-06-11 20:57:23 +000013703 /* 31 28 23 21 20 15 14 9 4
13704 000 11111 ty o1 m o0 a n d
13705 The first 3 bits are really "M 0 S", but M and S are always zero.
13706 Decode fields: ty,o1,o0
13707 */
sewardjdf1628c2014-06-10 22:52:05 +000013708# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000013709 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
13710 return False;
13711 }
13712 UInt ty = INSN(23,22);
13713 UInt bitO1 = INSN(21,21);
13714 UInt mm = INSN(20,16);
13715 UInt bitO0 = INSN(15,15);
13716 UInt aa = INSN(14,10);
13717 UInt nn = INSN(9,5);
13718 UInt dd = INSN(4,0);
13719 vassert(ty < 4);
13720
13721 if (ty <= X01) {
13722 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13723 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13724 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13725 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13726 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13727 /* 31 22 20 15 14 9 4 ix
13728 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13729 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13730 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13731 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13732 where Fx=Dx when sz=1, Fx=Sx when sz=0
13733
13734 -----SPEC------ ----IMPL----
13735 fmadd a + n * m a + n * m
13736 fmsub a + (-n) * m a - n * m
13737 fnmadd (-a) + (-n) * m -(a + n * m)
13738 fnmsub (-a) + n * m -(a - n * m)
13739 */
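      /* Quick numeric check (hypothetical values a=1, n=2, m=3):
         fmadd = 7, fmsub = -5, fnmadd = -7, fnmsub = 5. */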
13740 Bool isD = (ty & 1) == 1;
13741 UInt ix = (bitO1 << 1) | bitO0;
13742 IRType ity = isD ? Ity_F64 : Ity_F32;
13743 IROp opADD = mkADDF(ity);
13744 IROp opSUB = mkSUBF(ity);
13745 IROp opMUL = mkMULF(ity);
13746 IROp opNEG = mkNEGF(ity);
13747 IRTemp res = newTemp(ity);
13748 IRExpr* eA = getQRegLO(aa, ity);
13749 IRExpr* eN = getQRegLO(nn, ity);
13750 IRExpr* eM = getQRegLO(mm, ity);
13751 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13752 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13753 switch (ix) {
13754 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13755 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13756 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13757 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13758 default: vassert(0);
13759 }
13760 putQReg128(dd, mkV128(0x0000));
13761 putQRegLO(dd, mkexpr(res));
13762 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13763 DIP("%s %s, %s, %s, %s\n",
13764 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13765 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13766 return True;
13767 }
13768
sewardjdf1628c2014-06-10 22:52:05 +000013769 return False;
13770# undef INSN
13771}
13772
13773
13774static
13775Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13776{
13777 /* 31 28 23 21 20 12 9 4
13778 000 11110 ty 1 imm8 100 imm5 d
13779 The first 3 bits are really "M 0 S", but M and S are always zero.
13780 */
13781# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13782 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13783 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13784 return False;
13785 }
13786 UInt ty = INSN(23,22);
13787 UInt imm8 = INSN(20,13);
13788 UInt imm5 = INSN(9,5);
13789 UInt dd = INSN(4,0);
13790
13791 /* ------- 00,00000: FMOV s_imm ------- */
13792 /* ------- 01,00000: FMOV d_imm ------- */
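   /* Worked example (per the ARM ARM VFPExpandImm pseudocode):
      imm8 = 0x70 expands to 1.0 and imm8 = 0xF0 to -1.0, for both
      the 32-bit and 64-bit formats. */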
13793 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13794 Bool isD = (ty & 1) == 1;
13795 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
13796 if (!isD) {
13797 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13798 }
13799 putQReg128(dd, mkV128(0));
13800 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13801 DIP("fmov %s, #0x%llx\n",
13802 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13803 return True;
13804 }
13805
13806 return False;
13807# undef INSN
13808}
13809
13810
13811static
sewardj1aff76b2014-11-20 10:14:06 +000013812Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000013813{
sewardj1aff76b2014-11-20 10:14:06 +000013815 /* 31 30 29 28 23 21 20 18 15 9 4
13816 sf 0 0 11110 type 0 rmode opcode scale n d
13817 The first 3 bits are really "sf 0 S", but S is always zero.
13818 Decode fields: sf,type,rmode,opcode
13819 */
13820# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13821 if (INSN(30,29) != BITS2(0,0)
13822 || INSN(28,24) != BITS5(1,1,1,1,0)
13823 || INSN(21,21) != 0) {
13824 return False;
13825 }
13826 UInt bitSF = INSN(31,31);
13827 UInt ty = INSN(23,22); // type
13828 UInt rm = INSN(20,19); // rmode
13829 UInt op = INSN(18,16); // opcode
13830 UInt sc = INSN(15,10); // scale
13831 UInt nn = INSN(9,5);
13832 UInt dd = INSN(4,0);
13833
sewardj2130b342015-04-06 14:49:05 +000013834 if (ty <= X01 && rm == X11
13835 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
13836 /* -------- (ix) sf ty rm opc -------- */
13837 /* -------- 0 0 00 11 000: FCVTZS w_s_#fbits -------- */
13838 /* -------- 1 0 01 11 000: FCVTZS w_d_#fbits -------- */
13839 /* -------- 2 1 00 11 000: FCVTZS x_s_#fbits -------- */
13840 /* -------- 3 1 01 11 000: FCVTZS x_d_#fbits -------- */
13841
13842 /* -------- 4 0 00 11 001: FCVTZU w_s_#fbits -------- */
13843 /* -------- 5 0 01 11 001: FCVTZU w_d_#fbits -------- */
13844 /* -------- 6 1 00 11 001: FCVTZU x_s_#fbits -------- */
13845 /* -------- 7 1 01 11 001: FCVTZU x_d_#fbits -------- */
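      /* Worked example (hypothetical values): FCVTZS W0, S0, #8 with
         S0 = 1.5 scales by 2^8 to 384.0, then truncates to 384 --
         that is, 1.5 in Q8 fixed point. */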
13846 Bool isI64 = bitSF == 1;
13847 Bool isF64 = (ty & 1) == 1;
13848 Bool isU = (op & 1) == 1;
13849 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13850
13851 Int fbits = 64 - sc;
13852 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13853
13854 Double scale = two_to_the_plus(fbits);
13855 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13856 : IRExpr_Const(IRConst_F32( (Float)scale ));
13857 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13858
13859 const IROp ops[8]
13860 = { Iop_F32toI32S, Iop_F64toI32S, Iop_F32toI64S, Iop_F64toI64S,
13861 Iop_F32toI32U, Iop_F64toI32U, Iop_F32toI64U, Iop_F64toI64U };
13862 IRTemp irrm = newTemp(Ity_I32);
13863 assign(irrm, mkU32(Irrm_ZERO));
13864
13865 IRExpr* src = getQRegLO(nn, isF64 ? Ity_F64 : Ity_F32);
13866 IRExpr* res = binop(ops[ix], mkexpr(irrm),
13867 triop(opMUL, mkexpr(irrm), src, scaleE));
13868 putIRegOrZR(isI64, dd, res);
13869
13870 DIP("fcvtz%c %s, %s, #%d\n",
13871 isU ? 'u' : 's', nameIRegOrZR(isI64, dd),
13872 nameQRegLO(nn, isF64 ? Ity_F64 : Ity_F32), fbits);
13873 return True;
13874 }
13875
13876 /* ------ sf,ty,rm,opc ------ */
13877 /* ------ x,0x,00,010 SCVTF s/d, w/x, #fbits ------ */
13878 /* ------ x,0x,00,011 UCVTF s/d, w/x, #fbits ------ */
13879 /* (ix) sf S 28 ty rm opc 15 9 4
sewardj1aff76b2014-11-20 10:14:06 +000013880 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
13881 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
13882 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
13883 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
13884
13885 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
13886 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
13887 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
13888 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
13889
13890 These are signed/unsigned conversion from integer registers to
13891 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13892 scaled per |scale|.
13893 */
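   /* Worked example (hypothetical values): SCVTF S0, W1, #8 with
      W1 = 384 converts to 384.0 and then scales by 2^-8, giving 1.5. */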
13894 if (ty <= X01 && rm == X00
13895 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
13896 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
13897 Bool isI64 = bitSF == 1;
13898 Bool isF64 = (ty & 1) == 1;
13899 Bool isU = (op & 1) == 1;
13900 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
13901
13902 Int fbits = 64 - sc;
13903 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
13904
13905 Double scale = two_to_the_minus(fbits);
13906 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
13907 : IRExpr_Const(IRConst_F32( (Float)scale ));
13908 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
13909
13910 const IROp ops[8]
13911 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
13912 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
13913 IRExpr* src = getIRegOrZR(isI64, nn);
13914 IRExpr* res = (isF64 && !isI64)
13915 ? unop(ops[ix], src)
13916 : binop(ops[ix],
13917 mkexpr(mk_get_IR_rounding_mode()), src);
13918 putQReg128(dd, mkV128(0));
13919 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
13920
13921 DIP("%ccvtf %s, %s, #%d\n",
13922 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
13923 nameIRegOrZR(isI64, nn), fbits);
13924 return True;
13925 }
13926
sewardjdf1628c2014-06-10 22:52:05 +000013927 return False;
13928# undef INSN
13929}
13930
13931
13932static
sewardj5747c4a2014-06-11 20:57:23 +000013933Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000013934{
13935 /* 31 30 29 28 23 21 20 18 15 9 4
sewardj5747c4a2014-06-11 20:57:23 +000013936 sf 0 0 11110 type 1 rmode opcode 000000 n d
13937 The first 3 bits are really "sf 0 S", but S is always zero.
sewardjf67fcb92014-10-30 23:10:45 +000013938 Decode fields: sf,type,rmode,opcode
sewardjdf1628c2014-06-10 22:52:05 +000013939 */
13940# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000013941 if (INSN(30,29) != BITS2(0,0)
sewardjdf1628c2014-06-10 22:52:05 +000013942 || INSN(28,24) != BITS5(1,1,1,1,0)
13943 || INSN(21,21) != 1
13944 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
13945 return False;
13946 }
13947 UInt bitSF = INSN(31,31);
sewardjdf1628c2014-06-10 22:52:05 +000013948 UInt ty = INSN(23,22); // type
13949 UInt rm = INSN(20,19); // rmode
13950 UInt op = INSN(18,16); // opcode
13951 UInt nn = INSN(9,5);
13952 UInt dd = INSN(4,0);
13953
sewardj5747c4a2014-06-11 20:57:23 +000013954 // op = 000, 001
sewardjf67fcb92014-10-30 23:10:45 +000013955 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
sewardj5747c4a2014-06-11 20:57:23 +000013956 /* 30 23 20 18 15 9 4
13957 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
13958 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
13959 ---------------- 01 -------------- FCVTP-------- (round to +inf)
13960 ---------------- 10 -------------- FCVTM-------- (round to -inf)
13961 ---------------- 11 -------------- FCVTZ-------- (round to zero)
sewardjf67fcb92014-10-30 23:10:45 +000013962 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
13963 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
13964
sewardj5747c4a2014-06-11 20:57:23 +000013965 Rd is Xd when sf==1, Wd when sf==0
13966 Fn is Dn when x==1, Sn when x==0
13967 20:19 carry the rounding mode, using the same encoding as FPCR
13968 */
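   /* Note: the "ties away" forms (FCVTA*) are approximated below with
      Irrm_NEAREST, which ties to even: e.g. 2.5 should convert to 3
      per the spec, but this implementation gives 2. */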
sewardjf67fcb92014-10-30 23:10:45 +000013969 if (ty <= X01
13970 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
13971 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
13972 )
13973 ) {
sewardj5747c4a2014-06-11 20:57:23 +000013974 Bool isI64 = bitSF == 1;
13975 Bool isF64 = (ty & 1) == 1;
13976 Bool isU = (op & 1) == 1;
13977 /* Decide on the IR rounding mode to use. */
13978 IRRoundingMode irrm = 8; /*impossible*/
13979 HChar ch = '?';
sewardjf67fcb92014-10-30 23:10:45 +000013980 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
13981 switch (rm) {
13982 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
13983 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
13984 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
13985 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
13986 default: vassert(0);
13987 }
13988 } else {
13989 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
13990 switch (rm) {
13991 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
13992 default: vassert(0);
13993 }
sewardj5747c4a2014-06-11 20:57:23 +000013994 }
13995 vassert(irrm != 8);
13996 /* Decide on the conversion primop, based on the source size,
13997 dest size and signedness (8 possibilities). Case coding:
13998 F32 ->s I32 0
13999 F32 ->u I32 1
14000 F32 ->s I64 2
14001 F32 ->u I64 3
14002 F64 ->s I32 4
14003 F64 ->u I32 5
14004 F64 ->s I64 6
14005 F64 ->u I64 7
14006 */
14007 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
14008 vassert(ix < 8);
14009 const IROp iops[8]
14010 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
14011 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
14012 IROp iop = iops[ix];
14013 // A bit of ATCery: bounce all cases we haven't seen an example of.
14014 if (/* F32toI32S */
14015 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
14016 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
14017 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
sewardjf67fcb92014-10-30 23:10:45 +000014018 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
sewardj5747c4a2014-06-11 20:57:23 +000014019 /* F32toI32U */
14020 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
14021 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
sewardj0728a522014-11-15 22:24:18 +000014022 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
sewardjf67fcb92014-10-30 23:10:45 +000014023 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
sewardj5747c4a2014-06-11 20:57:23 +000014024 /* F32toI64S */
14025 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
sewardjf4edb1d2015-02-24 13:23:38 +000014026 || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
14027 || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
sewardj266d5962014-11-20 11:30:41 +000014028 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
sewardj5747c4a2014-06-11 20:57:23 +000014029 /* F32toI64U */
14030 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
sewardj6d5985e2015-02-05 15:22:30 +000014031 || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
sewardjefe536b2014-09-06 08:08:47 +000014032 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
sewardjf4edb1d2015-02-24 13:23:38 +000014033 || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
sewardj5747c4a2014-06-11 20:57:23 +000014034 /* F64toI32S */
14035 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
14036 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
14037 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
sewardj76927e62014-11-17 11:21:21 +000014038 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
sewardj5747c4a2014-06-11 20:57:23 +000014039 /* F64toI32U */
14040 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
14041 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
14042 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
sewardjf4edb1d2015-02-24 13:23:38 +000014043 || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
sewardj5747c4a2014-06-11 20:57:23 +000014044 /* F64toI64S */
14045 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
14046 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
14047 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
sewardj76927e62014-11-17 11:21:21 +000014048 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
sewardj5747c4a2014-06-11 20:57:23 +000014049 /* F64toI64U */
14050 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
sewardj31b29af2014-10-30 15:54:53 +000014051 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
sewardj5747c4a2014-06-11 20:57:23 +000014052 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
sewardjf4edb1d2015-02-24 13:23:38 +000014053 || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
sewardj5747c4a2014-06-11 20:57:23 +000014054 ) {
14055 /* validated */
14056 } else {
14057 return False;
14058 }
14059 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
14060 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
14061 IRTemp src = newTemp(srcTy);
14062 IRTemp dst = newTemp(dstTy);
14063 assign(src, getQRegLO(nn, srcTy));
14064 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
14065 putIRegOrZR(isI64, dd, mkexpr(dst));
14066 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
14067 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
14068 return True;
14069 }
14070
14071 // op = 010, 011
sewardjdf1628c2014-06-10 22:52:05 +000014072 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
14073 /* (ix) sf S 28 ty rm op 15 9 4
14074 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
14075 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
14076 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
14077 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
14078
14079 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
14080 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
14081 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
14082 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
14083
14084 These are signed/unsigned conversion from integer registers to
14085 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
14086 */
sewardj5747c4a2014-06-11 20:57:23 +000014087 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
sewardjdf1628c2014-06-10 22:52:05 +000014088 Bool isI64 = bitSF == 1;
14089 Bool isF64 = (ty & 1) == 1;
14090 Bool isU = (op & 1) == 1;
14091 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
14092 const IROp ops[8]
14093 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
14094 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
14095 IRExpr* src = getIRegOrZR(isI64, nn);
14096 IRExpr* res = (isF64 && !isI64)
14097 ? unop(ops[ix], src)
sewardj1aff76b2014-11-20 10:14:06 +000014098 : binop(ops[ix],
14099 mkexpr(mk_get_IR_rounding_mode()), src);
sewardjdf1628c2014-06-10 22:52:05 +000014100 putQReg128(dd, mkV128(0));
14101 putQRegLO(dd, res);
14102 DIP("%ccvtf %s, %s\n",
14103 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
14104 nameIRegOrZR(isI64, nn));
14105 return True;
14106 }
14107
sewardj5747c4a2014-06-11 20:57:23 +000014108 // op = 110, 111
sewardjdf1628c2014-06-10 22:52:05 +000014109 /* -------- FMOV (general) -------- */
14110 /* case sf S ty rm op 15 9 4
14111 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
14112 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
14113 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
14114
14115 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
14116 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
14117 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
14118 */
sewardj5747c4a2014-06-11 20:57:23 +000014119 if (1) {
sewardjbbcf1882014-01-12 12:49:10 +000014120 UInt ix = 0; // case
sewardjdf1628c2014-06-10 22:52:05 +000014121 if (bitSF == 0) {
sewardjbbcf1882014-01-12 12:49:10 +000014122 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14123 ix = 1;
14124 else
14125 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14126 ix = 4;
14127 } else {
sewardjdf1628c2014-06-10 22:52:05 +000014128 vassert(bitSF == 1);
sewardjbbcf1882014-01-12 12:49:10 +000014129 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
14130 ix = 2;
14131 else
14132 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
14133 ix = 5;
14134 else
14135 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
14136 ix = 3;
14137 else
14138 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
14139 ix = 6;
14140 }
14141 if (ix > 0) {
14142 switch (ix) {
14143 case 1:
14144 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +000014145 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000014146 DIP("fmov s%u, w%u\n", dd, nn);
14147 break;
14148 case 2:
14149 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +000014150 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000014151 DIP("fmov d%u, x%u\n", dd, nn);
14152 break;
14153 case 3:
sewardj606c4ba2014-01-26 19:11:14 +000014154 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000014155 DIP("fmov v%u.d[1], x%u\n", dd, nn);
14156 break;
14157 case 4:
sewardj606c4ba2014-01-26 19:11:14 +000014158 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +000014159 DIP("fmov w%u, s%u\n", dd, nn);
14160 break;
14161 case 5:
sewardj606c4ba2014-01-26 19:11:14 +000014162 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +000014163 DIP("fmov x%u, d%u\n", dd, nn);
14164 break;
14165 case 6:
sewardj606c4ba2014-01-26 19:11:14 +000014166 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +000014167 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
14168 break;
14169 default:
14170 vassert(0);
14171 }
14172 return True;
14173 }
14174 /* undecodable; fall through */
14175 }
14176
sewardjdf1628c2014-06-10 22:52:05 +000014177 return False;
14178# undef INSN
14179}
14180
14181
14182static
14183Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
14184{
14185 Bool ok;
14186 ok = dis_AdvSIMD_EXT(dres, insn);
14187 if (UNLIKELY(ok)) return True;
14188 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
14189 if (UNLIKELY(ok)) return True;
14190 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
14191 if (UNLIKELY(ok)) return True;
14192 ok = dis_AdvSIMD_across_lanes(dres, insn);
14193 if (UNLIKELY(ok)) return True;
14194 ok = dis_AdvSIMD_copy(dres, insn);
14195 if (UNLIKELY(ok)) return True;
14196 ok = dis_AdvSIMD_modified_immediate(dres, insn);
14197 if (UNLIKELY(ok)) return True;
14198 ok = dis_AdvSIMD_scalar_copy(dres, insn);
14199 if (UNLIKELY(ok)) return True;
14200 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
14201 if (UNLIKELY(ok)) return True;
14202 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
14203 if (UNLIKELY(ok)) return True;
14204 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
14205 if (UNLIKELY(ok)) return True;
14206 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
14207 if (UNLIKELY(ok)) return True;
14208 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
14209 if (UNLIKELY(ok)) return True;
14210 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
14211 if (UNLIKELY(ok)) return True;
14212 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
14213 if (UNLIKELY(ok)) return True;
14214 ok = dis_AdvSIMD_three_different(dres, insn);
14215 if (UNLIKELY(ok)) return True;
14216 ok = dis_AdvSIMD_three_same(dres, insn);
14217 if (UNLIKELY(ok)) return True;
14218 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
14219 if (UNLIKELY(ok)) return True;
14220 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
14221 if (UNLIKELY(ok)) return True;
14222 ok = dis_AdvSIMD_crypto_aes(dres, insn);
14223 if (UNLIKELY(ok)) return True;
14224 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
14225 if (UNLIKELY(ok)) return True;
14226 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
14227 if (UNLIKELY(ok)) return True;
14228 ok = dis_AdvSIMD_fp_compare(dres, insn);
14229 if (UNLIKELY(ok)) return True;
14230 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
14231 if (UNLIKELY(ok)) return True;
14232 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
14233 if (UNLIKELY(ok)) return True;
14234 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
14235 if (UNLIKELY(ok)) return True;
14236 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
14237 if (UNLIKELY(ok)) return True;
14238 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
14239 if (UNLIKELY(ok)) return True;
14240 ok = dis_AdvSIMD_fp_immediate(dres, insn);
14241 if (UNLIKELY(ok)) return True;
sewardj1aff76b2014-11-20 10:14:06 +000014242 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000014243 if (UNLIKELY(ok)) return True;
sewardj5747c4a2014-06-11 20:57:23 +000014244 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000014245 if (UNLIKELY(ok)) return True;
14246 return False;
14247}
14248
sewardjbbcf1882014-01-12 12:49:10 +000014249
14250/*------------------------------------------------------------*/
14251/*--- Disassemble a single ARM64 instruction ---*/
14252/*------------------------------------------------------------*/
14253
14254/* Disassemble a single ARM64 instruction into IR. The instruction
14255   is located at |guest_instr| and has guest IP of
14256 |guest_PC_curr_instr|, which will have been set before the call
14257 here. Returns True iff the instruction was decoded, in which case
14258 *dres will be set accordingly, or False, in which case *dres should
14259 be ignored by the caller. */
14260
14261static
14262Bool disInstr_ARM64_WRK (
14263 /*MB_OUT*/DisResult* dres,
florianbeac5302014-12-31 12:09:38 +000014264 Bool (*resteerOkFn) ( /*opaque*/void*, Addr ),
sewardjbbcf1882014-01-12 12:49:10 +000014265 Bool resteerCisOk,
14266 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000014267 const UChar* guest_instr,
floriancacba8e2014-12-15 18:58:07 +000014268 const VexArchInfo* archinfo,
14269 const VexAbiInfo* abiinfo
sewardjbbcf1882014-01-12 12:49:10 +000014270 )
14271{
14272 // A macro to fish bits out of 'insn'.
14273# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
14274
14275//ZZ DisResult dres;
14276//ZZ UInt insn;
14277//ZZ //Bool allow_VFP = False;
14278//ZZ //UInt hwcaps = archinfo->hwcaps;
14279//ZZ IRTemp condT; /* :: Ity_I32 */
14280//ZZ UInt summary;
14281//ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
14282//ZZ
14283//ZZ /* What insn variants are we supporting today? */
14284//ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
14285//ZZ // etc etc
14286
14287 /* Set result defaults. */
14288 dres->whatNext = Dis_Continue;
14289 dres->len = 4;
14290 dres->continueAt = 0;
14291 dres->jk_StopHere = Ijk_INVALID;
14292
14293 /* At least this is simple on ARM64: insns are all 4 bytes long, and
14294 4-aligned. So just fish the whole thing out of memory right now
14295 and have done. */
14296 UInt insn = getUIntLittleEndianly( guest_instr );
14297
14298 if (0) vex_printf("insn: 0x%x\n", insn);
14299
14300 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
14301
14302 vassert(0 == (guest_PC_curr_instr & 3ULL));
14303
14304 /* ----------------------------------------------------------- */
14305
14306 /* Spot "Special" instructions (see comment at top of file). */
14307 {
florian8462d112014-09-24 15:18:09 +000014308 const UChar* code = guest_instr;
sewardjbbcf1882014-01-12 12:49:10 +000014309 /* Spot the 16-byte preamble:
14310 93CC0D8C ror x12, x12, #3
14311 93CC358C ror x12, x12, #13
14312 93CCCD8C ror x12, x12, #51
14313 93CCF58C ror x12, x12, #61
14314 */
14315 UInt word1 = 0x93CC0D8C;
14316 UInt word2 = 0x93CC358C;
14317 UInt word3 = 0x93CCCD8C;
14318 UInt word4 = 0x93CCF58C;
14319 if (getUIntLittleEndianly(code+ 0) == word1 &&
14320 getUIntLittleEndianly(code+ 4) == word2 &&
14321 getUIntLittleEndianly(code+ 8) == word3 &&
14322 getUIntLittleEndianly(code+12) == word4) {
14323 /* Got a "Special" instruction preamble. Which one is it? */
14324 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
14325 /* orr x10,x10,x10 */) {
14326 /* X3 = client_request ( X4 ) */
14327 DIP("x3 = client_request ( x4 )\n");
14328 putPC(mkU64( guest_PC_curr_instr + 20 ));
14329 dres->jk_StopHere = Ijk_ClientReq;
14330 dres->whatNext = Dis_StopHere;
14331 return True;
14332 }
14333 else
14334 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
14335 /* orr x11,x11,x11 */) {
14336 /* X3 = guest_NRADDR */
14337 DIP("x3 = guest_NRADDR\n");
14338 dres->len = 20;
14339 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
14340 return True;
14341 }
14342 else
14343 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
14344 /* orr x12,x12,x12 */) {
14345 /* branch-and-link-to-noredir X8 */
14346 DIP("branch-and-link-to-noredir x8\n");
14347 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
14348 putPC(getIReg64orZR(8));
14349 dres->jk_StopHere = Ijk_NoRedir;
14350 dres->whatNext = Dis_StopHere;
14351 return True;
14352 }
14353 else
14354 if (getUIntLittleEndianly(code+16) == 0xAA090129
14355 /* orr x9,x9,x9 */) {
14356 /* IR injection */
14357 DIP("IR injection\n");
14358 vex_inject_ir(irsb, Iend_LE);
14359 // Invalidate the current insn. The reason is that the IRop we're
14360 // injecting here can change. In which case the translation has to
14361 // be redone. For ease of handling, we simply invalidate all the
14362 // time.
sewardj05f5e012014-05-04 10:52:11 +000014363 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
14364 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
sewardjbbcf1882014-01-12 12:49:10 +000014365 putPC(mkU64( guest_PC_curr_instr + 20 ));
14366 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +000014367 dres->jk_StopHere = Ijk_InvalICache;
sewardjbbcf1882014-01-12 12:49:10 +000014368 return True;
14369 }
14370 /* We don't know what it is. */
14371 return False;
14372 /*NOTREACHED*/
14373 }
14374 }
14375
14376 /* ----------------------------------------------------------- */
14377
14378 /* Main ARM64 instruction decoder starts here. */
14379
14380 Bool ok = False;
14381
14382 /* insn[28:25] determines the top-level grouping, so let's start
14383 off with that.
14384
14385 For all of these dis_ARM64_ functions, we pass *dres with the
14386 normal default results "insn OK, 4 bytes long, keep decoding" so
14387 they don't need to change it. However, decodes of control-flow
14388 insns may cause *dres to change.
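   /* For example (hand-checked encoding, illustrative): 0x91000000 is
      "add x0, x0, #0"; its bits 28:25 are 1000, so it is handled by
      the data processing - immediate decoder below. */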
14389 */
14390 switch (INSN(28,25)) {
14391 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
14392 // Data processing - immediate
14393 ok = dis_ARM64_data_processing_immediate(dres, insn);
14394 break;
14395 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
14396 // Branch, exception generation and system instructions
sewardj65902992014-05-03 21:20:56 +000014397 ok = dis_ARM64_branch_etc(dres, insn, archinfo);
sewardjbbcf1882014-01-12 12:49:10 +000014398 break;
14399 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
14400 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
14401 // Loads and stores
14402 ok = dis_ARM64_load_store(dres, insn);
14403 break;
14404 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
14405 // Data processing - register
14406 ok = dis_ARM64_data_processing_register(dres, insn);
14407 break;
14408 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
14409 // Data processing - SIMD and floating point
14410 ok = dis_ARM64_simd_and_fp(dres, insn);
14411 break;
14412 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
14413 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
14414 // UNALLOCATED
14415 break;
14416 default:
14417 vassert(0); /* Can't happen */
14418 }
14419
14420 /* If the next-level down decoders failed, make sure |dres| didn't
14421 get changed. */
14422 if (!ok) {
14423 vassert(dres->whatNext == Dis_Continue);
14424 vassert(dres->len == 4);
14425 vassert(dres->continueAt == 0);
14426 vassert(dres->jk_StopHere == Ijk_INVALID);
14427 }
14428
14429 return ok;
14430
14431# undef INSN
14432}
14433
14434
14435/*------------------------------------------------------------*/
14436/*--- Top-level fn ---*/
14437/*------------------------------------------------------------*/
14438
14439/* Disassemble a single instruction into IR. The instruction
14440 is located in host memory at &guest_code[delta]. */
14441
14442DisResult disInstr_ARM64 ( IRSB* irsb_IN,
florianbeac5302014-12-31 12:09:38 +000014443 Bool (*resteerOkFn) ( void*, Addr ),
sewardjbbcf1882014-01-12 12:49:10 +000014444 Bool resteerCisOk,
14445 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000014446 const UChar* guest_code_IN,
sewardjbbcf1882014-01-12 12:49:10 +000014447 Long delta_IN,
floriand4cc0de2015-01-02 11:44:12 +000014448 Addr guest_IP,
sewardjbbcf1882014-01-12 12:49:10 +000014449 VexArch guest_arch,
floriancacba8e2014-12-15 18:58:07 +000014450 const VexArchInfo* archinfo,
14451 const VexAbiInfo* abiinfo,
sewardj9b769162014-07-24 12:42:03 +000014452 VexEndness host_endness_IN,
sewardjbbcf1882014-01-12 12:49:10 +000014453 Bool sigill_diag_IN )
14454{
14455 DisResult dres;
14456 vex_bzero(&dres, sizeof(dres));
14457
14458 /* Set globals (see top of this file) */
14459 vassert(guest_arch == VexArchARM64);
14460
14461 irsb = irsb_IN;
sewardj9b769162014-07-24 12:42:03 +000014462 host_endness = host_endness_IN;
sewardjbbcf1882014-01-12 12:49:10 +000014463 guest_PC_curr_instr = (Addr64)guest_IP;
14464
sewardj65902992014-05-03 21:20:56 +000014465 /* Sanity checks */
14466 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
14467 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
14468 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
14469
sewardjbbcf1882014-01-12 12:49:10 +000014470 /* Try to decode */
14471 Bool ok = disInstr_ARM64_WRK( &dres,
14472 resteerOkFn, resteerCisOk, callback_opaque,
florian8462d112014-09-24 15:18:09 +000014473 &guest_code_IN[delta_IN],
sewardjbbcf1882014-01-12 12:49:10 +000014474 archinfo, abiinfo );
14475 if (ok) {
14476 /* All decode successes end up here. */
sewardjdc9259c2014-02-27 11:10:19 +000014477 vassert(dres.len == 4 || dres.len == 20);
sewardjbbcf1882014-01-12 12:49:10 +000014478 switch (dres.whatNext) {
14479 case Dis_Continue:
14480 putPC( mkU64(dres.len + guest_PC_curr_instr) );
14481 break;
14482 case Dis_ResteerU:
14483 case Dis_ResteerC:
14484 putPC(mkU64(dres.continueAt));
14485 break;
14486 case Dis_StopHere:
14487 break;
14488 default:
14489 vassert(0);
14490 }
14491 DIP("\n");
14492 } else {
14493 /* All decode failures end up here. */
14494 if (sigill_diag_IN) {
14495 Int i, j;
14496 UChar buf[64];
14497 UInt insn
florian8462d112014-09-24 15:18:09 +000014498 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
sewardjbbcf1882014-01-12 12:49:10 +000014499 vex_bzero(buf, sizeof(buf));
14500 for (i = j = 0; i < 32; i++) {
14501 if (i > 0) {
14502 if ((i & 7) == 0) buf[j++] = ' ';
14503 else if ((i & 3) == 0) buf[j++] = '\'';
14504 }
14505 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
14506 }
14507 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
14508 vex_printf("disInstr(arm64): %s\n", buf);
14509 }
14510
14511 /* Tell the dispatcher that this insn cannot be decoded, and so
14512 has not been executed, and (is currently) the next to be
14513 executed. PC should be up-to-date since it is made so at the
14514 start of each insn, but nevertheless be paranoid and update
14515 it again right now. */
14516 putPC( mkU64(guest_PC_curr_instr) );
sewardjbbcf1882014-01-12 12:49:10 +000014517 dres.len = 0;
philippe2faf5912014-08-11 22:45:47 +000014518 dres.whatNext = Dis_StopHere;
sewardjbbcf1882014-01-12 12:49:10 +000014519 dres.jk_StopHere = Ijk_NoDecode;
philippe2faf5912014-08-11 22:45:47 +000014520 dres.continueAt = 0;
sewardjbbcf1882014-01-12 12:49:10 +000014521 }
14522 return dres;
14523}
14524
sewardjecde6972014-02-05 11:01:19 +000014525
sewardjbbcf1882014-01-12 12:49:10 +000014526/*--------------------------------------------------------------------*/
14527/*--- end guest_arm64_toIR.c ---*/
14528/*--------------------------------------------------------------------*/