/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, the "unordered" FP comparison is implemented as a normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding, hence sometimes the
     least significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.
*/

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following four is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
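/* For reference, a sketch of how a client might embed the preamble
   plus a request.  Illustrative only, not copied from valgrind.h;
   the exact macro names and constraints there may differ:

      __asm__ volatile(
         "ror x12, x12, #3  \n\t"
         "ror x12, x12, #13 \n\t"
         "ror x12, x12, #51 \n\t"
         "ror x12, x12, #61 \n\t"
         "orr x10, x10, x10 \n\t"   // X3 = client_request(X4)
         : /*out*/ : /*in*/ : "memory", "cc"
      );
*/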

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend a N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
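/* Worked example: sx_to_64(0x2A, 6) == 0xFFFFFFFFFFFFFFEAULL, since
   bit 5 of 0x2A is set; sx_to_64(0x0A, 6) == 0x0A, since it is not. */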

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0)                                            \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0)                                        \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0)                                    \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                    \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)                         \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)                                \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)                            \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)                        \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)                \
   (((_b8) << 8)                                                  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)           \
   (((_b9) << 9) | ((_b8) << 8)                                   \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b10) << 10)                                                \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)                                                \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
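/* Example: BITS4(1,0,1,1) == 0xB, and
   BITS8(0,1,0,1, 0,0,0,1) == (BITS4(0,1,0,1) << 4) | BITS4(0,0,0,1)
                           == 0x51. */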

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)                             \
   (( ((UInt)(_uint)) >> (_bMin))                                 \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
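/* Example: SLICE_UInt(0xABCD, 11, 4) == 0xBC, i.e. bits 11:4 of
   0xABCD. */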


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
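/* Illustrative only: these combinators nest in the obvious way.  For
   example, to increment a 64-bit temp 'src' and store the result at
   the address held in temp 'addr' (newTemp is defined further below):

      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(Iop_Add64, mkexpr(src), mkU64(1)));
      storeLE(mkexpr(addr), mkexpr(res));
*/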

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
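/* In these mkVecXXX selectors, 'size' encodes the lane width:
   0 -> 8-bit lanes, 1 -> 16-bit, 2 -> 32-bit, 3 -> 64-bit.  So, for
   example, mkVecADD(2) == Iop_Add32x4.  (Note inferred from the
   tables below.) */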

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
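/* Worked example: mathROR(Ity_I32, t, 8) generates
   (t << 24) | (t >> 8), so the value 0x11223344 becomes 0x44112233. */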

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
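/* Worked example: for Ity_I32 and imm == 5, this computes
   (arg << 26) >>s 31, so arg == 0x20 (bit 5 set) yields 0xFFFFFFFF
   and arg == 0x1F (bit 5 clear) yields 0. */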

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0   offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1   offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2   offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3   offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4   offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5   offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6   offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7   offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8   offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9   offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10  offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11  offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12  offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13  offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14  offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15  offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16  offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17  offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18  offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19  offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20  offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21  offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22  offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23  offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24  offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25  offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26  offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27  offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28  offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29  offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30  offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31  offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR   offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN   offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers.*/
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                laneSzB = 1;  break;
      case Ity_I16:               laneSzB = 2;  break;
      case Ity_F32: case Ity_I32: laneSzB = 4;  break;
      case Ity_F64: case Ity_I64: laneSzB = 8;  break;
      case Ity_V128:              laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
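/* Example: offsetQRegLane(3, Ity_I32, 2) == OFFB_Q3 + 8, the byte
   offset of the third 32-bit lane of Q3 in this little-endian
   layout. */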

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
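/* Example: nameQRegLO(5, Ity_F64) is "d5" and nameQRegLO(5, Ity_I16)
   is "h5", matching the AArch64 scalar register naming. */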

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy  = typeOfIRExpr(irsb->tyenv, e);
   Int    off     = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
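/* Worked example: FPCR rmode 01 ("to +infinity") arrives in bits 1:0
   as 0b01; after the bit swap it becomes 0b10, which is the IR
   encoding Irrm_PosINF. */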
1666
1667
1668/*------------------------------------------------------------*/
1669/*--- Helpers for flag handling and conditional insns ---*/
1670/*------------------------------------------------------------*/
1671
1672static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1673{
1674 switch (cond) {
1675 case ARM64CondEQ: return "eq";
1676 case ARM64CondNE: return "ne";
1677 case ARM64CondCS: return "cs"; // or 'hs'
1678 case ARM64CondCC: return "cc"; // or 'lo'
1679 case ARM64CondMI: return "mi";
1680 case ARM64CondPL: return "pl";
1681 case ARM64CondVS: return "vs";
1682 case ARM64CondVC: return "vc";
1683 case ARM64CondHI: return "hi";
1684 case ARM64CondLS: return "ls";
1685 case ARM64CondGE: return "ge";
1686 case ARM64CondLT: return "lt";
1687 case ARM64CondGT: return "gt";
1688 case ARM64CondLE: return "le";
1689 case ARM64CondAL: return "al";
1690 case ARM64CondNV: return "nv";
1691 default: vpanic("name_ARM64Condcode");
1692 }
1693}
1694
1695/* and a handy shorthand for it */
1696static const HChar* nameCC ( ARM64Condcode cond ) {
1697 return nameARM64Condcode(cond);
1698}
1699
1700
1701/* Build IR to calculate some particular condition from stored
1702 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1703 Ity_I64, suitable for narrowing. Although the return type is
1704 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1705 :: Ity_I64 and must denote the condition to compute in
1706 bits 7:4, and be zero everywhere else.
1707*/
1708static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1709{
1710 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1711 /* And 'cond' had better produce a value in which only bits 7:4 are
1712 nonzero. However, obviously we can't assert for that. */
1713
1714 /* So what we're constructing for the first argument is
1715 "(cond << 4) | stored-operation".
1716 However, as per comments above, 'cond' must be supplied
1717 pre-shifted to this function.
1718
1719 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1720 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1721 8 bits of the first argument. */
1722 IRExpr** args
1723 = mkIRExprVec_4(
1724 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1725 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1726 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1727 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1728 );
1729 IRExpr* call
1730 = mkIRExprCCall(
1731 Ity_I64,
1732 0/*regparm*/,
1733 "arm64g_calculate_condition", &arm64g_calculate_condition,
1734 args
1735 );
1736
1737 /* Exclude the requested condition, OP and NDEP from definedness
1738 checking. We're only interested in DEP1 and DEP2. */
1739 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1740 return call;
1741}
1742
1743
1744/* Build IR to calculate some particular condition from stored
1745 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1746 Ity_I64, suitable for narrowing. Although the return type is
1747 Ity_I64, the returned value is either 0 or 1.
1748*/
1749static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1750{
1751 /* First arg is "(cond << 4) | stored-operation". This requires that the
1752 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1753 (COND, OP) pair in the lowest 8 bits of the first argument. */
1754 vassert(cond >= 0 && cond <= 15);
1755 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1756}
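/* Example (illustrative only): for cond = ARM64CondNE (0b0001) the first
   argument seen by the helper at run time is (0b0001 << 4) | CC_OP, so it
   can recover the condition from bits 7:4 and the operation from bits 3:0. */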
1757
1758
1759/* Build IR to calculate just the carry flag from stored
1760 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1761 Ity_I64. */
1762static IRExpr* mk_arm64g_calculate_flag_c ( void )
1763{
1764 IRExpr** args
1765 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1766 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1767 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1768 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1769 IRExpr* call
1770 = mkIRExprCCall(
1771 Ity_I64,
1772 0/*regparm*/,
1773 "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
1774 args
1775 );
1776 /* Exclude OP and NDEP from definedness checking. We're only
1777 interested in DEP1 and DEP2. */
1778 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1779 return call;
1780}
1781
1782
1783//ZZ /* Build IR to calculate just the overflow flag from stored
1784//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1785//ZZ Ity_I32. */
1786//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1787//ZZ {
1788//ZZ IRExpr** args
1789//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1790//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1791//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1792//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1793//ZZ IRExpr* call
1794//ZZ = mkIRExprCCall(
1795//ZZ Ity_I32,
1796//ZZ 0/*regparm*/,
1797//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1798//ZZ args
1799//ZZ );
1800//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1801//ZZ interested in DEP1 and DEP2. */
1802//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1803//ZZ return call;
1804//ZZ }
1805
1806
1807/* Build IR to calculate N Z C V in bits 31:28 of the
1808 returned word. */
1809static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1810{
1811 IRExpr** args
1812 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1813 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1814 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1815 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1816 IRExpr* call
1817 = mkIRExprCCall(
1818 Ity_I64,
1819 0/*regparm*/,
1820 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1821 args
1822 );
1823 /* Exclude OP and NDEP from definedness checking. We're only
1824 interested in DEP1 and DEP2. */
1825 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1826 return call;
1827}
1828
1829
1830/* Build IR to set the flags thunk, in the most general case. */
1831static
1832void setFlags_D1_D2_ND ( UInt cc_op,
1833 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1834{
1835 vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
1836 vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
1837 vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
1838 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1839 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1840 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1841 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1842 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1843}
1844
1845/* Build IR to set the flags thunk after ADD or SUB. */
1846static
1847void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1848{
1849 IRTemp argL64 = IRTemp_INVALID;
1850 IRTemp argR64 = IRTemp_INVALID;
1851 IRTemp z64 = newTemp(Ity_I64);
1852 if (is64) {
1853 argL64 = argL;
1854 argR64 = argR;
1855 } else {
1856 argL64 = newTemp(Ity_I64);
1857 argR64 = newTemp(Ity_I64);
1858 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1859 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1860 }
1861 assign(z64, mkU64(0));
1862 UInt cc_op = ARM64G_CC_OP_NUMBER;
1863 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1864 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1865 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1866 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1867 else { vassert(0); }
1868 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1869}
1870
1871/* Build IR to set the flags thunk after ADC or SBC. */
1872static
1873void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1874 IRTemp argL, IRTemp argR, IRTemp oldC )
1875{
1876 IRTemp argL64 = IRTemp_INVALID;
1877 IRTemp argR64 = IRTemp_INVALID;
1878 IRTemp oldC64 = IRTemp_INVALID;
1879 if (is64) {
1880 argL64 = argL;
1881 argR64 = argR;
1882 oldC64 = oldC;
1883 } else {
1884 argL64 = newTemp(Ity_I64);
1885 argR64 = newTemp(Ity_I64);
1886 oldC64 = newTemp(Ity_I64);
1887 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1888 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1889 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1890 }
1891 UInt cc_op = ARM64G_CC_OP_NUMBER;
1892 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1893 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1894 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1895 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1896 else { vassert(0); }
1897 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1898}
1899
1900/* Build IR to set the flags thunk after ADD or SUB, if the given
1901 condition evaluates to True at run time. If not, the flags are set
1902 to the specified NZCV value. */
1903static
1904void setFlags_ADD_SUB_conditionally (
1905 Bool is64, Bool isSUB,
1906 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1907 )
1908{
1909 /* Generate IR as follows:
1910 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1911 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1912 CC_DEP2 = ITE(cond, argR64, 0)
1913 CC_NDEP = 0
1914 */
1915
1916 IRTemp z64 = newTemp(Ity_I64);
1917 assign(z64, mkU64(0));
1918
1919 /* Establish the operation and operands for the True case. */
1920 IRTemp t_dep1 = IRTemp_INVALID;
1921 IRTemp t_dep2 = IRTemp_INVALID;
1922 UInt t_op = ARM64G_CC_OP_NUMBER;
1923 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1924 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1925 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1926 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1927 else { vassert(0); }
1928 /* */
1929 if (is64) {
1930 t_dep1 = argL;
1931 t_dep2 = argR;
1932 } else {
1933 t_dep1 = newTemp(Ity_I64);
1934 t_dep2 = newTemp(Ity_I64);
1935 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1936 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1937 }
1938
1939 /* Establish the operation and operands for the False case. */
1940 IRTemp f_dep1 = newTemp(Ity_I64);
1941 IRTemp f_dep2 = z64;
1942 UInt f_op = ARM64G_CC_OP_COPY;
1943 assign(f_dep1, mkU64(nzcv << 28));
1944
1945 /* Final thunk values */
1946 IRTemp dep1 = newTemp(Ity_I64);
1947 IRTemp dep2 = newTemp(Ity_I64);
1948 IRTemp op = newTemp(Ity_I64);
1949
1950 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1951 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1952 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1953
1954 /* finally .. */
1955 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1956 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1957 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1958 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1959}
1960
1961/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1962static
1963void setFlags_LOGIC ( Bool is64, IRTemp res )
1964{
1965 IRTemp res64 = IRTemp_INVALID;
1966 IRTemp z64 = newTemp(Ity_I64);
1967 UInt cc_op = ARM64G_CC_OP_NUMBER;
1968 if (is64) {
1969 res64 = res;
1970 cc_op = ARM64G_CC_OP_LOGIC64;
1971 } else {
1972 res64 = newTemp(Ity_I64);
1973 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1974 cc_op = ARM64G_CC_OP_LOGIC32;
1975 }
1976 assign(z64, mkU64(0));
1977 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1978}
1979
1980/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1981 located in bits 31:28 of the supplied value. */
1982static
1983void setFlags_COPY ( IRTemp nzcv_28x0 )
1984{
1985 IRTemp z64 = newTemp(Ity_I64);
1986 assign(z64, mkU64(0));
1987 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1988}
1989
1990
1991//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1992//ZZ sets it at all) */
1993//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1994//ZZ IRTemp t_dep2,
1995//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1996//ZZ {
1997//ZZ IRTemp z32 = newTemp(Ity_I32);
1998//ZZ assign( z32, mkU32(0) );
1999//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2000//ZZ }
2001//ZZ
2002//ZZ
2003//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2004//ZZ sets it at all) */
2005//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2006//ZZ IRTemp t_ndep,
2007//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2008//ZZ {
2009//ZZ IRTemp z32 = newTemp(Ity_I32);
2010//ZZ assign( z32, mkU32(0) );
2011//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2012//ZZ }
2013//ZZ
2014//ZZ
2015//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2016//ZZ sets them at all) */
2017//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2018//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2019//ZZ {
2020//ZZ IRTemp z32 = newTemp(Ity_I32);
2021//ZZ assign( z32, mkU32(0) );
2022//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2023//ZZ }
2024
2025
2026/*------------------------------------------------------------*/
2027/*--- Misc math helpers ---*/
2028/*------------------------------------------------------------*/
2029
2030/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2031static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
2032{
2033 IRTemp maskT = newTemp(Ity_I64);
2034 IRTemp res = newTemp(Ity_I64);
2035 vassert(sh >= 1 && sh <= 63);
2036 assign(maskT, mkU64(mask));
2037 assign( res,
2038 binop(Iop_Or64,
2039 binop(Iop_Shr64,
2040 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2041 mkU8(sh)),
2042 binop(Iop_And64,
2043 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2044 mkexpr(maskT))
2045 )
2046 );
2047 return res;
2048}
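/* Worked example (added; not in the original source): with
   mask = 0xFF00FF00FF00FF00, sh = 8 and x = 0x0102030405060708:
      (x & mask) >>u 8 = 0x0001000300050007
      (x << 8)  & mask = 0x0200040006000800
      OR'd together    = 0x0201040306050807
   i.e. each adjacent byte pair has been swapped. */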
2049
2050/* Generates byte swaps within 32-bit lanes. */
2051static IRTemp math_UINTSWAP64 ( IRTemp src )
2052{
2053 IRTemp res;
2054 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2055 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2056 return res;
2057}
2058
2059/* Generates byte swaps within 16-bit lanes. */
2060static IRTemp math_USHORTSWAP64 ( IRTemp src )
2061{
2062 IRTemp res;
2063 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2064 return res;
2065}
2066
2067/* Generates a 64-bit byte swap. */
2068static IRTemp math_BYTESWAP64 ( IRTemp src )
2069{
2070 IRTemp res;
2071 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2072 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2073 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2074 return res;
2075}
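/* Worked example (illustrative): math_BYTESWAP64 on 0x0102030405060708:
      after the  8-bit stage: 0x0201040306050807
      after the 16-bit stage: 0x0403020108070605
      after the 32-bit stage: 0x0807060504030201
   which is the fully byte-reversed value. */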
2076
2077/* Generates a 64-bit bit swap. */
2078static IRTemp math_BITSWAP64 ( IRTemp src )
2079{
2080 IRTemp res;
2081 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2082 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2083 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2084 return math_BYTESWAP64(res);
2085}
2086
2087/* Duplicates the bits at the bottom of the given word to fill the
2088 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2089 except for the bottom bits. */
2090static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2091{
2092 if (srcTy == Ity_I8) {
2093 IRTemp t16 = newTemp(Ity_I64);
2094 assign(t16, binop(Iop_Or64, mkexpr(src),
2095 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2096 IRTemp t32 = newTemp(Ity_I64);
2097 assign(t32, binop(Iop_Or64, mkexpr(t16),
2098 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2099 IRTemp t64 = newTemp(Ity_I64);
2100 assign(t64, binop(Iop_Or64, mkexpr(t32),
2101 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2102 return t64;
2103 }
2104 if (srcTy == Ity_I16) {
2105 IRTemp t32 = newTemp(Ity_I64);
2106 assign(t32, binop(Iop_Or64, mkexpr(src),
2107 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2108 IRTemp t64 = newTemp(Ity_I64);
2109 assign(t64, binop(Iop_Or64, mkexpr(t32),
2110 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2111 return t64;
2112 }
2113 if (srcTy == Ity_I32) {
2114 IRTemp t64 = newTemp(Ity_I64);
2115 assign(t64, binop(Iop_Or64, mkexpr(src),
2116 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2117 return t64;
2118 }
2119 if (srcTy == Ity_I64) {
2120 return src;
2121 }
2122 vassert(0);
2123}
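/* Worked example (illustrative): for srcTy == Ity_I8 and src = 0xAB:
      t16 = 0xAB   | (0xAB   << 8)  = 0xABAB
      t32 = 0xABAB | (0xABAB << 16) = 0xABABABAB
      t64 = t32    | (t32    << 32) = 0xABABABABABABABAB
   so the byte is replicated into all eight lanes. */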
2124
2125
2126/* Duplicates the src element exactly so as to fill a V128 value. */
2127static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2128{
2129 IRTemp res = newTempV128();
2130 if (srcTy == Ity_F64) {
2131 IRTemp i64 = newTemp(Ity_I64);
2132 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2133 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2134 return res;
2135 }
2136 if (srcTy == Ity_F32) {
2137 IRTemp i64a = newTemp(Ity_I64);
2138 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2139 IRTemp i64b = newTemp(Ity_I64);
2140 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2141 mkexpr(i64a)));
2142 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2143 return res;
2144 }
2145 if (srcTy == Ity_I64) {
2146 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2147 return res;
2148 }
2149 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2150 IRTemp t1 = newTemp(Ity_I64);
2151 assign(t1, widenUto64(srcTy, mkexpr(src)));
2152 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2153 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2154 return res;
2155 }
2156 vassert(0);
2157}
2158
2159
2160/* |fullWidth| is a full V128 width result. Depending on bitQ,
2161 zero out the upper half. */
2162static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2163{
2164 if (bitQ == 1) return mkexpr(fullWidth);
2165 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2166 vassert(0);
2167}
2168
2169/* The same, but from an expression instead. */
2170static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2171{
2172 IRTemp fullWidthT = newTempV128();
2173 assign(fullWidthT, fullWidth);
2174 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2175}
2176
2177
2178/*------------------------------------------------------------*/
2179/*--- FP comparison helpers ---*/
2180/*------------------------------------------------------------*/
2181
2182/* irRes :: Ity_I32 holds a floating point comparison result encoded
2183 as an IRCmpF64Result. Generate code to convert it to an
2184 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
2185 Assign a new temp to hold that value, and return the temp. */
2186static
2187IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
2188{
2189 IRTemp ix = newTemp(Ity_I64);
2190 IRTemp termL = newTemp(Ity_I64);
2191 IRTemp termR = newTemp(Ity_I64);
2192 IRTemp nzcv = newTemp(Ity_I64);
2193 IRTemp irRes = newTemp(Ity_I64);
2194
2195 /* This is where the fun starts. We have to convert 'irRes' from
2196 an IR-convention return result (IRCmpF64Result) to an
2197 ARM-encoded (N,Z,C,V) group. The final result is in the bottom
2198 4 bits of 'nzcv'. */
2199 /* Map compare result from IR to ARM(nzcv) */
2200 /*
2201 FP cmp result | IR | ARM(nzcv)
2202 --------------------------------
2203 UN 0x45 0011
2204 LT 0x01 1000
2205 GT 0x00 0010
2206 EQ 0x40 0110
2207 */
2208 /* Now since you're probably wondering WTF ..
2209
2210 ix fishes the useful bits out of the IR value, bits 6 and 0, and
2211 places them side by side, giving a number which is 0, 1, 2 or 3.
2212
2213 termL is a sequence cooked up by GNU superopt. It converts ix
2214 into an almost correct NZCV value (incredibly), except
2215 for the case of UN, where it produces 0100 instead of the
2216 required 0011.
2217
2218 termR is therefore a correction term, also computed from ix. It
2219 is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
2220 the final correct value, we subtract termR from termL.
2221
2222 Don't take my word for it. There's a test program at the bottom
2223 of guest_arm_toIR.c that can be used to try this out.
2224 */
2225 assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
2226
2227 assign(
2228 ix,
2229 binop(Iop_Or64,
2230 binop(Iop_And64,
2231 binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
2232 mkU64(3)),
2233 binop(Iop_And64, mkexpr(irRes), mkU64(1))));
2234
2235 assign(
2236 termL,
2237 binop(Iop_Add64,
2238 binop(Iop_Shr64,
2239 binop(Iop_Sub64,
2240 binop(Iop_Shl64,
2241 binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
2242 mkU8(62)),
2243 mkU64(1)),
2244 mkU8(61)),
2245 mkU64(1)));
2246
2247 assign(
2248 termR,
2249 binop(Iop_And64,
2250 binop(Iop_And64,
2251 mkexpr(ix),
2252 binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
2253 mkU64(1)));
2254
2255 assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
2256 return nzcv;
2257}
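/* A minimal sanity sketch (added; not from the original source) of the
   termL/termR arithmetic above, using plain C unsigned 64-bit ops in place
   of the IROps. Deliberately left uncompiled: */
#if 0
static void check_IRCmpF64Result_to_NZCV ( void )
{
   /* ix: 0=GT 1=LT 2=EQ 3=UN; expected NZCV: 0010 1000 0110 0011 */
   const ULong expected[4] = { 0x2, 0x8, 0x6, 0x3 };
   ULong ix;
   for (ix = 0; ix < 4; ix++) {
      ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
      ULong termR = ix & (ix >> 1) & 1;
      vassert(termL - termR == expected[ix]);
   }
}
#endif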
2258
2259
2260/*------------------------------------------------------------*/
2261/*--- Data processing (immediate) ---*/
2262/*------------------------------------------------------------*/
2263
2264/* Helper functions for supporting "DecodeBitMasks" */
2265
2266static ULong dbm_ROR ( Int width, ULong x, Int rot )
2267{
2268 vassert(width > 0 && width <= 64);
2269 vassert(rot >= 0 && rot < width);
2270 if (rot == 0) return x;
2271 ULong res = x >> rot;
2272 res |= (x << (width - rot));
2273 if (width < 64)
2274 res &= ((1ULL << width) - 1);
2275 return res;
2276}
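/* Worked example (illustrative): dbm_ROR(8, 0xE1, 4)
      = (0xE1 >> 4) | ((0xE1 << 4) & 0xFF)
      = 0x0E | 0x10 = 0x1E,
   that is, 1110_0001 rotated right by 4 is 0001_1110. */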
2277
2278static ULong dbm_RepTo64( Int esize, ULong x )
2279{
2280 switch (esize) {
2281 case 64:
2282 return x;
2283 case 32:
2284 x &= 0xFFFFFFFF; x |= (x << 32);
2285 return x;
2286 case 16:
2287 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2288 return x;
2289 case 8:
2290 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2291 return x;
2292 case 4:
2293 x &= 0xF; x |= (x << 4); x |= (x << 8);
2294 x |= (x << 16); x |= (x << 32);
2295 return x;
2296 case 2:
2297 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2298 x |= (x << 16); x |= (x << 32);
2299 return x;
2300 default:
2301 break;
2302 }
2303 vpanic("dbm_RepTo64");
2304 /*NOTREACHED*/
2305 return 0;
2306}
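/* Worked examples (illustrative):
      dbm_RepTo64(16, 0x1234) == 0x1234123412341234
      dbm_RepTo64( 8, 0x5A)   == 0x5A5A5A5A5A5A5A5A */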
2307
2308static Int dbm_highestSetBit ( ULong x )
2309{
2310 Int i;
2311 for (i = 63; i >= 0; i--) {
2312 if (x & (1ULL << i))
2313 return i;
2314 }
2315 vassert(x == 0);
2316 return -1;
2317}
2318
2319static
2320Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
2321 ULong immN, ULong imms, ULong immr, Bool immediate,
2322 UInt M /*32 or 64*/)
2323{
2324 vassert(immN < (1ULL << 1));
2325 vassert(imms < (1ULL << 6));
2326 vassert(immr < (1ULL << 6));
2327 vassert(immediate == False || immediate == True);
2328 vassert(M == 32 || M == 64);
2329
2330 Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
2331 if (len < 1) { /* printf("fail1\n"); */ return False; }
2332 vassert(len <= 6);
2333 vassert(M >= (1 << len));
2334
2335 vassert(len >= 1 && len <= 6);
2336 ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
2337 (1 << len) - 1;
2338 vassert(levels >= 1 && levels <= 63);
2339
2340 if (immediate && ((imms & levels) == levels)) {
2341 /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
2342 return False;
2343 }
2344
2345 ULong S = imms & levels;
2346 ULong R = immr & levels;
2347 Int diff = S - R;
2348 diff &= 63;
2349 Int esize = 1 << len;
2350 vassert(2 <= esize && esize <= 64);
2351
2352 /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
2353 same below with d. S can be 63 in which case we have an out of
2354 range and hence undefined shift. */
2355 vassert(S >= 0 && S <= 63);
2356 vassert(esize >= (S+1));
2357 ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
2358 //(1ULL << (S+1)) - 1;
2359 ((1ULL << S) - 1) + (1ULL << S);
2360
2361 Int d = // diff<len-1:0>
2362 diff & ((1 << len)-1);
2363 vassert(esize >= (d+1));
2364 vassert(d >= 0 && d <= 63);
2365
2366 ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
2367 //(1ULL << (d+1)) - 1;
2368 ((1ULL << d) - 1) + (1ULL << d);
2369
2370 if (esize != 64) vassert(elem_s < (1ULL << esize));
2371 if (esize != 64) vassert(elem_d < (1ULL << esize));
2372
2373 if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
2374 if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
2375
2376 return True;
2377}
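/* A hedged usage sketch (added; the encoding shown is a standard AArch64
   logical immediate, but the file never calls the function exactly like
   this). N=0, immr=0b000000, imms=0b110011 selects esize=8, S=3, R=0,
   i.e. the element 0b00001111 replicated across all eight byte lanes: */
#if 0
static void example_dbm_DecodeBitMasks ( void )
{
   ULong wmask = 0, tmask = 0;
   Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                0/*immN*/, 0x33/*imms*/, 0/*immr*/,
                                True/*immediate*/, 64);
   vassert(ok && wmask == 0x0F0F0F0F0F0F0F0FULL);
}
#endif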
2378
2379
2380static
2381Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
2382 UInt insn)
2383{
2384# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2385
2386 /* insn[28:23]
2387 10000x PC-rel addressing
2388 10001x Add/subtract (immediate)
2389 100100 Logical (immediate)
2390 100101 Move Wide (immediate)
2391 100110 Bitfield
2392 100111 Extract
2393 */
2394
2395 /* ------------------ ADD/SUB{,S} imm12 ------------------ */
2396 if (INSN(28,24) == BITS5(1,0,0,0,1)) {
2397 Bool is64 = INSN(31,31) == 1;
2398 Bool isSub = INSN(30,30) == 1;
2399 Bool setCC = INSN(29,29) == 1;
2400 UInt sh = INSN(23,22);
2401 UInt uimm12 = INSN(21,10);
2402 UInt nn = INSN(9,5);
2403 UInt dd = INSN(4,0);
2404 const HChar* nm = isSub ? "sub" : "add";
2405 if (sh >= 2) {
2406 /* Invalid; fall through */
2407 } else {
2408 vassert(sh <= 1);
2409 uimm12 <<= (12 * sh);
2410 if (is64) {
2411 IRTemp argL = newTemp(Ity_I64);
2412 IRTemp argR = newTemp(Ity_I64);
2413 IRTemp res = newTemp(Ity_I64);
2414 assign(argL, getIReg64orSP(nn));
2415 assign(argR, mkU64(uimm12));
2416 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
2417 mkexpr(argL), mkexpr(argR)));
2418 if (setCC) {
2419 putIReg64orZR(dd, mkexpr(res));
2420 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
2421 DIP("%ss %s, %s, 0x%x\n",
2422 nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
2423 } else {
2424 putIReg64orSP(dd, mkexpr(res));
2425 DIP("%s %s, %s, 0x%x\n",
2426 nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
2427 }
2428 } else {
2429 IRTemp argL = newTemp(Ity_I32);
2430 IRTemp argR = newTemp(Ity_I32);
2431 IRTemp res = newTemp(Ity_I32);
2432 assign(argL, getIReg32orSP(nn));
2433 assign(argR, mkU32(uimm12));
2434 assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
2435 mkexpr(argL), mkexpr(argR)));
2436 if (setCC) {
2437 putIReg32orZR(dd, mkexpr(res));
2438 setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
2439 DIP("%ss %s, %s, 0x%x\n",
2440 nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
2441 } else {
2442 putIReg32orSP(dd, mkexpr(res));
2443 DIP("%s %s, %s, 0x%x\n",
2444 nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
2445 }
2446 }
2447 return True;
2448 }
2449 }
2450
2451 /* -------------------- ADR/ADRP -------------------- */
2452 if (INSN(28,24) == BITS5(1,0,0,0,0)) {
2453 UInt bP = INSN(31,31);
2454 UInt immLo = INSN(30,29);
2455 UInt immHi = INSN(23,5);
2456 UInt rD = INSN(4,0);
2457 ULong uimm = (immHi << 2) | immLo;
2458 ULong simm = sx_to_64(uimm, 21);
2459 ULong val;
2460 if (bP) {
2461 val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
2462 } else {
2463 val = guest_PC_curr_instr + simm;
2464 }
2465 putIReg64orZR(rD, mkU64(val));
2466 DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
2467 return True;
2468 }
2469
2470 /* -------------------- LOGIC(imm) -------------------- */
2471 if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
2472 /* 31 30 28 22 21 15 9 4
2473 sf op 100100 N immr imms Rn Rd
2474 op=00: AND Rd|SP, Rn, #imm
2475 op=01: ORR Rd|SP, Rn, #imm
2476 op=10: EOR Rd|SP, Rn, #imm
2477 op=11: ANDS Rd|ZR, Rn, #imm
2478 */
2479 Bool is64 = INSN(31,31) == 1;
2480 UInt op = INSN(30,29);
2481 UInt N = INSN(22,22);
2482 UInt immR = INSN(21,16);
2483 UInt immS = INSN(15,10);
2484 UInt nn = INSN(9,5);
2485 UInt dd = INSN(4,0);
2486 ULong imm = 0;
2487 Bool ok;
2488 if (N == 1 && !is64)
2489 goto after_logic_imm; /* not allowed; fall through */
2490 ok = dbm_DecodeBitMasks(&imm, NULL,
2491 N, immS, immR, True, is64 ? 64 : 32);
2492 if (!ok)
2493 goto after_logic_imm;
2494
2495 const HChar* names[4] = { "and", "orr", "eor", "ands" };
2496 const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
2497 const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
2498
2499 vassert(op < 4);
2500 if (is64) {
2501 IRExpr* argL = getIReg64orZR(nn);
2502 IRExpr* argR = mkU64(imm);
2503 IRTemp res = newTemp(Ity_I64);
2504 assign(res, binop(ops64[op], argL, argR));
2505 if (op < 3) {
2506 putIReg64orSP(dd, mkexpr(res));
2507 DIP("%s %s, %s, 0x%llx\n", names[op],
2508 nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
2509 } else {
2510 putIReg64orZR(dd, mkexpr(res));
2511 setFlags_LOGIC(True/*is64*/, res);
2512 DIP("%s %s, %s, 0x%llx\n", names[op],
2513 nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
2514 }
2515 } else {
2516 IRExpr* argL = getIReg32orZR(nn);
2517 IRExpr* argR = mkU32((UInt)imm);
2518 IRTemp res = newTemp(Ity_I32);
2519 assign(res, binop(ops32[op], argL, argR));
2520 if (op < 3) {
2521 putIReg32orSP(dd, mkexpr(res));
2522 DIP("%s %s, %s, 0x%x\n", names[op],
2523 nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
2524 } else {
2525 putIReg32orZR(dd, mkexpr(res));
2526 setFlags_LOGIC(False/*!is64*/, res);
2527 DIP("%s %s, %s, 0x%x\n", names[op],
2528 nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
2529 }
2530 }
2531 return True;
2532 }
2533 after_logic_imm:
2534
2535 /* -------------------- MOV{Z,N,K} -------------------- */
2536 if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
2537 /* 31 30 28 22 20 4
2538 | | | | | |
2539 sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
2540 sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
2541 sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
2542 */
2543 Bool is64 = INSN(31,31) == 1;
2544 UInt subopc = INSN(30,29);
2545 UInt hw = INSN(22,21);
2546 UInt imm16 = INSN(20,5);
2547 UInt dd = INSN(4,0);
2548 if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
2549 /* invalid; fall through */
2550 } else {
2551 ULong imm64 = ((ULong)imm16) << (16 * hw);
2552 if (!is64)
2553 vassert(imm64 < 0x100000000ULL);
2554 switch (subopc) {
2555 case BITS2(1,0): // MOVZ
2556 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2557 DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2558 break;
2559 case BITS2(0,0): // MOVN
2560 imm64 = ~imm64;
2561 if (!is64)
2562 imm64 &= 0xFFFFFFFFULL;
2563 putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
2564 DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
2565 break;
2566 case BITS2(1,1): // MOVK
2567 /* This is more complex. We are inserting a slice into
2568 the destination register, so we need to have the old
2569 value of it. */
2570 if (is64) {
2571 IRTemp old = newTemp(Ity_I64);
2572 assign(old, getIReg64orZR(dd));
2573 ULong mask = 0xFFFFULL << (16 * hw);
2574 IRExpr* res
2575 = binop(Iop_Or64,
2576 binop(Iop_And64, mkexpr(old), mkU64(~mask)),
2577 mkU64(imm64));
2578 putIReg64orZR(dd, res);
2579 DIP("movk %s, 0x%x, lsl %u\n",
2580 nameIReg64orZR(dd), imm16, 16*hw);
2581 } else {
2582 IRTemp old = newTemp(Ity_I32);
2583 assign(old, getIReg32orZR(dd));
2584 vassert(hw <= 1);
2585 UInt mask = 0xFFFF << (16 * hw);
2586 IRExpr* res
2587 = binop(Iop_Or32,
2588 binop(Iop_And32, mkexpr(old), mkU32(~mask)),
2589 mkU32((UInt)imm64));
2590 putIReg32orZR(dd, res);
2591 DIP("movk %s, 0x%x, lsl %u\n",
2592 nameIReg32orZR(dd), imm16, 16*hw);
2593 }
2594 break;
2595 default:
2596 vassert(0);
2597 }
2598 return True;
2599 }
2600 }
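/* Worked example of the MOVK slice insertion above (illustrative):
   MOVK X0, #0xBEEF, LSL 16 with old X0 = 0x1111111111111111 computes
      mask = 0xFFFF0000
      (old & ~mask) | (0xBEEF << 16) = 0x11111111BEEF1111. */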
2601
2602 /* -------------------- {U,S,}BFM -------------------- */
2603 /* 30 28 22 21 15 9 4
2604
2605 sf 10 100110 N immr imms nn dd
2606 UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2607 UBFM Xd, Xn, #immr, #imms when sf=1, N=1
2608
2609 sf 00 100110 N immr imms nn dd
2610 SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2611 SBFM Xd, Xn, #immr, #imms when sf=1, N=1
2612
2613 sf 01 100110 N immr imms nn dd
2614 BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
2615 BFM Xd, Xn, #immr, #imms when sf=1, N=1
2616 */
2617 if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
2618 UInt sf = INSN(31,31);
2619 UInt opc = INSN(30,29);
2620 UInt N = INSN(22,22);
2621 UInt immR = INSN(21,16);
2622 UInt immS = INSN(15,10);
2623 UInt nn = INSN(9,5);
2624 UInt dd = INSN(4,0);
2625 Bool inZero = False;
2626 Bool extend = False;
2627 const HChar* nm = "???";
2628 /* skip invalid combinations */
2629 switch (opc) {
2630 case BITS2(0,0):
2631 inZero = True; extend = True; nm = "sbfm"; break;
2632 case BITS2(0,1):
2633 inZero = False; extend = False; nm = "bfm"; break;
2634 case BITS2(1,0):
2635 inZero = True; extend = False; nm = "ubfm"; break;
2636 case BITS2(1,1):
2637 goto after_bfm; /* invalid */
2638 default:
2639 vassert(0);
2640 }
2641 if (sf == 1 && N != 1) goto after_bfm;
2642 if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
2643 || ((immS >> 5) & 1) != 0)) goto after_bfm;
2644 ULong wmask = 0, tmask = 0;
2645 Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
2646 N, immS, immR, False, sf == 1 ? 64 : 32);
2647 if (!ok) goto after_bfm; /* hmmm */
2648
2649 Bool is64 = sf == 1;
2650 IRType ty = is64 ? Ity_I64 : Ity_I32;
2651
2652 IRTemp dst = newTemp(ty);
2653 IRTemp src = newTemp(ty);
2654 IRTemp bot = newTemp(ty);
2655 IRTemp top = newTemp(ty);
2656 IRTemp res = newTemp(ty);
2657 assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
2658 assign(src, getIRegOrZR(is64, nn));
2659 /* perform bitfield move on low bits */
2660 assign(bot, binop(mkOR(ty),
2661 binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
2662 binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
2663 mkU(ty, wmask))));
2664 /* determine extension bits (sign, zero or dest register) */
2665 assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
2666 /* combine extension bits and result bits */
2667 assign(res, binop(mkOR(ty),
2668 binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
2669 binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
2670 putIRegOrZR(is64, dd, mkexpr(res));
2671 DIP("%s %s, %s, immR=%u, immS=%u\n",
2672 nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
2673 return True;
2674 }
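/* Worked example (illustrative): UBFM X0, X1, #4, #7. Decoding gives
   esize=64, S=7, R=4, hence wmask = ROR64(0xFF, 4) = 0xF00000000000000F
   and tmask = 0xF. With dst = 0 and no extension the result is
      (ROR64(src, 4) & wmask) & tmask = (src >> 4) & 0xF,
   i.e. an unsigned extract of bits 7:4, as expected for
   UBFX X0, X1, #4, #4. */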
2675 after_bfm:
2676
2677 /* ---------------------- EXTR ---------------------- */
2678 /* 30 28 22 20 15 9 4
2679 1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
2680 0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
2681 */
2682 if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
2683 Bool is64 = INSN(31,31) == 1;
2684 UInt mm = INSN(20,16);
2685 UInt imm6 = INSN(15,10);
2686 UInt nn = INSN(9,5);
2687 UInt dd = INSN(4,0);
2688 Bool valid = True;
2689 if (INSN(31,31) != INSN(22,22))
2690 valid = False;
2691 if (!is64 && imm6 >= 32)
2692 valid = False;
2693 if (!valid) goto after_extr;
2694 IRType ty = is64 ? Ity_I64 : Ity_I32;
2695 IRTemp srcHi = newTemp(ty);
2696 IRTemp srcLo = newTemp(ty);
2697 IRTemp res = newTemp(ty);
2698 assign(srcHi, getIRegOrZR(is64, nn));
2699 assign(srcLo, getIRegOrZR(is64, mm));
2700 if (imm6 == 0) {
2701 assign(res, mkexpr(srcLo));
2702 } else {
2703 UInt szBits = 8 * sizeofIRType(ty);
2704 vassert(imm6 > 0 && imm6 < szBits);
2705 assign(res, binop(mkOR(ty),
2706 binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
2707 binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
2708 }
2709 putIRegOrZR(is64, dd, mkexpr(res));
2710 DIP("extr %s, %s, %s, #%u\n",
2711 nameIRegOrZR(is64,dd),
2712 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
2713 return True;
2714 }
2715 after_extr:
2716
2717 vex_printf("ARM64 front end: data_processing_immediate\n");
2718 return False;
2719# undef INSN
2720}
2721
2722
2723/*------------------------------------------------------------*/
2724/*--- Data processing (register) instructions ---*/
2725/*------------------------------------------------------------*/
2726
2727static const HChar* nameSH ( UInt sh ) {
2728 switch (sh) {
2729 case 0: return "lsl";
2730 case 1: return "lsr";
2731 case 2: return "asr";
2732 case 3: return "ror";
2733 default: vassert(0);
2734 }
2735}
2736
2737/* Generate IR to get a register value, possibly shifted by an
2738 immediate. Returns either a 32- or 64-bit temporary holding the
2739 result. After the shift, the value can optionally be NOT-ed
2740 too.
2741
2742 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2743 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2744 isn't allowed, but it's the job of the caller to check that.
2745*/
2746static IRTemp getShiftedIRegOrZR ( Bool is64,
2747 UInt sh_how, UInt sh_amt, UInt regNo,
2748 Bool invert )
2749{
2750 vassert(sh_how < 4);
2751 vassert(sh_amt < (is64 ? 64 : 32));
2752 IRType ty = is64 ? Ity_I64 : Ity_I32;
2753 IRTemp t0 = newTemp(ty);
2754 assign(t0, getIRegOrZR(is64, regNo));
2755 IRTemp t1 = newTemp(ty);
2756 switch (sh_how) {
2757 case BITS2(0,0):
2758 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2759 break;
2760 case BITS2(0,1):
2761 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2762 break;
2763 case BITS2(1,0):
2764 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2765 break;
2766 case BITS2(1,1):
2767 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2768 break;
2769 default:
2770 vassert(0);
2771 }
2772 if (invert) {
2773 IRTemp t2 = newTemp(ty);
2774 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2775 return t2;
2776 } else {
2777 return t1;
2778 }
2779}
2780
2781
2782static
2783Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2784 UInt insn)
2785{
2786# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2787
2788 /* ------------------- ADD/SUB(reg) ------------------- */
2789 /* x==0 => 32 bit op x==1 => 64 bit op
2790 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2791
2792 31 30 29 28 23 21 20 15 9 4
2793 | | | | | | | | | |
2794 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2795 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2796 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2797 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2798 */
2799 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2800 UInt bX = INSN(31,31);
2801 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2802 UInt bS = INSN(29, 29); /* set flags? */
2803 UInt sh = INSN(23,22);
2804 UInt rM = INSN(20,16);
2805 UInt imm6 = INSN(15,10);
2806 UInt rN = INSN(9,5);
2807 UInt rD = INSN(4,0);
2808 Bool isSUB = bOP == 1;
2809 Bool is64 = bX == 1;
2810 IRType ty = is64 ? Ity_I64 : Ity_I32;
2811 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2812 /* invalid; fall through */
2813 } else {
2814 IRTemp argL = newTemp(ty);
2815 assign(argL, getIRegOrZR(is64, rN));
2816 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2817 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2818 IRTemp res = newTemp(ty);
2819 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2820 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2821 if (bS) {
2822 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2823 }
2824 DIP("%s%s %s, %s, %s, %s #%u\n",
2825 bOP ? "sub" : "add", bS ? "s" : "",
2826 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2827 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2828 return True;
2829 }
2830 }
2831
2832 /* ------------------- ADC/SBC(reg) ------------------- */
2833 /* x==0 => 32 bit op x==1 => 64 bit op
2834
2835 31 30 29 28 23 21 20 15 9 4
2836 | | | | | | | | | |
2837 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2838 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2839 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2840 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2841 */
2842
2843 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2844 UInt bX = INSN(31,31);
2845 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2846 UInt bS = INSN(29,29); /* set flags */
2847 UInt rM = INSN(20,16);
2848 UInt rN = INSN(9,5);
2849 UInt rD = INSN(4,0);
2850
2851 Bool isSUB = bOP == 1;
2852 Bool is64 = bX == 1;
2853 IRType ty = is64 ? Ity_I64 : Ity_I32;
2854
2855 IRTemp oldC = newTemp(ty);
2856 assign(oldC,
2857 is64 ? mk_arm64g_calculate_flag_c()
2858 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2859
2860 IRTemp argL = newTemp(ty);
2861 assign(argL, getIRegOrZR(is64, rN));
2862 IRTemp argR = newTemp(ty);
2863 assign(argR, getIRegOrZR(is64, rM));
2864
2865 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2866 IRTemp res = newTemp(ty);
2867 if (isSUB) {
2868 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2869 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2870 assign(res,
2871 binop(op,
2872 binop(op, mkexpr(argL), mkexpr(argR)),
2873 binop(xorOp, mkexpr(oldC), one)));
2874 } else {
2875 assign(res,
2876 binop(op,
2877 binop(op, mkexpr(argL), mkexpr(argR)),
2878 mkexpr(oldC)));
2879 }
2880
2881 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2882
2883 if (bS) {
2884 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2885 }
2886
2887 DIP("%s%s %s, %s, %s\n",
2888 bOP ? "sbc" : "adc", bS ? "s" : "",
2889 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2890 nameIRegOrZR(is64, rM));
2891 return True;
2892 }
2893
2894 /* -------------------- LOGIC(reg) -------------------- */
2895 /* x==0 => 32 bit op x==1 => 64 bit op
2896 N==0 => inv? is no-op (no inversion)
2897 N==1 => inv? is NOT
2898 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2899
2900 31 30 28 23 21 20 15 9 4
2901 | | | | | | | | |
2902 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2903 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2904 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2905 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2906 With N=1, the names are: BIC ORN EON BICS
2907 */
2908 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2909 UInt bX = INSN(31,31);
2910 UInt sh = INSN(23,22);
2911 UInt bN = INSN(21,21);
2912 UInt rM = INSN(20,16);
2913 UInt imm6 = INSN(15,10);
2914 UInt rN = INSN(9,5);
2915 UInt rD = INSN(4,0);
2916 Bool is64 = bX == 1;
2917 IRType ty = is64 ? Ity_I64 : Ity_I32;
2918 if (!is64 && imm6 > 31) {
2919 /* invalid; fall through */
2920 } else {
2921 IRTemp argL = newTemp(ty);
2922 assign(argL, getIRegOrZR(is64, rN));
2923 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2924 IROp op = Iop_INVALID;
2925 switch (INSN(30,29)) {
2926 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2927 case BITS2(0,1): op = mkOR(ty); break;
2928 case BITS2(1,0): op = mkXOR(ty); break;
2929 default: vassert(0);
2930 }
2931 IRTemp res = newTemp(ty);
2932 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2933 if (INSN(30,29) == BITS2(1,1)) {
2934 setFlags_LOGIC(is64, res);
2935 }
2936 putIRegOrZR(is64, rD, mkexpr(res));
2937
2938 static const HChar* names_op[8]
2939 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2940 vassert(((bN << 2) | INSN(30,29)) < 8);
2941 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2942 /* Special-case the printing of "MOV" */
2943 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2944 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2945 nameIRegOrZR(is64, rM));
2946 } else {
2947 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2948 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2949 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2950 }
2951 return True;
2952 }
2953 }
2954
2955 /* -------------------- {U,S}MULH -------------------- */
2956 /* 31 23 22 20 15 9 4
2957 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2958 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2959 */
2960 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
2961 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
2962 Bool isU = INSN(23,23) == 1;
2963 UInt mm = INSN(20,16);
2964 UInt nn = INSN(9,5);
2965 UInt dd = INSN(4,0);
2966 putIReg64orZR(dd, unop(Iop_128HIto64,
2967 binop(isU ? Iop_MullU64 : Iop_MullS64,
2968 getIReg64orZR(nn), getIReg64orZR(mm))));
2969 DIP("%cmulh %s, %s, %s\n",
2970 isU ? 'u' : 's',
2971 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2972 return True;
2973 }
2974
2975 /* -------------------- M{ADD,SUB} -------------------- */
2976 /* 31 30 20 15 14 9 4
2977 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
2978 sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2979 */
2980 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2981 Bool is64 = INSN(31,31) == 1;
2982 UInt mm = INSN(20,16);
2983 Bool isAdd = INSN(15,15) == 0;
2984 UInt aa = INSN(14,10);
2985 UInt nn = INSN(9,5);
2986 UInt dd = INSN(4,0);
2987 if (is64) {
2988 putIReg64orZR(
2989 dd,
2990 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2991 getIReg64orZR(aa),
2992 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
2993 } else {
2994 putIReg32orZR(
2995 dd,
2996 binop(isAdd ? Iop_Add32 : Iop_Sub32,
2997 getIReg32orZR(aa),
2998 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
2999 }
3000 DIP("%s %s, %s, %s, %s\n",
3001 isAdd ? "madd" : "msub",
3002 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3003 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3004 return True;
3005 }
3006
3007 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3008 /* 31 30 28 20 15 11 9 4
3009 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3010 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3011 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3012 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3013 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3014 */
3015 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3016 Bool is64 = INSN(31,31) == 1;
3017 UInt b30 = INSN(30,30);
3018 UInt mm = INSN(20,16);
3019 UInt cond = INSN(15,12);
3020 UInt b10 = INSN(10,10);
3021 UInt nn = INSN(9,5);
3022 UInt dd = INSN(4,0);
3023 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3024 IRType ty = is64 ? Ity_I64 : Ity_I32;
3025 IRExpr* argL = getIRegOrZR(is64, nn);
3026 IRExpr* argR = getIRegOrZR(is64, mm);
3027 switch (op) {
3028 case BITS2(0,0):
3029 break;
3030 case BITS2(0,1):
3031 argR = binop(mkADD(ty), argR, mkU(ty,1));
3032 break;
3033 case BITS2(1,0):
3034 argR = unop(mkNOT(ty), argR);
3035 break;
3036 case BITS2(1,1):
3037 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3038 break;
3039 default:
3040 vassert(0);
3041 }
3042 putIRegOrZR(
3043 is64, dd,
3044 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3045 argL, argR)
3046 );
3047 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3048 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3049 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3050 nameIRegOrZR(is64, mm), nameCC(cond));
3051 return True;
3052 }
3053
3054 /* -------------- ADD/SUB(extended reg) -------------- */
3055 /* 28 20 15 12 9 4
3056 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3057 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3058
3059 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3060 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3061
3062 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3063 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3064
3065 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3066 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3067
3068 The 'm' operand is extended per opt, thusly:
3069
3070 000 Xm & 0xFF UXTB
3071 001 Xm & 0xFFFF UXTH
3072 010 Xm & (2^32)-1 UXTW
3073 011 Xm UXTX
3074
3075 100 Xm sx from bit 7 SXTB
3076 101 Xm sx from bit 15 SXTH
3077 110 Xm sx from bit 31 SXTW
3078 111 Xm SXTX
3079
3080 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3081 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3082 are the identity operation on Wm.
3083
3084 After extension, the value is shifted left by imm3 bits, which
3085 may only be in the range 0 .. 4 inclusive.
3086 */
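 /* Worked example (illustrative): ADD X1, X2, W3, SXTW #2 takes the low
    32 bits of X3, sign extends from bit 31, shifts left by 2 and adds the
    result to X2; so for X2 = 0x1000 and W3 = 0xFFFFFFFF (-1) the result
    is 0x1000 + (-4) = 0xFFC. */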
3087 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3088 Bool is64 = INSN(31,31) == 1;
3089 Bool isSub = INSN(30,30) == 1;
3090 Bool setCC = INSN(29,29) == 1;
3091 UInt mm = INSN(20,16);
3092 UInt opt = INSN(15,13);
3093 UInt imm3 = INSN(12,10);
3094 UInt nn = INSN(9,5);
3095 UInt dd = INSN(4,0);
3096 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3097 "sxtb", "sxth", "sxtw", "sxtx" };
3098 /* Do almost the same thing in the 32- and 64-bit cases. */
3099 IRTemp xN = newTemp(Ity_I64);
3100 IRTemp xM = newTemp(Ity_I64);
3101 assign(xN, getIReg64orSP(nn));
3102 assign(xM, getIReg64orZR(mm));
3103 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3104 Int shSX = 0;
3105 /* widen Xm .. */
3106 switch (opt) {
3107 case BITS3(0,0,0): // UXTB
3108 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3109 case BITS3(0,0,1): // UXTH
3110 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3111 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3112 if (is64) {
3113 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3114 }
3115 break;
3116 case BITS3(0,1,1): // UXTX -- always a noop
3117 break;
3118 case BITS3(1,0,0): // SXTB
3119 shSX = 56; goto sxTo64;
3120 case BITS3(1,0,1): // SXTH
3121 shSX = 48; goto sxTo64;
3122 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3123 if (is64) {
3124 shSX = 32; goto sxTo64;
3125 }
3126 break;
3127 case BITS3(1,1,1): // SXTX -- always a noop
3128 break;
3129 sxTo64:
3130 vassert(shSX >= 32);
3131 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3132 mkU8(shSX));
3133 break;
3134 default:
3135 vassert(0);
3136 }
3137 /* and now shift */
3138 IRTemp argL = xN;
3139 IRTemp argR = newTemp(Ity_I64);
3140 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3141 IRTemp res = newTemp(Ity_I64);
3142 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3143 mkexpr(argL), mkexpr(argR)));
3144 if (is64) {
3145 if (setCC) {
3146 putIReg64orZR(dd, mkexpr(res));
3147 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3148 } else {
3149 putIReg64orSP(dd, mkexpr(res));
3150 }
3151 } else {
3152 if (setCC) {
3153 IRTemp argL32 = newTemp(Ity_I32);
3154 IRTemp argR32 = newTemp(Ity_I32);
3155 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3156 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3157 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3158 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3159 } else {
3160 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3161 }
3162 }
3163 DIP("%s%s %s, %s, %s %s lsl %u\n",
3164 isSub ? "sub" : "add", setCC ? "s" : "",
3165 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3166 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3167 nameExt[opt], imm3);
3168 return True;
3169 }
3170
3171 /* ---------------- CCMP/CCMN(imm) ---------------- */
3172 /* Bizarrely, these appear in the "data processing register"
3173 category, even though they are operations against an
3174 immediate. */
3175 /* 31 29 20 15 11 9 3
3176 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3177 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3178
3179 Operation is:
3180 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3181 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3182 */
3183 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3184 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3185 Bool is64 = INSN(31,31) == 1;
3186 Bool isSUB = INSN(30,30) == 1;
3187 UInt imm5 = INSN(20,16);
3188 UInt cond = INSN(15,12);
3189 UInt nn = INSN(9,5);
3190 UInt nzcv = INSN(3,0);
3191
3192 IRTemp condT = newTemp(Ity_I1);
3193 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3194
3195 IRType ty = is64 ? Ity_I64 : Ity_I32;
3196 IRTemp argL = newTemp(ty);
3197 IRTemp argR = newTemp(ty);
3198
3199 if (is64) {
3200 assign(argL, getIReg64orZR(nn));
3201 assign(argR, mkU64(imm5));
3202 } else {
3203 assign(argL, getIReg32orZR(nn));
3204 assign(argR, mkU32(imm5));
3205 }
3206 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3207
3208 DIP("ccm%c %s, #%u, #%u, %s\n",
3209 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3210 imm5, nzcv, nameCC(cond));
3211 return True;
3212 }
3213
3214 /* ---------------- CCMP/CCMN(reg) ---------------- */
3215 /* 31 29 20 15 11 9 3
3216 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3217 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3218 Operation is:
3219 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3220 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3221 */
3222 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3223 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3224 Bool is64 = INSN(31,31) == 1;
3225 Bool isSUB = INSN(30,30) == 1;
3226 UInt mm = INSN(20,16);
3227 UInt cond = INSN(15,12);
3228 UInt nn = INSN(9,5);
3229 UInt nzcv = INSN(3,0);
3230
3231 IRTemp condT = newTemp(Ity_I1);
3232 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3233
3234 IRType ty = is64 ? Ity_I64 : Ity_I32;
3235 IRTemp argL = newTemp(ty);
3236 IRTemp argR = newTemp(ty);
3237
3238 if (is64) {
3239 assign(argL, getIReg64orZR(nn));
3240 assign(argR, getIReg64orZR(mm));
3241 } else {
3242 assign(argL, getIReg32orZR(nn));
3243 assign(argR, getIReg32orZR(mm));
3244 }
3245 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3246
3247 DIP("ccm%c %s, %s, #%u, %s\n",
3248 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3249 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3250 return True;
3251 }
3252
3253
3254 /* -------------- REV/REV16/REV32/RBIT -------------- */
3255 /* 31 30 28 20 15 11 9 4
3256
3257 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3258 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
3259
3260 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3261 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
3262
3263 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3264 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
3265
3266 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
3267 */
3268 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3269 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3270 UInt b31 = INSN(31,31);
3271 UInt opc = INSN(11,10);
3272
3273 UInt ix = 0;
3274 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3275 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3276 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3277 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3278 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3279 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3280 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
3281 if (ix >= 1 && ix <= 7) {
3282 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
3283 UInt nn = INSN(9,5);
3284 UInt dd = INSN(4,0);
3285 IRTemp src = newTemp(Ity_I64);
3286 IRTemp dst = IRTemp_INVALID;
3287 IRTemp (*math)(IRTemp) = NULL;
3288 switch (ix) {
3289 case 1: case 2: math = math_BYTESWAP64; break;
3290 case 3: case 4: math = math_BITSWAP64; break;
3291 case 5: case 6: math = math_USHORTSWAP64; break;
3292 case 7: math = math_UINTSWAP64; break;
3293 default: vassert(0);
3294 }
3295 const HChar* names[7]
3296 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3297 const HChar* nm = names[ix-1];
3298 vassert(math);
3299 if (ix == 6) {
3300 /* This has to be special cased, since the logic below doesn't
3301 handle it correctly. */
3302 assign(src, getIReg64orZR(nn));
3303 dst = math(src);
3304 putIReg64orZR(dd,
3305 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3306 } else if (is64) {
3307 assign(src, getIReg64orZR(nn));
3308 dst = math(src);
3309 putIReg64orZR(dd, mkexpr(dst));
3310 } else {
3311 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
3312 dst = math(src);
3313 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3314 }
3315 DIP("%s %s, %s\n", nm,
3316 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3317 return True;
3318 }
3319 /* else fall through */
3320 }
3321
3322 /* -------------------- CLZ/CLS -------------------- */
3323 /* 30 28 24 20 15 9 4
3324 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3325 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3326 */
3327 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3328 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3329 Bool is64 = INSN(31,31) == 1;
3330 Bool isCLS = INSN(10,10) == 1;
3331 UInt nn = INSN(9,5);
3332 UInt dd = INSN(4,0);
3333 IRTemp src = newTemp(Ity_I64);
3334 IRTemp srcZ = newTemp(Ity_I64);
3335 IRTemp dst = newTemp(Ity_I64);
3336 /* Get the argument, widened out to 64 bit */
3337 if (is64) {
3338 assign(src, getIReg64orZR(nn));
3339 } else {
3340 assign(src, binop(Iop_Shl64,
3341 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3342 }
3343 /* For CLS, transform the arg so that its count of leading sign bits becomes a count of leading zeroes: srcZ[i] = src[i] ^ src[i-1], with srcZ[0] = 0. */
3344 if (isCLS) {
3345 IRExpr* one = mkU8(1);
3346 assign(srcZ,
3347 binop(Iop_Xor64,
3348 binop(Iop_Shl64, mkexpr(src), one),
3349 binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one), one)));
3350 } else {
3351 assign(srcZ, mkexpr(src));
3352 }
3353 /* And compute CLZ. */
3354 if (is64) {
3355 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3356 mkU64(isCLS ? 63 : 64),
3357 unop(Iop_Clz64, mkexpr(srcZ))));
3358 putIReg64orZR(dd, mkexpr(dst));
3359 } else {
3360 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
3361 mkU64(isCLS ? 31 : 32),
3362 unop(Iop_Clz64, mkexpr(srcZ))));
3363 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3364 }
3365 DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
3366 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3367 return True;
3368 }
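/* Worked example for the CLS transform above (illustrative): for
   src = 0xF000000000000000, srcZ gets bit i = src[i] ^ src[i-1], so
   srcZ = 0x1000000000000000 and Clz64(srcZ) = 3, which is indeed the
   number of leading sign bits of src, not counting the sign bit itself. */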
3369
sewardjca95f2d2014-11-25 17:27:32 +00003370 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00003371 /* 30 28 20 15 11 9 4
3372 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3373 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3374 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
sewardjca95f2d2014-11-25 17:27:32 +00003375 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
sewardjbbcf1882014-01-12 12:49:10 +00003376 */
3377 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
sewardjca95f2d2014-11-25 17:27:32 +00003378 && INSN(15,12) == BITS4(0,0,1,0)) {
sewardjbbcf1882014-01-12 12:49:10 +00003379 Bool is64 = INSN(31,31) == 1;
3380 UInt mm = INSN(20,16);
3381 UInt op = INSN(11,10);
3382 UInt nn = INSN(9,5);
3383 UInt dd = INSN(4,0);
3384 IRType ty = is64 ? Ity_I64 : Ity_I32;
3385 IRTemp srcL = newTemp(ty);
sewardjca95f2d2014-11-25 17:27:32 +00003386 IRTemp srcR = newTemp(Ity_I64);
sewardjbbcf1882014-01-12 12:49:10 +00003387 IRTemp res = newTemp(ty);
3388 IROp iop = Iop_INVALID;
3389 assign(srcL, getIRegOrZR(is64, nn));
sewardjca95f2d2014-11-25 17:27:32 +00003390 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3391 mkU64(is64 ? 63 : 31)));
3392 if (op < 3) {
3393 // LSLV, LSRV, ASRV
3394 switch (op) {
3395 case BITS2(0,0): iop = mkSHL(ty); break;
3396 case BITS2(0,1): iop = mkSHR(ty); break;
3397 case BITS2(1,0): iop = mkSAR(ty); break;
3398 default: vassert(0);
3399 }
3400 assign(res, binop(iop, mkexpr(srcL),
3401 unop(Iop_64to8, mkexpr(srcR))));
3402 } else {
3403 // RORV
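         // A rotate amount of zero must be special-cased: the general
         // formula below would then left-shift by the full register
         // width, which is an out-of-range shift.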
3404 IROp opSHL = mkSHL(ty);
3405 IROp opSHR = mkSHR(ty);
3406 IROp opOR = mkOR(ty);
3407 IRExpr* width = mkU64(is64 ? 64: 32);
3408 assign(
3409 res,
3410 IRExpr_ITE(
3411 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3412 mkexpr(srcL),
3413 binop(opOR,
3414 binop(opSHL,
3415 mkexpr(srcL),
3416 unop(Iop_64to8, binop(Iop_Sub64, width,
3417 mkexpr(srcR)))),
3418 binop(opSHR,
3419 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3420 ));
sewardjbbcf1882014-01-12 12:49:10 +00003421 }
sewardjbbcf1882014-01-12 12:49:10 +00003422 putIRegOrZR(is64, dd, mkexpr(res));
sewardjca95f2d2014-11-25 17:27:32 +00003423 vassert(op < 4);
3424 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
sewardjbbcf1882014-01-12 12:49:10 +00003425 DIP("%s %s, %s, %s\n",
3426 names[op], nameIRegOrZR(is64,dd),
3427 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3428 return True;
3429 }
3430
3431 /* -------------------- SDIV/UDIV -------------------- */
3432 /* 30 28 20 15 10 9 4
3433 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3434 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3435 */
3436 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3437 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3438 Bool is64 = INSN(31,31) == 1;
3439 UInt mm = INSN(20,16);
3440 Bool isS = INSN(10,10) == 1;
3441 UInt nn = INSN(9,5);
3442 UInt dd = INSN(4,0);
3443 if (isS) {
3444 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3445 getIRegOrZR(is64, nn),
3446 getIRegOrZR(is64, mm)));
3447 } else {
3448 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3449 getIRegOrZR(is64, nn),
3450 getIRegOrZR(is64, mm)));
3451 }
3452 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3453 nameIRegOrZR(is64, dd),
3454 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3455 return True;
3456 }
3457
3458 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3459 /* 31 23 20 15 14 9 4
3460 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3461 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3462 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3463 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3464 with operation
3465 Xd = Xa +/- (Wn *u/s Wm)
3466 */
3467 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3468 Bool isU = INSN(23,23) == 1;
3469 UInt mm = INSN(20,16);
3470 Bool isAdd = INSN(15,15) == 0;
3471 UInt aa = INSN(14,10);
3472 UInt nn = INSN(9,5);
3473 UInt dd = INSN(4,0);
3474 IRTemp wN = newTemp(Ity_I32);
3475 IRTemp wM = newTemp(Ity_I32);
3476 IRTemp xA = newTemp(Ity_I64);
3477 IRTemp muld = newTemp(Ity_I64);
3478 IRTemp res = newTemp(Ity_I64);
3479 assign(wN, getIReg32orZR(nn));
3480 assign(wM, getIReg32orZR(mm));
3481 assign(xA, getIReg64orZR(aa));
3482 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3483 mkexpr(wN), mkexpr(wM)));
3484 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3485 mkexpr(xA), mkexpr(muld)));
3486 putIReg64orZR(dd, mkexpr(res));
3487 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3488 nameIReg64orZR(dd), nameIReg32orZR(nn),
3489 nameIReg32orZR(mm), nameIReg64orZR(aa));
3490 return True;
3491 }
3492 vex_printf("ARM64 front end: data_processing_register\n");
3493 return False;
3494# undef INSN
3495}
3496
3497
3498/*------------------------------------------------------------*/
sewardj208a7762014-10-22 13:52:51 +00003499/*--- Math helpers for vector interleave/deinterleave ---*/
3500/*------------------------------------------------------------*/
3501
3502#define EX(_tmp) \
3503 mkexpr(_tmp)
3504#define SL(_hi128,_lo128,_nbytes) \
3505 ( (_nbytes) == 0 \
3506 ? (_lo128) \
3507 : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
3508#define ROR(_v128,_nbytes) \
3509 SL((_v128),(_v128),(_nbytes))
3510#define ROL(_v128,_nbytes) \
3511 SL((_v128),(_v128),16-(_nbytes))
3512#define SHR(_v128,_nbytes) \
3513 binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
3514#define SHL(_v128,_nbytes) \
3515 binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
3516#define ILO64x2(_argL,_argR) \
3517 binop(Iop_InterleaveLO64x2,(_argL),(_argR))
3518#define IHI64x2(_argL,_argR) \
3519 binop(Iop_InterleaveHI64x2,(_argL),(_argR))
3520#define ILO32x4(_argL,_argR) \
3521 binop(Iop_InterleaveLO32x4,(_argL),(_argR))
3522#define IHI32x4(_argL,_argR) \
3523 binop(Iop_InterleaveHI32x4,(_argL),(_argR))
3524#define ILO16x8(_argL,_argR) \
3525 binop(Iop_InterleaveLO16x8,(_argL),(_argR))
3526#define IHI16x8(_argL,_argR) \
3527 binop(Iop_InterleaveHI16x8,(_argL),(_argR))
3528#define ILO8x16(_argL,_argR) \
3529 binop(Iop_InterleaveLO8x16,(_argL),(_argR))
3530#define IHI8x16(_argL,_argR) \
3531 binop(Iop_InterleaveHI8x16,(_argL),(_argR))
3532#define CEV32x4(_argL,_argR) \
3533 binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
3534#define COD32x4(_argL,_argR) \
3535 binop(Iop_CatOddLanes32x4,(_argL),(_argR))
3536#define COD16x8(_argL,_argR) \
3537 binop(Iop_CatOddLanes16x8,(_argL),(_argR))
3538#define COD8x16(_argL,_argR) \
3539 binop(Iop_CatOddLanes8x16,(_argL),(_argR))
3540#define CEV8x16(_argL,_argR) \
3541 binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
3542#define AND(_arg1,_arg2) \
3543 binop(Iop_AndV128,(_arg1),(_arg2))
3544#define OR2(_arg1,_arg2) \
3545 binop(Iop_OrV128,(_arg1),(_arg2))
3546#define OR3(_arg1,_arg2,_arg3) \
3547 binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
3548#define OR4(_arg1,_arg2,_arg3,_arg4) \
3549 binop(Iop_OrV128, \
3550 binop(Iop_OrV128,(_arg1),(_arg2)), \
3551 binop(Iop_OrV128,(_arg3),(_arg4)))
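/* A reading aid for the above: SL(hi,lo,n) selects bytes [n+15 .. n]
   of the 32-byte concatenation hi:lo, so ROR(v,n) rotates the 16-byte
   vector right by n bytes, and ROL(v,n) rotates it left by n bytes. */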
3552
3553
3554/* Do interleaving for 1 128 bit vector, for ST1 insns. */
3555static
3556void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
3557 UInt laneSzBlg2, IRTemp u0 )
3558{
3559 assign(*i0, mkexpr(u0));
3560}
3561
3562
3563/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
3564static
3565void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
3566 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
3567{
3568 /* This is pretty easy, since we have primitives directly to
3569 hand. */
3570 if (laneSzBlg2 == 3) {
3571 // 64x2
3572 // u1 == B1 B0, u0 == A1 A0
3573 // i1 == B1 A1, i0 == B0 A0
3574 assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
3575 assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
3576 return;
3577 }
3578 if (laneSzBlg2 == 2) {
3579 // 32x4
3580 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3581 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3582 assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
3583 assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
3584 return;
3585 }
3586 if (laneSzBlg2 == 1) {
3587 // 16x8
3588 // u1 == B{7..0}, u0 == A{7..0}
3589 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3590 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3591 assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
3592 assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
3593 return;
3594 }
3595 if (laneSzBlg2 == 0) {
3596 // 8x16
3597 // u1 == B{f..0}, u0 == A{f..0}
3598 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3599 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3600 assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
3601 assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
3602 return;
3603 }
3604 /*NOTREACHED*/
3605 vassert(0);
3606}
3607
3608
3609/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
3610static
3611void math_INTERLEAVE3_128(
3612 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
3613 UInt laneSzBlg2,
3614 IRTemp u0, IRTemp u1, IRTemp u2 )
3615{
3616 if (laneSzBlg2 == 3) {
3617 // 64x2
3618 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3619 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3620 assign(*i2, IHI64x2( EX(u2), EX(u1) ));
3621 assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
3622 assign(*i0, ILO64x2( EX(u1), EX(u0) ));
3623 return;
3624 }
3625
3626 if (laneSzBlg2 == 2) {
3627 // 32x4
3628 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3629 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3630 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3631 IRTemp p0 = newTempV128();
3632 IRTemp p1 = newTempV128();
3633 IRTemp p2 = newTempV128();
3634 IRTemp c1100 = newTempV128();
3635 IRTemp c0011 = newTempV128();
3636 IRTemp c0110 = newTempV128();
3637 assign(c1100, mkV128(0xFF00));
3638 assign(c0011, mkV128(0x00FF));
3639 assign(c0110, mkV128(0x0FF0));
3640 // First interleave them at 64x2 granularity,
3641 // generating partial ("p") values.
3642 math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
3643 // And more shuffling around for the final answer
3644 assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
3645 AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
3646 assign(*i1, OR3( SHL(EX(p2),12),
3647 AND(EX(p1),EX(c0110)),
3648 SHR(EX(p0),12) ));
3649 assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
3650 AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
3651 return;
3652 }
3653
3654 if (laneSzBlg2 == 1) {
3655 // 16x8
3656 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3657 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3658 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3659 //
3660 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3661 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3662 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3663 //
3664 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3665      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3666 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3667 IRTemp p0 = newTempV128();
3668 IRTemp p1 = newTempV128();
3669 IRTemp p2 = newTempV128();
3670 IRTemp c1000 = newTempV128();
3671 IRTemp c0100 = newTempV128();
3672 IRTemp c0010 = newTempV128();
3673 IRTemp c0001 = newTempV128();
3674 assign(c1000, mkV128(0xF000));
3675 assign(c0100, mkV128(0x0F00));
3676 assign(c0010, mkV128(0x00F0));
3677 assign(c0001, mkV128(0x000F));
3678 // First interleave them at 32x4 granularity,
3679 // generating partial ("p") values.
3680 math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
3681 // And more shuffling around for the final answer
3682 assign(*i2,
3683 OR4( AND( IHI16x8( EX(p2), ROL(EX(p2),4) ), EX(c1000) ),
3684 AND( IHI16x8( ROL(EX(p2),6), EX(p2) ), EX(c0100) ),
3685 AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
3686 AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
3687 ));
3688 assign(*i1,
3689 OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
3690 AND( IHI16x8( EX(p1), ROL(EX(p1),4) ), EX(c0100) ),
3691 AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
3692 AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
3693 ));
3694 assign(*i0,
3695 OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
3696 AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
3697 AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
3698 AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
3699 ));
3700 return;
3701 }
3702
3703 if (laneSzBlg2 == 0) {
3704 // 8x16. It doesn't seem worth the hassle of first doing a
3705 // 16x8 interleave, so just generate all 24 partial results
3706 // directly :-(
3707 // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
3708 // i2 == Cf Bf Af Ce .. Bb Ab Ca
3709 // i1 == Ba Aa C9 B9 .. A6 C5 B5
3710 // i0 == A5 C4 B4 A4 .. C0 B0 A0
3711
3712 IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
3713 IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
3714 IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
3715 IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
3716 IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
3717 IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
3718 IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
3719 IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
3720 IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();
3721
3722      // e.g. XXXX(qqq, CC, 0xF, BB, 0xA) sets t_qqq to be a vector
3723 // of the form 14 bytes junk : CC[0xF] : BB[0xA]
3724 //
3725# define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
3726 IRTemp t_##_tempName = newTempV128(); \
3727 assign(t_##_tempName, \
3728 ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
3729 ROR(EX(_srcVec2),(_srcShift2)) ) )
3730
3731 // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
3732 IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;
3733
3734 // The slicing and reassembly are done as interleavedly as possible,
3735 // so as to minimise the demand for registers in the back end, which
3736 // was observed to be a problem in testing.
3737
3738 XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
3739 XXXX(AfCe, AA, 0xf, CC, 0xe);
3740 assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));
3741
3742 XXXX(BeAe, BB, 0xe, AA, 0xe);
3743 XXXX(CdBd, CC, 0xd, BB, 0xd);
3744 assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
3745 assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));
3746
3747 XXXX(AdCc, AA, 0xd, CC, 0xc);
3748 XXXX(BcAc, BB, 0xc, AA, 0xc);
3749 assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));
3750
3751 XXXX(CbBb, CC, 0xb, BB, 0xb);
3752 XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
3753 assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
3754 assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
3755 assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));
3756
3757 XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
3758 XXXX(C9B9, CC, 0x9, BB, 0x9);
3759 assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));
3760
3761 XXXX(A9C8, AA, 0x9, CC, 0x8);
3762 XXXX(B8A8, BB, 0x8, AA, 0x8);
3763 assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
3764 assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));
3765
3766 XXXX(C7B7, CC, 0x7, BB, 0x7);
3767 XXXX(A7C6, AA, 0x7, CC, 0x6);
3768 assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));
3769
3770 XXXX(B6A6, BB, 0x6, AA, 0x6);
3771 XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
3772 assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
3773 assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
3774 assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));
3775
3776 XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
3777 XXXX(B4A4, BB, 0x4, AA, 0x4);
3778 assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));
3779
3780 XXXX(C3B3, CC, 0x3, BB, 0x3);
3781 XXXX(A3C2, AA, 0x3, CC, 0x2);
3782 assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
3783 assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));
3784
3785 XXXX(B2A2, BB, 0x2, AA, 0x2);
3786 XXXX(C1B1, CC, 0x1, BB, 0x1);
3787 assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));
3788
3789 XXXX(A1C0, AA, 0x1, CC, 0x0);
3790 XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
3791 assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
3792 assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
3793 assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));
3794
3795# undef XXXX
3796 return;
3797 }
3798
3799 /*NOTREACHED*/
3800 vassert(0);
3801}
3802
3803
3804/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
3805static
3806void math_INTERLEAVE4_128(
3807 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
3808 UInt laneSzBlg2,
3809 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
3810{
3811 if (laneSzBlg2 == 3) {
3812 // 64x2
3813 assign(*i0, ILO64x2(EX(u1), EX(u0)));
3814 assign(*i1, ILO64x2(EX(u3), EX(u2)));
3815 assign(*i2, IHI64x2(EX(u1), EX(u0)));
3816 assign(*i3, IHI64x2(EX(u3), EX(u2)));
3817 return;
3818 }
3819 if (laneSzBlg2 == 2) {
3820 // 32x4
3821 // First, interleave at the 64-bit lane size.
3822 IRTemp p0 = newTempV128();
3823 IRTemp p1 = newTempV128();
3824 IRTemp p2 = newTempV128();
3825 IRTemp p3 = newTempV128();
3826 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
3827 // And interleave (cat) at the 32 bit size.
3828 assign(*i0, CEV32x4(EX(p1), EX(p0)));
3829 assign(*i1, COD32x4(EX(p1), EX(p0)));
3830 assign(*i2, CEV32x4(EX(p3), EX(p2)));
3831 assign(*i3, COD32x4(EX(p3), EX(p2)));
3832 return;
3833 }
3834 if (laneSzBlg2 == 1) {
3835 // 16x8
3836 // First, interleave at the 32-bit lane size.
3837 IRTemp p0 = newTempV128();
3838 IRTemp p1 = newTempV128();
3839 IRTemp p2 = newTempV128();
3840 IRTemp p3 = newTempV128();
3841 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
3842 // And rearrange within each vector, to get the right 16 bit lanes.
3843 assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
3844 assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
3845 assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
3846 assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
3847 return;
3848 }
3849 if (laneSzBlg2 == 0) {
3850 // 8x16
3851 // First, interleave at the 16-bit lane size.
3852 IRTemp p0 = newTempV128();
3853 IRTemp p1 = newTempV128();
3854 IRTemp p2 = newTempV128();
3855 IRTemp p3 = newTempV128();
3856 math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
3857 // And rearrange within each vector, to get the right 8 bit lanes.
3858 assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
3859 assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
3860 assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
3861 assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
3862 return;
3863 }
3864 /*NOTREACHED*/
3865 vassert(0);
3866}
3867
3868
3869/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
3870static
3871void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
3872 UInt laneSzBlg2, IRTemp i0 )
3873{
3874 assign(*u0, mkexpr(i0));
3875}
3876
3877
3878/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
3879static
3880void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
3881 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
3882{
3883 /* This is pretty easy, since we have primitives directly to
3884 hand. */
3885 if (laneSzBlg2 == 3) {
3886 // 64x2
3887 // i1 == B1 A1, i0 == B0 A0
3888 // u1 == B1 B0, u0 == A1 A0
3889 assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
3890 assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
3891 return;
3892 }
3893 if (laneSzBlg2 == 2) {
3894 // 32x4
3895 // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
3896 // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
3897 assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
3898 assign(*u1, binop(Iop_CatOddLanes32x4, mkexpr(i1), mkexpr(i0)));
3899 return;
3900 }
3901 if (laneSzBlg2 == 1) {
3902 // 16x8
3903 // i0 == B3 A3 B2 A2 B1 A1 B0 A0
3904 // i1 == B7 A7 B6 A6 B5 A5 B4 A4
3905 // u1 == B{7..0}, u0 == A{7..0}
3906 assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
3907 assign(*u1, binop(Iop_CatOddLanes16x8, mkexpr(i1), mkexpr(i0)));
3908 return;
3909 }
3910 if (laneSzBlg2 == 0) {
3911 // 8x16
3912 // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
3913 // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
3914 // u1 == B{f..0}, u0 == A{f..0}
3915 assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
3916 assign(*u1, binop(Iop_CatOddLanes8x16, mkexpr(i1), mkexpr(i0)));
3917 return;
3918 }
3919 /*NOTREACHED*/
3920 vassert(0);
3921}
3922
3923
3924/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
3925static
3926void math_DEINTERLEAVE3_128(
3927 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
3928 UInt laneSzBlg2,
3929 IRTemp i0, IRTemp i1, IRTemp i2 )
3930{
3931 if (laneSzBlg2 == 3) {
3932 // 64x2
3933 // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
3934 // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
3935 assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
3936 assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
3937 assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
3938 return;
3939 }
3940
3941 if (laneSzBlg2 == 2) {
3942 // 32x4
3943 // i2 == C3 B3 A2 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
3944 // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
3945 // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
3946 IRTemp t_a1c0b0a0 = newTempV128();
3947 IRTemp t_a2c1b1a1 = newTempV128();
3948 IRTemp t_a3c2b2a2 = newTempV128();
3949 IRTemp t_a0c3b3a3 = newTempV128();
3950 IRTemp p0 = newTempV128();
3951 IRTemp p1 = newTempV128();
3952 IRTemp p2 = newTempV128();
3953 // Compute some intermediate values.
3954 assign(t_a1c0b0a0, EX(i0));
3955 assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
3956 assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
3957 assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
3958 // First deinterleave into lane-pairs
3959 assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
3960 assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
3961 IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
3962 assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
3963 // Then deinterleave at 64x2 granularity.
3964 math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
3965 return;
3966 }
3967
3968 if (laneSzBlg2 == 1) {
3969 // 16x8
3970 // u2 == C7 C6 C5 C4 C3 C2 C1 C0
3971 // u1 == B7 B6 B5 B4 B3 B2 B1 B0
3972 // u0 == A7 A6 A5 A4 A3 A2 A1 A0
3973 //
3974 // i2 == C7 B7 A7 C6 B6 A6 C5 B5
3975      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
3976 // i0 == B2 A2 C1 B1 A1 C0 B0 A0
3977 //
3978 // p2 == C7 C6 B7 B6 A7 A6 C5 C4
3979 // p1 == B5 B4 A5 A4 C3 C2 B3 B2
3980 // p0 == A3 A2 C1 C0 B1 B0 A1 A0
3981
3982 IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
3983 s0 = s1 = s2 = s3
3984 = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
3985 newTempsV128_4(&s0, &s1, &s2, &s3);
3986 newTempsV128_4(&t0, &t1, &t2, &t3);
3987 newTempsV128_4(&p0, &p1, &p2, &c00111111);
3988
3989 // s0 == b2a2 c1b1a1 c0b0a0
3990 // s1 == b4a4 c3b3c3 c2b2a2
3991      // s1 == b4a4 c3b3a3 c2b2a2
3992 // s3 == b0a0 c7b7a7 c6b6a6
3993 assign(s0, EX(i0));
3994 assign(s1, SL(EX(i1),EX(i0),6*2));
3995 assign(s2, SL(EX(i2),EX(i1),4*2));
3996 assign(s3, SL(EX(i0),EX(i2),2*2));
3997
3998 // t0 == 0 0 c1c0 b1b0 a1a0
3999 // t1 == 0 0 c3c2 b3b2 a3a2
4000 // t2 == 0 0 c5c4 b5b4 a5a4
4001 // t3 == 0 0 c7c6 b7b6 a7a6
4002 assign(c00111111, mkV128(0x0FFF));
4003 assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
4004 assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
4005 assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
4006 assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));
4007
4008 assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
4009 assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
4010 assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));
4011
4012 // Then deinterleave at 32x4 granularity.
4013 math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
4014 return;
4015 }
4016
4017 if (laneSzBlg2 == 0) {
4018 // 8x16. This is the same scheme as for 16x8, with twice the
4019 // number of intermediate values.
4020 //
4021 // u2 == C{f..0}
4022 // u1 == B{f..0}
4023 // u0 == A{f..0}
4024 //
4025 // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
4026 // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
4027 // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4028 //
4029 // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
4030 // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
4031 // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
4032 //
4033 IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
4034 t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
4035 s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
4036 = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
4037 = IRTemp_INVALID;
4038 newTempsV128_4(&s0, &s1, &s2, &s3);
4039 newTempsV128_4(&s4, &s5, &s6, &s7);
4040 newTempsV128_4(&t0, &t1, &t2, &t3);
4041 newTempsV128_4(&t4, &t5, &t6, &t7);
4042 newTempsV128_4(&p0, &p1, &p2, &cMASK);
4043
4044 // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
4045 // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
4046 // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
4047 // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
4048 // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
4049 // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
4050 // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
4051 // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
4052 assign(s0, SL(EX(i1),EX(i0), 0));
4053 assign(s1, SL(EX(i1),EX(i0), 6));
4054 assign(s2, SL(EX(i1),EX(i0),12));
4055 assign(s3, SL(EX(i2),EX(i1), 2));
4056 assign(s4, SL(EX(i2),EX(i1), 8));
4057 assign(s5, SL(EX(i2),EX(i1),14));
4058 assign(s6, SL(EX(i0),EX(i2), 4));
4059 assign(s7, SL(EX(i0),EX(i2),10));
4060
4061 // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
4062 // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
4063 // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
4064 // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
4065 // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
4066 // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
4067 // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
4068 // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
4069 assign(cMASK, mkV128(0x003F));
4070 assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
4071 assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
4072 assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
4073 assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
4074 assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
4075 assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
4076 assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
4077 assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));
4078
4079 assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
4080 assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
4081 SHL(EX(t3),2), SHR(EX(t2),4) ));
4082 assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));
4083
4084 // Then deinterleave at 16x8 granularity.
4085 math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
4086 return;
4087 }
4088
4089 /*NOTREACHED*/
4090 vassert(0);
4091}
4092
4093
4094/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
4095static
4096void math_DEINTERLEAVE4_128(
4097 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4098 UInt laneSzBlg2,
4099 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4100{
4101 if (laneSzBlg2 == 3) {
4102 // 64x2
4103 assign(*u0, ILO64x2(EX(i2), EX(i0)));
4104 assign(*u1, IHI64x2(EX(i2), EX(i0)));
4105 assign(*u2, ILO64x2(EX(i3), EX(i1)));
4106 assign(*u3, IHI64x2(EX(i3), EX(i1)));
4107 return;
4108 }
4109 if (laneSzBlg2 == 2) {
4110 // 32x4
4111      IRTemp p0 = newTempV128();
4112      IRTemp p1 = newTempV128();
4113      IRTemp p2 = newTempV128();
4114      IRTemp p3 = newTempV128();
4115 assign(p0, ILO32x4(EX(i1), EX(i0)));
4116 assign(p1, IHI32x4(EX(i1), EX(i0)));
4117 assign(p2, ILO32x4(EX(i3), EX(i2)));
4118 assign(p3, IHI32x4(EX(i3), EX(i2)));
4119 // And now do what we did for the 64-bit case.
4120 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
4121 return;
4122 }
4123 if (laneSzBlg2 == 1) {
4124 // 16x8
4125 // Deinterleave into 32-bit chunks, then do as the 32-bit case.
4126 IRTemp p0 = newTempV128();
4127 IRTemp p1 = newTempV128();
4128 IRTemp p2 = newTempV128();
4129 IRTemp p3 = newTempV128();
4130 assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
4131 assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
4132 assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
4133 assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
4134 // From here on is like the 32 bit case.
4135 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
4136 return;
4137 }
4138 if (laneSzBlg2 == 0) {
4139 // 8x16
4140 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
4141 IRTemp p0 = newTempV128();
4142 IRTemp p1 = newTempV128();
4143 IRTemp p2 = newTempV128();
4144 IRTemp p3 = newTempV128();
4145 assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
4146 ILO8x16(EX(i0),ROL(EX(i0),4)) ));
4147 assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
4148 ILO8x16(EX(i1),ROL(EX(i1),4)) ));
4149 assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
4150 ILO8x16(EX(i2),ROL(EX(i2),4)) ));
4151 assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
4152 ILO8x16(EX(i3),ROL(EX(i3),4)) ));
4153 // From here on is like the 16 bit case.
4154 math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
4155 return;
4156 }
4157 /*NOTREACHED*/
4158 vassert(0);
4159}
4160
4161
4162/* Wrappers that use the full-width (de)interleavers to do half-width
4163 (de)interleaving. The scheme is to clone each input lane in the
4164 lower half of each incoming value, do a full width (de)interleave
4165   at the next lane size up, and remove every other lane of the
4166 result. The returned values may have any old junk in the upper
4167 64 bits -- the caller must ignore that. */
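/* Worked example, for the 2-register case at 32-bit granularity
   (laneSzBlg2 == 2), writing "." for the ignored upper-half lanes:
      u0  = [ .  .  x1 x0]      u1  = [ .  .  y1 y0]
      double:           du0 = [x1 x1 x0 x0]   du1 = [y1 y1 y0 y0]
      interleave 64x2:  di0 = [y0 y0 x0 x0]   di1 = [y1 y1 x1 x1]
      halve:            i0  = [ .  .  y0 x0]  i1  = [ .  .  y1 x1]
   which is the required 32-bit interleave in the low halves. */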
4168
4169/* Helper function -- get doubling and narrowing operations. */
4170static
4171void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
4172 /*OUT*/IROp* halver,
4173 UInt laneSzBlg2 )
4174{
4175 switch (laneSzBlg2) {
4176 case 2:
4177 *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
4178 break;
4179 case 1:
4180 *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
4181 break;
4182 case 0:
4183 *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
4184 break;
4185 default:
4186 vassert(0);
4187 }
4188}
4189
4190/* Do interleaving for 1 64 bit vector, for ST1 insns. */
4191static
4192void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
4193 UInt laneSzBlg2, IRTemp u0 )
4194{
4195 assign(*i0, mkexpr(u0));
4196}
4197
4198
4199/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
4200static
4201void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
4202 UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
4203{
4204 if (laneSzBlg2 == 3) {
4205 // 1x64, degenerate case
4206 assign(*i0, EX(u0));
4207 assign(*i1, EX(u1));
4208 return;
4209 }
4210
4211 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4212 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4213 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4214
4215 IRTemp du0 = newTempV128();
4216 IRTemp du1 = newTempV128();
4217 assign(du0, binop(doubler, EX(u0), EX(u0)));
4218 assign(du1, binop(doubler, EX(u1), EX(u1)));
4219 IRTemp di0 = newTempV128();
4220 IRTemp di1 = newTempV128();
4221 math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
4222 assign(*i0, binop(halver, EX(di0), EX(di0)));
4223 assign(*i1, binop(halver, EX(di1), EX(di1)));
4224}
4225
4226
4227/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
4228static
4229void math_INTERLEAVE3_64(
4230 /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
4231 UInt laneSzBlg2,
4232 IRTemp u0, IRTemp u1, IRTemp u2 )
4233{
4234 if (laneSzBlg2 == 3) {
4235 // 1x64, degenerate case
4236 assign(*i0, EX(u0));
4237 assign(*i1, EX(u1));
4238 assign(*i2, EX(u2));
4239 return;
4240 }
4241
4242 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4243 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4244 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4245
4246 IRTemp du0 = newTempV128();
4247 IRTemp du1 = newTempV128();
4248 IRTemp du2 = newTempV128();
4249 assign(du0, binop(doubler, EX(u0), EX(u0)));
4250 assign(du1, binop(doubler, EX(u1), EX(u1)));
4251 assign(du2, binop(doubler, EX(u2), EX(u2)));
4252 IRTemp di0 = newTempV128();
4253 IRTemp di1 = newTempV128();
4254 IRTemp di2 = newTempV128();
4255 math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
4256 assign(*i0, binop(halver, EX(di0), EX(di0)));
4257 assign(*i1, binop(halver, EX(di1), EX(di1)));
4258 assign(*i2, binop(halver, EX(di2), EX(di2)));
4259}
4260
4261
4262/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
4263static
4264void math_INTERLEAVE4_64(
4265 /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
4266 UInt laneSzBlg2,
4267 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
4268{
4269 if (laneSzBlg2 == 3) {
4270 // 1x64, degenerate case
4271 assign(*i0, EX(u0));
4272 assign(*i1, EX(u1));
4273 assign(*i2, EX(u2));
4274 assign(*i3, EX(u3));
4275 return;
4276 }
4277
4278 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4279 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4280 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4281
4282 IRTemp du0 = newTempV128();
4283 IRTemp du1 = newTempV128();
4284 IRTemp du2 = newTempV128();
4285 IRTemp du3 = newTempV128();
4286 assign(du0, binop(doubler, EX(u0), EX(u0)));
4287 assign(du1, binop(doubler, EX(u1), EX(u1)));
4288 assign(du2, binop(doubler, EX(u2), EX(u2)));
4289 assign(du3, binop(doubler, EX(u3), EX(u3)));
4290 IRTemp di0 = newTempV128();
4291 IRTemp di1 = newTempV128();
4292 IRTemp di2 = newTempV128();
4293 IRTemp di3 = newTempV128();
4294 math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
4295 laneSzBlg2 + 1, du0, du1, du2, du3);
4296 assign(*i0, binop(halver, EX(di0), EX(di0)));
4297 assign(*i1, binop(halver, EX(di1), EX(di1)));
4298 assign(*i2, binop(halver, EX(di2), EX(di2)));
4299 assign(*i3, binop(halver, EX(di3), EX(di3)));
4300}
4301
4302
4303/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
4304static
4305void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
4306 UInt laneSzBlg2, IRTemp i0 )
4307{
4308 assign(*u0, mkexpr(i0));
4309}
4310
4311
4312/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
4313static
4314void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
4315 UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
4316{
4317 if (laneSzBlg2 == 3) {
4318 // 1x64, degenerate case
4319 assign(*u0, EX(i0));
4320 assign(*u1, EX(i1));
4321 return;
4322 }
4323
4324 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4325 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4326 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4327
4328 IRTemp di0 = newTempV128();
4329 IRTemp di1 = newTempV128();
4330 assign(di0, binop(doubler, EX(i0), EX(i0)));
4331 assign(di1, binop(doubler, EX(i1), EX(i1)));
4332
4333 IRTemp du0 = newTempV128();
4334 IRTemp du1 = newTempV128();
4335 math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
4336 assign(*u0, binop(halver, EX(du0), EX(du0)));
4337 assign(*u1, binop(halver, EX(du1), EX(du1)));
4338}
4339
4340
4341/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
4342static
4343void math_DEINTERLEAVE3_64(
4344 /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
4345 UInt laneSzBlg2,
4346 IRTemp i0, IRTemp i1, IRTemp i2 )
4347{
4348 if (laneSzBlg2 == 3) {
4349 // 1x64, degenerate case
4350 assign(*u0, EX(i0));
4351 assign(*u1, EX(i1));
4352 assign(*u2, EX(i2));
4353 return;
4354 }
4355
4356 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4357 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4358 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4359
4360 IRTemp di0 = newTempV128();
4361 IRTemp di1 = newTempV128();
4362 IRTemp di2 = newTempV128();
4363 assign(di0, binop(doubler, EX(i0), EX(i0)));
4364 assign(di1, binop(doubler, EX(i1), EX(i1)));
4365 assign(di2, binop(doubler, EX(i2), EX(i2)));
4366 IRTemp du0 = newTempV128();
4367 IRTemp du1 = newTempV128();
4368 IRTemp du2 = newTempV128();
4369 math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
4370 assign(*u0, binop(halver, EX(du0), EX(du0)));
4371 assign(*u1, binop(halver, EX(du1), EX(du1)));
4372 assign(*u2, binop(halver, EX(du2), EX(du2)));
4373}
4374
4375
4376/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
4377static
4378void math_DEINTERLEAVE4_64(
4379 /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
4380 UInt laneSzBlg2,
4381 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
4382{
4383 if (laneSzBlg2 == 3) {
4384 // 1x64, degenerate case
4385 assign(*u0, EX(i0));
4386 assign(*u1, EX(i1));
4387 assign(*u2, EX(i2));
4388 assign(*u3, EX(i3));
4389 return;
4390 }
4391
4392 vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
4393 IROp doubler = Iop_INVALID, halver = Iop_INVALID;
4394 math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);
4395
4396 IRTemp di0 = newTempV128();
4397 IRTemp di1 = newTempV128();
4398 IRTemp di2 = newTempV128();
4399 IRTemp di3 = newTempV128();
4400 assign(di0, binop(doubler, EX(i0), EX(i0)));
4401 assign(di1, binop(doubler, EX(i1), EX(i1)));
4402 assign(di2, binop(doubler, EX(i2), EX(i2)));
4403 assign(di3, binop(doubler, EX(i3), EX(i3)));
4404 IRTemp du0 = newTempV128();
4405 IRTemp du1 = newTempV128();
4406 IRTemp du2 = newTempV128();
4407 IRTemp du3 = newTempV128();
4408 math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
4409 laneSzBlg2 + 1, di0, di1, di2, di3);
4410 assign(*u0, binop(halver, EX(du0), EX(du0)));
4411 assign(*u1, binop(halver, EX(du1), EX(du1)));
4412 assign(*u2, binop(halver, EX(du2), EX(du2)));
4413 assign(*u3, binop(halver, EX(du3), EX(du3)));
4414}
4415
4416
4417#undef EX
4418#undef SL
4419#undef ROR
4420#undef ROL
4421#undef SHR
4422#undef SHL
4423#undef ILO64x2
4424#undef IHI64x2
4425#undef ILO32x4
4426#undef IHI32x4
4427#undef ILO16x8
4428#undef IHI16x8
4429#undef ILO8x16
4430#undef IHI8x16
4431#undef CEV32x4
4432#undef COD32x4
4433#undef COD16x8
4434#undef COD8x16
4435#undef CEV8x16
4436#undef AND
4437#undef OR2
4438#undef OR3
4439#undef OR4
4440
4441
4442/*------------------------------------------------------------*/
sewardjbbcf1882014-01-12 12:49:10 +00004443/*--- Load and Store instructions ---*/
4444/*------------------------------------------------------------*/
4445
4446/* Generate the EA for a "reg + reg" style amode. This is done from
4447 parts of the insn, but for sanity checking sake it takes the whole
4448   parts of the insn, but for sanity checking's sake it takes the whole
4449 and S=insn[12]:
4450
4451 The possible forms, along with their opt:S values, are:
4452 011:0 Xn|SP + Xm
4453 111:0 Xn|SP + Xm
4454 011:1 Xn|SP + Xm * transfer_szB
4455 111:1 Xn|SP + Xm * transfer_szB
4456 010:0 Xn|SP + 32Uto64(Wm)
4457 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
4458 110:0 Xn|SP + 32Sto64(Wm)
4459 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
4460
4461 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
4462 the transfer size is insn[23,31,30]. For integer loads/stores,
4463 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
4464
4465 If the decoding fails, it returns IRTemp_INVALID.
4466
4467   isInt is True iff this decoding is for transfers to/from integer
4468 registers. If False it is for transfers to/from vector registers.
4469*/
4470static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
4471{
4472 UInt optS = SLICE_UInt(insn, 15, 12);
4473 UInt mm = SLICE_UInt(insn, 20, 16);
4474 UInt nn = SLICE_UInt(insn, 9, 5);
4475 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
4476 | SLICE_UInt(insn, 31, 30); // Log2 of the size
4477
4478 buf[0] = 0;
4479
4480 /* Sanity checks, that this really is a load/store insn. */
4481 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
4482 goto fail;
4483
4484 if (isInt
4485 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
4486 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
4487 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
4488 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
4489 goto fail;
4490
4491 if (!isInt
4492 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
4493 goto fail;
4494
4495 /* Throw out non-verified but possibly valid cases. */
4496 switch (szLg2) {
4497 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
4498 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
4499 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
4500 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
4501 case BITS3(1,0,0): // can only ever be valid for the vector case
sewardj208a7762014-10-22 13:52:51 +00004502 if (isInt) goto fail; else break;
sewardjbbcf1882014-01-12 12:49:10 +00004503 case BITS3(1,0,1): // these sizes are never valid
4504 case BITS3(1,1,0):
4505 case BITS3(1,1,1): goto fail;
4506
4507 default: vassert(0);
4508 }
4509
4510 IRExpr* rhs = NULL;
4511 switch (optS) {
4512 case BITS4(1,1,1,0): goto fail; //ATC
4513 case BITS4(0,1,1,0):
4514 rhs = getIReg64orZR(mm);
4515 vex_sprintf(buf, "[%s, %s]",
4516 nameIReg64orZR(nn), nameIReg64orZR(mm));
4517 break;
4518 case BITS4(1,1,1,1): goto fail; //ATC
4519 case BITS4(0,1,1,1):
4520 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
4521 vex_sprintf(buf, "[%s, %s lsl %u]",
4522 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
4523 break;
4524 case BITS4(0,1,0,0):
4525 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
4526 vex_sprintf(buf, "[%s, %s uxtx]",
4527 nameIReg64orZR(nn), nameIReg32orZR(mm));
4528 break;
4529 case BITS4(0,1,0,1):
4530 rhs = binop(Iop_Shl64,
4531 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
4532 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
4533 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4534 break;
4535 case BITS4(1,1,0,0):
4536 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
4537 vex_sprintf(buf, "[%s, %s sxtx]",
4538 nameIReg64orZR(nn), nameIReg32orZR(mm));
4539 break;
4540 case BITS4(1,1,0,1):
4541 rhs = binop(Iop_Shl64,
4542 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
4543 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
4544 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
4545 break;
4546 default:
4547 /* The rest appear to be genuinely invalid */
4548 goto fail;
4549 }
4550
4551 vassert(rhs);
4552 IRTemp res = newTemp(Ity_I64);
4553 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
4554 return res;
4555
4556 fail:
4557 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
4558 return IRTemp_INVALID;
4559}
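/* Example: "ldr x1, [x2, x3, lsl #3]" arrives here with optS == 0111
   and szLg2 == 3; the result is a temp holding X2 + (X3 << 3), and
   dis_buf is set to "[x2, x3 lsl 3]". */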
4560
4561
4562/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
4563 bits of DATAE :: Ity_I64. */
4564static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
4565{
4566 IRExpr* addrE = mkexpr(addr);
4567 switch (szB) {
4568 case 8:
4569 storeLE(addrE, dataE);
4570 break;
4571 case 4:
4572 storeLE(addrE, unop(Iop_64to32, dataE));
4573 break;
4574 case 2:
4575 storeLE(addrE, unop(Iop_64to16, dataE));
4576 break;
4577 case 1:
4578 storeLE(addrE, unop(Iop_64to8, dataE));
4579 break;
4580 default:
4581 vassert(0);
4582 }
4583}
4584
4585
4586/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
4587 placing the result in an Ity_I64 temporary. */
4588static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
4589{
4590 IRTemp res = newTemp(Ity_I64);
4591 IRExpr* addrE = mkexpr(addr);
4592 switch (szB) {
4593 case 8:
4594 assign(res, loadLE(Ity_I64,addrE));
4595 break;
4596 case 4:
4597 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
4598 break;
4599 case 2:
4600 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
4601 break;
4602 case 1:
4603 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
4604 break;
4605 default:
4606 vassert(0);
4607 }
4608 return res;
4609}
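/* These two helpers are used as a matched pair: stores narrow the
   64-bit register image down to the transfer size, and loads widen
   the transferred value back up by zero extension. */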
4610
4611
sewardj18bf5172014-06-14 18:05:30 +00004612/* Generate a "standard 7" name, from bitQ and size. But also
4613 allow ".1d" since that's occasionally useful. */
4614static
4615const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4616{
4617 vassert(bitQ <= 1 && size <= 3);
4618 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00004619 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00004620 UInt ix = (bitQ << 2) | size;
4621 vassert(ix < 8);
4622 return nms[ix];
4623}
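/* For example, bitQ == 1 and size == 2 gives "4s". */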
4624
4625
sewardjbbcf1882014-01-12 12:49:10 +00004626static
4627Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4628{
4629# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4630
4631 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4632 /* uimm12 is scaled by the transfer size
4633
4634 31 29 26 21 9 4
4635 | | | | | |
4636 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4637 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4638
4639 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4640 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4641
4642 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4643 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4644
4645 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4646 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4647 */
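   /* e.g. "ldr x0, [sp, #16]" is encoded with imm12 == 2; the
      immediate is scaled by the transfer size, 8 bytes here. */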
4648 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4649 UInt szLg2 = INSN(31,30);
4650 UInt szB = 1 << szLg2;
4651 Bool isLD = INSN(22,22) == 1;
4652 UInt offs = INSN(21,10) * szB;
4653 UInt nn = INSN(9,5);
4654 UInt tt = INSN(4,0);
4655 IRTemp ta = newTemp(Ity_I64);
4656 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4657 if (nn == 31) { /* FIXME generate stack alignment check */ }
4658 vassert(szLg2 < 4);
4659 if (isLD) {
4660 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4661 } else {
4662 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4663 }
4664 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4665 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4666 DIP("%s %s, [%s, #%u]\n",
4667 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4668 nameIReg64orSP(nn), offs);
4669 return True;
4670 }
4671
4672 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4673 /*
4674 31 29 26 20 11 9 4
4675 | | | | | | |
4676 (at-Rn-then-Rn=EA) | | |
4677 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4678 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4679
4680 (at-EA-then-Rn=EA)
4681 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4682 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4683
4684 (at-EA)
4685 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4686 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4687
4688 simm9 is unscaled.
4689
4690 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4691 load case this is because would create two competing values for
4692      load case this is because it would create two competing values for
4693 disallows it anyway.
4694
4695 Stores are narrowing, loads are unsigned widening. sz encodes
4696 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4697 */
4698 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4699 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4700 UInt szLg2 = INSN(31,30);
4701 UInt szB = 1 << szLg2;
4702 Bool isLoad = INSN(22,22) == 1;
4703 UInt imm9 = INSN(20,12);
4704 UInt nn = INSN(9,5);
4705 UInt tt = INSN(4,0);
4706 Bool wBack = INSN(10,10) == 1;
4707 UInt how = INSN(11,10);
4708 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4709 /* undecodable; fall through */
4710 } else {
4711 if (nn == 31) { /* FIXME generate stack alignment check */ }
4712
4713 // Compute the transfer address TA and the writeback address WA.
4714 IRTemp tRN = newTemp(Ity_I64);
4715 assign(tRN, getIReg64orSP(nn));
4716 IRTemp tEA = newTemp(Ity_I64);
4717 Long simm9 = (Long)sx_to_64(imm9, 9);
4718 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4719
4720 IRTemp tTA = newTemp(Ity_I64);
4721 IRTemp tWA = newTemp(Ity_I64);
4722 switch (how) {
4723 case BITS2(0,1):
4724 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4725 case BITS2(1,1):
4726 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4727 case BITS2(0,0):
4728 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4729 default:
4730 vassert(0); /* NOTREACHED */
4731 }
4732
sewardje0bff8b2014-03-09 09:40:23 +00004733 /* Normally rN would be updated after the transfer. However, in
4734            the special case typified by
4735 str x30, [sp,#-16]!
4736 it is necessary to update SP before the transfer, (1)
4737 because Memcheck will otherwise complain about a write
4738 below the stack pointer, and (2) because the segfault
4739 stack extension mechanism will otherwise extend the stack
4740 only down to SP before the instruction, which might not be
4741            far enough, if the -16 offset takes the actual access
4742 address to the next page.
4743 */
4744 Bool earlyWBack
4745 = wBack && simm9 < 0 && szB == 8
4746 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4747
4748 if (wBack && earlyWBack)
4749 putIReg64orSP(nn, mkexpr(tEA));
4750
sewardjbbcf1882014-01-12 12:49:10 +00004751 if (isLoad) {
4752 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4753 } else {
4754 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4755 }
4756
sewardje0bff8b2014-03-09 09:40:23 +00004757 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004758 putIReg64orSP(nn, mkexpr(tEA));
4759
4760 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4761 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4762 const HChar* fmt_str = NULL;
4763 switch (how) {
4764 case BITS2(0,1):
4765 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4766 break;
4767 case BITS2(1,1):
4768 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4769 break;
4770 case BITS2(0,0):
4771 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4772 break;
4773 default:
4774 vassert(0);
4775 }
4776 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4777 nameIRegOrZR(szB == 8, tt),
4778 nameIReg64orSP(nn), simm9);
4779 return True;
4780 }
4781 }
4782
4783 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4784 /* L==1 => mm==LD
4785 L==0 => mm==ST
4786 x==0 => 32 bit transfers, and zero extended loads
4787 x==1 => 64 bit transfers
4788 simm7 is scaled by the (single-register) transfer size
4789
4790 (at-Rn-then-Rn=EA)
4791 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4792
4793 (at-EA-then-Rn=EA)
4794 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4795
4796 (at-EA)
4797 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4798 */
4799
4800 UInt insn_30_23 = INSN(30,23);
4801 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4802 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4803 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4804 UInt bL = INSN(22,22);
4805 UInt bX = INSN(31,31);
4806 UInt bWBack = INSN(23,23);
4807 UInt rT1 = INSN(4,0);
4808 UInt rN = INSN(9,5);
4809 UInt rT2 = INSN(14,10);
4810 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4811 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4812 || (bL && rT1 == rT2)) {
4813 /* undecodable; fall through */
4814 } else {
4815 if (rN == 31) { /* FIXME generate stack alignment check */ }
4816
4817 // Compute the transfer address TA and the writeback address WA.
4818 IRTemp tRN = newTemp(Ity_I64);
4819 assign(tRN, getIReg64orSP(rN));
4820 IRTemp tEA = newTemp(Ity_I64);
4821 simm7 = (bX ? 8 : 4) * simm7;
4822 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4823
4824 IRTemp tTA = newTemp(Ity_I64);
4825 IRTemp tWA = newTemp(Ity_I64);
4826 switch (INSN(24,23)) {
4827 case BITS2(0,1):
4828 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4829 case BITS2(1,1):
4830 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4831 case BITS2(1,0):
4832 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4833 default:
4834 vassert(0); /* NOTREACHED */
4835 }
4836
4837 /* Normally rN would be updated after the transfer. However, in
4838            the special case typified by
4839 stp x29, x30, [sp,#-112]!
4840 it is necessary to update SP before the transfer, (1)
4841 because Memcheck will otherwise complain about a write
4842 below the stack pointer, and (2) because the segfault
4843 stack extension mechanism will otherwise extend the stack
4844 only down to SP before the instruction, which might not be
4845            far enough, if the -112 offset takes the actual access
4846 address to the next page.
4847 */
4848 Bool earlyWBack
4849 = bWBack && simm7 < 0
4850 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4851
4852 if (bWBack && earlyWBack)
4853 putIReg64orSP(rN, mkexpr(tEA));
4854
4855 /**/ if (bL == 1 && bX == 1) {
4856 // 64 bit load
4857 putIReg64orZR(rT1, loadLE(Ity_I64,
4858 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4859 putIReg64orZR(rT2, loadLE(Ity_I64,
4860 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4861 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00004862 // 32 bit load
4863 putIReg32orZR(rT1, loadLE(Ity_I32,
4864 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4865 putIReg32orZR(rT2, loadLE(Ity_I32,
4866 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4867 } else if (bL == 0 && bX == 1) {
4868 // 64 bit store
4869 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4870 getIReg64orZR(rT1));
4871 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4872 getIReg64orZR(rT2));
4873 } else {
4874 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00004875 // 32 bit store
4876 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4877 getIReg32orZR(rT1));
4878 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4879 getIReg32orZR(rT2));
4880 }
4881
4882 if (bWBack && !earlyWBack)
4883 putIReg64orSP(rN, mkexpr(tEA));
4884
4885 const HChar* fmt_str = NULL;
4886 switch (INSN(24,23)) {
4887 case BITS2(0,1):
4888 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4889 break;
4890 case BITS2(1,1):
4891 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4892 break;
4893 case BITS2(1,0):
4894 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4895 break;
4896 default:
4897 vassert(0);
4898 }
4899 DIP(fmt_str, bL == 0 ? "st" : "ld",
4900 nameIRegOrZR(bX == 1, rT1),
4901 nameIRegOrZR(bX == 1, rT2),
4902 nameIReg64orSP(rN), simm7);
4903 return True;
4904 }
4905 }
4906
4907 /* ---------------- LDR (literal, int reg) ---------------- */
4908 /* 31 29 23 4
4909 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
4910 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
4911 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4912 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
4913 Just handles the first two cases for now.
4914 */
4915 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4916 UInt imm19 = INSN(23,5);
4917 UInt rT = INSN(4,0);
4918 UInt bX = INSN(30,30);
4919 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4920 if (bX) {
4921 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4922 } else {
4923 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4924 }
4925 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4926 return True;
4927 }

   /* -------------- {LD,ST}R (integer register) --------------- */
   /* 31 29        20 15     12 11 9  4
      |  |         |  |      |  |  |  |
      11 111000011 Rm option S 10 Rn Rt  LDR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000011 Rm option S 10 Rn Rt  LDR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000011 Rm option S 10 Rn Rt  LDRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000011 Rm option S 10 Rn Rt  LDRB Wt, [Xn|SP, R<m>{ext/sh}]

      11 111000001 Rm option S 10 Rn Rt  STR  Xt, [Xn|SP, R<m>{ext/sh}]
      10 111000001 Rm option S 10 Rn Rt  STR  Wt, [Xn|SP, R<m>{ext/sh}]
      01 111000001 Rm option S 10 Rn Rt  STRH Wt, [Xn|SP, R<m>{ext/sh}]
      00 111000001 Rm option S 10 Rn Rt  STRB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea != IRTemp_INVALID) {
         switch (szLg2) {
            case 3: /* 64 bit */
               if (isLD) {
                  putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg64orZR(tt));
                  DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
               }
               break;
            case 2: /* 32 bit */
               if (isLD) {
                  putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
                  DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), getIReg32orZR(tt));
                  DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }
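
   /* Editorial note: in the 16- and 8-bit load cases above, the value
      is zero-extended all the way to 64 bits and written with
      putIReg64orZR.  That also clears the upper 32 bits of the Xt
      container, which matches the architectural rule that a load into
      Wt zeroes the top half of Xt. */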

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26 23    21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }
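
   /* Worked example (illustrative): "ldrsh x3, [x1, #6]" has
      szLg2 == 1 and stores imm12 == 3, because the immediate field is
      scaled by the transfer size (szB == 2).  The decoder above
      reconstructs the byte offset as imm12 * szB == 6 and sign-extends
      the loaded halfword to 64 bits. */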

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load, SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load, SX to 64
               halfword load, SX to 32, ZX to 64
               word     load, SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n"
                  : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }
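
   /* Editorial note: atRN selects the post-index form, which transfers
      at the original Rn and then writes EA back; the pre-index form
      transfers at the already-offset address.  Both forms leave
      Rn == EA afterwards, which is why the putIReg64orSP above is
      unconditional. */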

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load, SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load, SX to 64
               halfword load, SX to 32, ZX to 64
               word     load, SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 bit takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
             nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
             nameIReg64orSP(nn), simm7);
         return True;
      }
   }
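
   /* Worked example (illustrative): for "stp q0, q1, [sp, #-512]!" the
      addressing mode is pre-index (bits 24:23 == 11), wBack holds, the
      scaled simm7 is -512, nn == 31 and it is a store, so earlyWBack
      is True.  SP is therefore moved down by 512 before the two
      128-bit stores are emitted, for the Memcheck and stack-extension
      reasons given in the comment above. */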

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S 10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S 10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S 10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S 10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S 10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S 10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S 10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S 10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S 10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S 10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar dis_buf[64];
      UInt  szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool  isLD  = INSN(22,22) == 1;
      UInt  tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
   after_LDR_STR_vector_register:
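
   /* Editorial note: for the vector forms above, the transfer size is
      (bit 23 << 2) | bits 31:30, giving szLg2 values 0..4 for B, H, S,
      D and Q.  Values 5..7 (bit 23 set with a nonzero size field) are
      not valid encodings, hence the szLg2 > 4 rejection before the EA
      is computed. */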

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S 10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S 10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S 10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S 10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S 10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar dis_buf[64];
      UInt  szLg2  = INSN(31,30);
      Bool  sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt  tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
   after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29  23    21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }

   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29     23  20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0 imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0 imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0 imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0 imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0 imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0 imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0 imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0 imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0 imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0 imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
      return True;
   }

   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29     23  20   11 9 4
      00 111 100 01 0 imm9 00 n t   LDR Bt, [Xn|SP, #simm]
      01 111 100 01 0 imm9 00 n t   LDR Ht, [Xn|SP, #simm]
      10 111 100 01 0 imm9 00 n t   LDR St, [Xn|SP, #simm]
      11 111 100 01 0 imm9 00 n t   LDR Dt, [Xn|SP, #simm]
      00 111 100 11 0 imm9 00 n t   LDR Qt, [Xn|SP, #simm]

      00 111 100 00 0 imm9 00 n t   STR Bt, [Xn|SP, #simm]
      01 111 100 00 0 imm9 00 n t   STR Ht, [Xn|SP, #simm]
      10 111 100 00 0 imm9 00 n t   STR St, [Xn|SP, #simm]
      11 111 100 00 0 imm9 00 n t   STR Dt, [Xn|SP, #simm]
      00 111 100 10 0 imm9 00 n t   STR Qt, [Xn|SP, #simm]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }

   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 31 29      23    4
      00 011 100 imm19 t   LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t   LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t   LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
      return True;
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg)  ------ */
   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T},      [Xn|SP]
      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T},      [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      Bool is1d  = sz == BITS2(1,1) && !isQ;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,0,0): nRegs = 4; break;
         case BITS4(0,1,0,0): nRegs = 3; break;
         case BITS4(1,0,0,0): nRegs = 2; break;
         case BITS4(0,1,1,1): nRegs = 1; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs == 1                             /* .1d is allowed */
          || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
         u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
            case 1: u0 = newTempV128(); i0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
               case 1: assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
                          (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
                       break;
               case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
                          (&i0, &i1, &i2, sz, u0, u1, u2);
                       break;
               case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
                          (&i0, &i1, sz, u0, u1);
                       break;
               case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
                          (&i0, sz, u0);
                       break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i1)) );
                       /* fallthru */
               case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(i3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(i2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(i1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(1 * step)))));
                  /* fallthru */
               case 1:
                  assign(i0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
                          (&u0, &u1, &u2, &u3, sz, i0, i1, i2, i3);
                       break;
               case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
                          (&u0, &u1, &u2, sz, i0, i1, i2);
                       break;
               case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
                          (&u0, &u1, sz, i0, i1);
                       break;
               case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
                          (&u0, sz, i0);
                       break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       /* fallthru */
               case 1: putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
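
   /* Worked example (illustrative): "st2 {v0.8b, v1.8b}, [x0]" has
      nRegs == 2, isQ == False and sz == 00.  math_INTERLEAVE2_64
      arranges i0/i1 so that the 16 bytes stored are
      v0[0], v1[0], v0[1], v1[1], ..., v0[7], v1[7]; the LD2 path
      performs the inverse de-interleave back into u0/u1. */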

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt opc   = INSN(15,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      Bool isQ   = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u1)) );
                       storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                             mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T},      [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T},      [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }
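
   /* Worked example (illustrative): "ld1r {v0.4s}, [x1]" loads a
      single 32-bit element and math_DUP_TO_V128 replicates it into all
      four lanes of v0.  In the 64-bit case, "ld1r {v0.2s}, [x1]",
      bitQ == 0 and math_MAYBE_ZERO_HI64 zeroes the upper half of the
      register afterwards. */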

   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15  11   9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix],      [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix],      [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt xx    = INSN(15,14);
      UInt bitS  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               }
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }
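
   /* Worked example (illustrative): "ld1 {v5.h}[3], [x2]" has xx == 01,
      so laneSzB == 2, and the lane number is assembled from q:S:sz<1>,
      here 011 == 3.  Only that 16-bit lane of v5 is written; the rest
      of the register is left unchanged, since putQRegLane updates just
      the selected lane. */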

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20    14    9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                 mkU64(1)));
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "l" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }
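
   /* Editorial note: IRStmt_LLSC models the exclusive pair.  The IR
      store-conditional yields 1 on success and 0 on failure, whereas
      AArch64's STXR writes 0 to Ws on success and 1 on failure; the
      Xor with 1 above performs that inversion when assigning the
      status register. */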

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20    14    9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }
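
   /* Editorial note: the Imbe_Fence after an acquire load and before a
      release store is a full barrier, which is a conservative
      over-approximation of the one-way barrier semantics that
      LDAR/STLR actually require. */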

   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
   */
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }
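
   /* Editorial note: a conditional branch becomes a side exit
      (IRStmt_Exit) taken when the condition holds, followed by an
      unconditional jump to PC+4; this is the standard VEX idiom for a
      two-way branch. */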

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
      UInt  b5     = INSN(31,31);
      Bool  bIfZ   = INSN(24,24) == 0;
      UInt  b40    = INSN(23,19);
      UInt  imm14  = INSN(18,5);
      UInt  tt     = INSN(4,0);
      UInt  bitNo  = (b5 << 5) | b40;
      ULong uimm64 = imm14 << 2;
      Long  simm64 = sx_to_64(uimm64, 16);
      IRExpr* cond
         = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                 binop(Iop_And64,
                       binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
                       mkU64(1)),
                 mkU64(0));
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("tb%sz %s, #%u, 0x%llx\n",
          bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
          guest_PC_curr_instr + simm64);
      return True;
   }
6564
6565 /* -------------------- SVC -------------------- */
6566 /* 11010100 000 imm16 000 01
6567 Don't bother with anything except the imm16==0 case.
6568 */
6569 if (INSN(31,0) == 0xD4000001) {
6570 putPC(mkU64(guest_PC_curr_instr + 4));
6571 dres->whatNext = Dis_StopHere;
6572 dres->jk_StopHere = Ijk_Sys_syscall;
6573 DIP("svc #0\n");
6574 return True;
6575 }
6576
6577 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00006578 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00006579 0xD51BD0 010 Rt MSR tpidr_el0, rT
6580 0xD53BD0 010 Rt MRS rT, tpidr_el0
6581 */
6582 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6583 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6584 Bool toSys = INSN(21,21) == 0;
6585 UInt tt = INSN(4,0);
6586 if (toSys) {
6587 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6588 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6589 } else {
6590 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6591 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6592 }
6593 return True;
6594 }
sewardj6eb5ef82014-07-14 20:39:23 +00006595 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00006596 0xD51B44 000 Rt MSR fpcr, rT
6597 0xD53B44 000 Rt MRS rT, fpcr
6598 */
6599 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6600 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6601 Bool toSys = INSN(21,21) == 0;
6602 UInt tt = INSN(4,0);
6603 if (toSys) {
6604 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6605 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6606 } else {
6607 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6608 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6609 }
6610 return True;
6611 }
sewardj6eb5ef82014-07-14 20:39:23 +00006612 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00006613 0xD51B44 001 Rt MSR fpsr, rT
6614 0xD53B44 001 Rt MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00006615 The only part of this we model is FPSR.QC. All other bits
6616 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00006617 */
6618 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6619 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6620 Bool toSys = INSN(21,21) == 0;
6621 UInt tt = INSN(4,0);
6622 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00006623 /* Just deal with FPSR.QC. Make up a V128 value which is
6624 zero if Xt[27] is zero and any other value if Xt[27] is
6625 nonzero. */
6626 IRTemp qc64 = newTemp(Ity_I64);
6627 assign(qc64, binop(Iop_And64,
6628 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6629 mkU64(1)));
6630 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6631 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00006632 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6633 } else {
sewardja0645d52014-06-28 22:11:16 +00006634 /* Generate a value which is all zeroes except for bit 27,
6635 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00006636 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00006637 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6638 IRTemp qc64 = newTemp(Ity_I64);
6639 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6640 unop(Iop_V128to64, mkexpr(qcV128))));
6641 IRExpr* res = binop(Iop_Shl64,
6642 unop(Iop_1Uto64,
6643 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6644 mkU8(27));
6645 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00006646 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6647 }
6648 return True;
6649 }
sewardj6eb5ef82014-07-14 20:39:23 +00006650 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00006651 D51B42 000 Rt MSR nzcv, rT
6652 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00006653 The only parts of NZCV that actually exist are bits 31:28, which
6654 are the N Z C and V bits themselves. Hence the flags thunk provides
6655 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00006656 */
6657 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6658 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6659 Bool toSys = INSN(21,21) == 0;
6660 UInt tt = INSN(4,0);
6661 if (toSys) {
6662 IRTemp t = newTemp(Ity_I64);
6663 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6664 setFlags_COPY(t);
6665 DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
6666 } else {
6667 IRTemp res = newTemp(Ity_I64);
6668 assign(res, mk_arm64g_calculate_flags_nzcv());
6669 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6670 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6671 }
6672 return True;
6673 }
sewardj6eb5ef82014-07-14 20:39:23 +00006674 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00006675 Don't support arbitrary reads and writes to this register. Just
6676 return the value 16, which indicates that the DC ZVA instruction
6677 is not permitted, so we don't have to emulate it.
6678 D5 3B 00 111 Rt MRS rT, dczid_el0
6679 */
6680 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6681 UInt tt = INSN(4,0);
6682 putIReg64orZR(tt, mkU64(1<<4));
6683 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6684 return True;
6685 }
sewardj6eb5ef82014-07-14 20:39:23 +00006686 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00006687 We just handle reads, and make up a value from the D and I line
6688 sizes in the VexArchInfo we are given, and patch in the following
6689 fields that the Foundation model gives ("natively"):
6690 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6691 D5 3B 00 001 Rt MRS rT, ctr_el0
6692 */
6693 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6694 UInt tt = INSN(4,0);
6695 /* Need to generate a value from dMinLine_lg2_szB and
6696 iMinLine_lg2_szB. The value in the register is in 32-bit
6697 units, so need to subtract 2 from the values in the
6698 VexArchInfo. We can assume that the values here are valid --
6699 disInstr_ARM64 checks them -- so there's no need to deal with
6700 out-of-range cases. */
6701 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6702 && archinfo->arm64_dMinLine_lg2_szB <= 17
6703 && archinfo->arm64_iMinLine_lg2_szB >= 2
6704 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6705 UInt val
6706 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6707 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
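      /* Worked example (illustrative): with 64-byte D and I lines, both
         lg2 fields are 6, so DminLine == IminLine == 6 - 2 == 4 (log2 of
         the line size in 32-bit words), giving
         val == 0x8440C000 | 0x40000 | 0x4 == 0x8444C004. */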
6708 putIReg64orZR(tt, mkU64(val));
6709 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6710 return True;
6711 }
sewardj6eb5ef82014-07-14 20:39:23 +00006712 /* ---- Cases for CNTVCT_EL0 ----
6713 This is the generic timer's virtual count register. Support reads of it only
6714 by passing through to the host.
6715 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6716 */
6717 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6718 UInt tt = INSN(4,0);
6719 IRTemp val = newTemp(Ity_I64);
6720 IRExpr** args = mkIRExprVec_0();
6721 IRDirty* d = unsafeIRDirty_1_N (
6722 val,
6723 0/*regparms*/,
6724 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6725 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6726 args
6727 );
6728 /* execute the dirty call, dumping the result in val. */
6729 stmt( IRStmt_Dirty(d) );
6730 putIReg64orZR(tt, mkexpr(val));
6731 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6732 return True;
6733 }
sewardjbbcf1882014-01-12 12:49:10 +00006734
sewardj65902992014-05-03 21:20:56 +00006735 /* ------------------ IC_IVAU ------------------ */
6736 /* D5 0B 75 001 Rt ic ivau, rT
6737 */
6738 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6739 /* We will always be provided with a valid iMinLine value. */
6740 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6741 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6742 /* Round the requested address, in rT, down to the start of the
6743 containing block. */
6744 UInt tt = INSN(4,0);
6745 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6746 IRTemp addr = newTemp(Ity_I64);
6747 assign( addr, binop( Iop_And64,
6748 getIReg64orZR(tt),
6749 mkU64(~(lineszB - 1))) );
6750 /* Set the invalidation range, request exit-and-invalidate, with
6751 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006752 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6753 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006754 /* be paranoid ... */
6755 stmt( IRStmt_MBE(Imbe_Fence) );
6756 putPC(mkU64( guest_PC_curr_instr + 4 ));
6757 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00006758 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00006759 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6760 return True;
6761 }
6762
6763 /* ------------------ DC_CVAU ------------------ */
6764 /* D5 0B 7B 001 Rt dc cvau, rT
6765 */
6766 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6767 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00006768 dMinLine size, and request an Ijk_FlushDCache instead of
6769 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00006770 /* We will always be provided with a valid dMinLine value. */
6771 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6772 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6773 /* Round the requested address, in rT, down to the start of the
6774 containing block. */
6775 UInt tt = INSN(4,0);
6776 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6777 IRTemp addr = newTemp(Ity_I64);
6778 assign( addr, binop( Iop_And64,
6779 getIReg64orZR(tt),
6780 mkU64(~(lineszB - 1))) );
6781 /* Set the flush range, request exit-and-flush, with
6782 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006783 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6784 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006785 /* be paranoid ... */
6786 stmt( IRStmt_MBE(Imbe_Fence) );
6787 putPC(mkU64( guest_PC_curr_instr + 4 ));
6788 dres->whatNext = Dis_StopHere;
6789 dres->jk_StopHere = Ijk_FlushDCache;
6790 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6791 return True;
6792 }
6793
6794 /* ------------------ ISB, DMB, DSB ------------------ */
sewardj25842552014-10-31 10:25:19 +00006795 /* 31 21 11 7 6 4
6796 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6797 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6798 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6799 */
6800 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6801 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6802 && INSN(7,7) == 1
6803 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6804 UInt opc = INSN(6,5);
6805 UInt CRm = INSN(11,8);
6806 vassert(opc <= 2 && CRm <= 15);
sewardjd512d102014-02-21 14:49:44 +00006807 stmt(IRStmt_MBE(Imbe_Fence));
sewardj25842552014-10-31 10:25:19 +00006808 const HChar* opNames[3]
6809 = { "dsb", "dmb", "isb" };
6810 const HChar* howNames[16]
6811 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6812 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6813 DIP("%s %s\n", opNames[opc], howNames[CRm]);
sewardj65902992014-05-03 21:20:56 +00006814 return True;
6815 }
sewardjbbcf1882014-01-12 12:49:10 +00006816
sewardjdc9259c2014-02-27 11:10:19 +00006817 /* -------------------- NOP -------------------- */
6818 if (INSN(31,0) == 0xD503201F) {
6819 DIP("nop\n");
6820 return True;
6821 }
6822
sewardj39b51682014-11-25 12:17:53 +00006823 /* -------------------- BRK -------------------- */
6824 /* 31 23 20 4
6825 1101 0100 001 imm16 00000 BRK #imm16
6826 */
6827 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6828 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6829 UInt imm16 = INSN(20,5);
6830 /* Request SIGTRAP and then restart of this insn. */
6831 putPC(mkU64(guest_PC_curr_instr + 0));
6832 dres->whatNext = Dis_StopHere;
6833 dres->jk_StopHere = Ijk_SigTRAP;
6834 DIP("brk #%u\n", imm16);
6835 return True;
6836 }
6837
sewardjbbcf1882014-01-12 12:49:10 +00006838 //fail:
6839 vex_printf("ARM64 front end: branch_etc\n");
6840 return False;
6841# undef INSN
6842}
6843
6844
6845/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00006846/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00006847/*------------------------------------------------------------*/
6848
sewardjd96daf62014-06-15 08:17:35 +00006849/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00006850
sewardjd96daf62014-06-15 08:17:35 +00006851static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6852 // returns a0 b0
6853 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6854}
sewardje520bb32014-02-17 11:00:53 +00006855
sewardjd96daf62014-06-15 08:17:35 +00006856static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6857 // returns a1 b1
6858 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6859}
sewardje520bb32014-02-17 11:00:53 +00006860
sewardjd96daf62014-06-15 08:17:35 +00006861static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6862 // returns a2 a0 b2 b0
6863 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6864}
6865
6866static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6867 // returns a3 a1 b3 b1
6868 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6869}
6870
6871static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6872 // returns a1 b1 a0 b0
6873 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6874}
6875
6876static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6877 // returns a3 b3 a2 b2
6878 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6879}
6880
6881static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6882 // returns a6 a4 a2 a0 b6 b4 b2 b0
6883 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6884}
6885
6886static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6887 // returns a7 a5 a3 a1 b7 b5 b3 b1
6888 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6889}
6890
6891static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6892 // returns a3 b3 a2 b2 a1 b1 a0 b0
6893 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6894}
6895
6896static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6897 // returns a7 b7 a6 b6 a5 b5 a4 b4
6898 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6899}
6900
6901static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6902 IRTemp bFEDCBA9876543210 ) {
6903 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6904 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6905 mkexpr(bFEDCBA9876543210));
6906}
6907
6908static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6909 IRTemp bFEDCBA9876543210 ) {
6910 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6911 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6912 mkexpr(bFEDCBA9876543210));
6913}
6914
6915static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6916 IRTemp bFEDCBA9876543210 ) {
6917 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6918 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6919 mkexpr(bFEDCBA9876543210));
6920}
6921
6922static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6923 IRTemp bFEDCBA9876543210 ) {
6924 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6925 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6926 mkexpr(bFEDCBA9876543210));
6927}
sewardjecde6972014-02-05 11:01:19 +00006928
sewardjbbcf1882014-01-12 12:49:10 +00006929/* Generate N copies of |bit| in the bottom of a ULong. */
6930static ULong Replicate ( ULong bit, Int N )
6931{
sewardj606c4ba2014-01-26 19:11:14 +00006932 vassert(bit <= 1 && N >= 1 && N < 64);
6933 if (bit == 0) {
6934 return 0;
6935 } else {
6936 /* Careful. This won't work for N == 64. */
6937 return (1ULL << N) - 1;
6938 }
sewardjbbcf1882014-01-12 12:49:10 +00006939}
6940
sewardjfab09142014-02-10 10:28:13 +00006941static ULong Replicate32x2 ( ULong bits32 )
6942{
6943 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
6944 return (bits32 << 32) | bits32;
6945}
6946
6947static ULong Replicate16x4 ( ULong bits16 )
6948{
6949 vassert(0 == (bits16 & ~0xFFFFULL));
6950 return Replicate32x2((bits16 << 16) | bits16);
6951}
6952
6953static ULong Replicate8x8 ( ULong bits8 )
6954{
6955 vassert(0 == (bits8 & ~0xFFULL));
6956 return Replicate16x4((bits8 << 8) | bits8);
6957}
6958
6959/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
6960 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
6961 is 64. In the former case, the upper 32 bits of the returned value
6962 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00006963static ULong VFPExpandImm ( ULong imm8, Int N )
6964{
sewardj606c4ba2014-01-26 19:11:14 +00006965 vassert(imm8 <= 0xFF);
6966 vassert(N == 32 || N == 64);
6967 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
6968 Int F = N - E - 1;
6969 ULong imm8_6 = (imm8 >> 6) & 1;
6970 /* sign: 1 bit */
6971 /* exp: E bits */
6972 /* frac: F bits */
6973 ULong sign = (imm8 >> 7) & 1;
6974 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
6975 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
6976 vassert(sign < (1ULL << 1));
6977 vassert(exp < (1ULL << E));
6978 vassert(frac < (1ULL << F));
6979 vassert(1 + E + F == N);
6980 ULong res = (sign << (E+F)) | (exp << F) | frac;
6981 return res;
sewardjbbcf1882014-01-12 12:49:10 +00006982}
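
/* Worked example (illustrative, not used by the decoder): imm8 == 0x70
   with N == 64 gives sign == 0, imm8_6 == 1, E == 9, F == 54, hence
   exp == 0x0FF and frac == 0x30 << 48, so res == 0x3FF0000000000000,
   which is the IEEE754 double 1.0 -- as expected, since FMOV Dd, #1.0
   encodes imm8 as 0x70. */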
6983
sewardjfab09142014-02-10 10:28:13 +00006984/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
6985 This might fail, as indicated by the returned Bool. Page 2530 of
6986 the manual. */
6987static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
6988 UInt op, UInt cmode, UInt imm8 )
6989{
6990 vassert(op <= 1);
6991 vassert(cmode <= 15);
6992 vassert(imm8 <= 255);
6993
6994 *res = 0; /* will overwrite iff returning True */
6995
6996 ULong imm64 = 0;
6997 Bool testimm8 = False;
6998
6999 switch (cmode >> 1) {
7000 case 0:
7001 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7002 case 1:
7003 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7004 case 2:
7005 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7006 case 3:
7007 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7008 case 4:
7009 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7010 case 5:
7011 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7012 case 6:
7013 testimm8 = True;
7014 if ((cmode & 1) == 0)
7015 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7016 else
7017 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7018 break;
7019 case 7:
7020 testimm8 = False;
7021 if ((cmode & 1) == 0 && op == 0)
7022 imm64 = Replicate8x8(imm8);
7023 if ((cmode & 1) == 0 && op == 1) {
7024 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7025 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7026 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7027 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7028 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7029 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7030 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7031 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7032 }
7033 if ((cmode & 1) == 1 && op == 0) {
7034 ULong imm8_7 = (imm8 >> 7) & 1;
7035 ULong imm8_6 = (imm8 >> 6) & 1;
7036 ULong imm8_50 = imm8 & 63;
7037 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7038 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7039 | (Replicate(imm8_6, 5) << (6 + 19))
7040 | (imm8_50 << 19);
7041 imm64 = Replicate32x2(imm32);
7042 }
7043 if ((cmode & 1) == 1 && op == 1) {
7044 // imm64 = imm8<7>:NOT(imm8<6>)
7045 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7046 ULong imm8_7 = (imm8 >> 7) & 1;
7047 ULong imm8_6 = (imm8 >> 6) & 1;
7048 ULong imm8_50 = imm8 & 63;
7049 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7050 | (Replicate(imm8_6, 8) << 54)
7051 | (imm8_50 << 48);
7052 }
7053 break;
7054 default:
7055 vassert(0);
7056 }
7057
7058 if (testimm8 && imm8 == 0)
7059 return False;
7060
7061 *res = imm64;
7062 return True;
7063}
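
/* Worked examples (illustrative): op == 0, cmode == 0b0000,
   imm8 == 0x5A gives Replicate32x2(0x5A) == 0x0000005A0000005A;
   op == 0, cmode == 0b1110, imm8 == 0xAB gives Replicate8x8(0xAB)
   == 0xABABABABABABABAB. The cases with testimm8 set reject
   imm8 == 0, hence the False return. */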
7064
sewardj606c4ba2014-01-26 19:11:14 +00007065/* Helper for decoding laneage for vector operations that can be
7066 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7067 and SZ bits, typically for vector floating point. */
7068static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7069 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7070 /*OUT*/const HChar** arrSpec,
7071 Bool bitQ, Bool bitSZ )
7072{
7073 vassert(bitQ == True || bitQ == False);
7074 vassert(bitSZ == True || bitSZ == False);
7075 if (bitQ && bitSZ) { // 2x64
7076 if (tyI) *tyI = Ity_I64;
7077 if (tyF) *tyF = Ity_F64;
7078 if (nLanes) *nLanes = 2;
7079 if (zeroUpper) *zeroUpper = False;
7080 if (arrSpec) *arrSpec = "2d";
7081 return True;
7082 }
7083 if (bitQ && !bitSZ) { // 4x32
7084 if (tyI) *tyI = Ity_I32;
7085 if (tyF) *tyF = Ity_F32;
7086 if (nLanes) *nLanes = 4;
7087 if (zeroUpper) *zeroUpper = False;
7088 if (arrSpec) *arrSpec = "4s";
7089 return True;
7090 }
7091 if (!bitQ && !bitSZ) { // 2x32
7092 if (tyI) *tyI = Ity_I32;
7093 if (tyF) *tyF = Ity_F32;
7094 if (nLanes) *nLanes = 2;
7095 if (zeroUpper) *zeroUpper = True;
7096 if (arrSpec) *arrSpec = "2s";
7097 return True;
7098 }
7099 // Else impliedly 1x64, which isn't allowed.
7100 return False;
7101}
7102
sewardje520bb32014-02-17 11:00:53 +00007103/* Helper for decoding laneage for shift-style vector operations
7104 that involve an immediate shift amount. */
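/* For example (illustrative): immh:immb == 0001:001, i.e. immhb == 9,
   denotes 8-bit lanes (szBlg2 == 0) with shift == 16 - 9 == 7, which
   is how a right shift of #7 on byte lanes is encoded. */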
7105static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7106 UInt immh, UInt immb )
7107{
7108 vassert(immh < (1<<4));
7109 vassert(immb < (1<<3));
7110 UInt immhb = (immh << 3) | immb;
7111 if (immh & 8) {
7112 if (shift) *shift = 128 - immhb;
7113 if (szBlg2) *szBlg2 = 3;
7114 return True;
7115 }
7116 if (immh & 4) {
7117 if (shift) *shift = 64 - immhb;
7118 if (szBlg2) *szBlg2 = 2;
7119 return True;
7120 }
7121 if (immh & 2) {
7122 if (shift) *shift = 32 - immhb;
7123 if (szBlg2) *szBlg2 = 1;
7124 return True;
7125 }
7126 if (immh & 1) {
7127 if (shift) *shift = 16 - immhb;
7128 if (szBlg2) *szBlg2 = 0;
7129 return True;
7130 }
7131 return False;
7132}
7133
sewardjecde6972014-02-05 11:01:19 +00007134/* Generate IR to fold all lanes of the V128 value in 'src' as
7135 characterised by the operator 'op', and return the result in the
7136 bottom bits of a V128, with all other bits set to zero. */
sewardjdf9d6d52014-06-27 10:43:22 +00007137static IRTemp math_FOLDV ( IRTemp src, IROp op )
sewardjecde6972014-02-05 11:01:19 +00007138{
7139 /* The basic idea is to use repeated applications of Iop_CatEven*
7140 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7141 a complete vector. Then fold all those vectors with 'op' and
7142 zero out all but the least significant lane. */
7143 switch (op) {
7144 case Iop_Min8Sx16: case Iop_Min8Ux16:
sewardjb9aff1e2014-06-15 21:55:33 +00007145 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
sewardjfab09142014-02-10 10:28:13 +00007146 /* NB: temp naming here is misleading -- the naming is for 8
7147 lanes of 16 bit, whereas what is being operated on is 16
7148 lanes of 8 bits. */
7149 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007150 IRTemp x76547654 = newTempV128();
7151 IRTemp x32103210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007152 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7153 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007154 IRTemp x76767676 = newTempV128();
7155 IRTemp x54545454 = newTempV128();
7156 IRTemp x32323232 = newTempV128();
7157 IRTemp x10101010 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007158 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7159 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7160 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7161 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007162 IRTemp x77777777 = newTempV128();
7163 IRTemp x66666666 = newTempV128();
7164 IRTemp x55555555 = newTempV128();
7165 IRTemp x44444444 = newTempV128();
7166 IRTemp x33333333 = newTempV128();
7167 IRTemp x22222222 = newTempV128();
7168 IRTemp x11111111 = newTempV128();
7169 IRTemp x00000000 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007170 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7171 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7172 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7173 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7174 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7175 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7176 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7177 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7178 /* Naming not misleading after here. */
sewardj8e91fd42014-07-11 12:05:47 +00007179 IRTemp xAllF = newTempV128();
7180 IRTemp xAllE = newTempV128();
7181 IRTemp xAllD = newTempV128();
7182 IRTemp xAllC = newTempV128();
7183 IRTemp xAllB = newTempV128();
7184 IRTemp xAllA = newTempV128();
7185 IRTemp xAll9 = newTempV128();
7186 IRTemp xAll8 = newTempV128();
7187 IRTemp xAll7 = newTempV128();
7188 IRTemp xAll6 = newTempV128();
7189 IRTemp xAll5 = newTempV128();
7190 IRTemp xAll4 = newTempV128();
7191 IRTemp xAll3 = newTempV128();
7192 IRTemp xAll2 = newTempV128();
7193 IRTemp xAll1 = newTempV128();
7194 IRTemp xAll0 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007195 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7196 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7197 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7198 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7199 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7200 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7201 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7202 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7203 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7204 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7205 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7206 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7207 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7208 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7209 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7210 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
sewardj8e91fd42014-07-11 12:05:47 +00007211 IRTemp maxFE = newTempV128();
7212 IRTemp maxDC = newTempV128();
7213 IRTemp maxBA = newTempV128();
7214 IRTemp max98 = newTempV128();
7215 IRTemp max76 = newTempV128();
7216 IRTemp max54 = newTempV128();
7217 IRTemp max32 = newTempV128();
7218 IRTemp max10 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007219 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7220 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7221 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7222 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7223 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7224 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7225 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7226 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
sewardj8e91fd42014-07-11 12:05:47 +00007227 IRTemp maxFEDC = newTempV128();
7228 IRTemp maxBA98 = newTempV128();
7229 IRTemp max7654 = newTempV128();
7230 IRTemp max3210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007231 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7232 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7233 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7234 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007235 IRTemp maxFEDCBA98 = newTempV128();
7236 IRTemp max76543210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007237 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7238 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007239 IRTemp maxAllLanes = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007240 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7241 mkexpr(max76543210)));
sewardj8e91fd42014-07-11 12:05:47 +00007242 IRTemp res = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007243 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7244 return res;
sewardjecde6972014-02-05 11:01:19 +00007245 }
7246 case Iop_Min16Sx8: case Iop_Min16Ux8:
sewardjb9aff1e2014-06-15 21:55:33 +00007247 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
sewardjecde6972014-02-05 11:01:19 +00007248 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007249 IRTemp x76547654 = newTempV128();
7250 IRTemp x32103210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007251 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7252 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007253 IRTemp x76767676 = newTempV128();
7254 IRTemp x54545454 = newTempV128();
7255 IRTemp x32323232 = newTempV128();
7256 IRTemp x10101010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007257 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7258 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7259 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7260 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007261 IRTemp x77777777 = newTempV128();
7262 IRTemp x66666666 = newTempV128();
7263 IRTemp x55555555 = newTempV128();
7264 IRTemp x44444444 = newTempV128();
7265 IRTemp x33333333 = newTempV128();
7266 IRTemp x22222222 = newTempV128();
7267 IRTemp x11111111 = newTempV128();
7268 IRTemp x00000000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007269 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7270 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7271 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7272 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7273 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7274 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7275 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7276 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
sewardj8e91fd42014-07-11 12:05:47 +00007277 IRTemp max76 = newTempV128();
7278 IRTemp max54 = newTempV128();
7279 IRTemp max32 = newTempV128();
7280 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007281 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7282 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7283 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7284 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
sewardj8e91fd42014-07-11 12:05:47 +00007285 IRTemp max7654 = newTempV128();
7286 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007287 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7288 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007289 IRTemp max76543210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007290 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007291 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007292 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7293 return res;
7294 }
7295 case Iop_Min32Sx4: case Iop_Min32Ux4:
sewardjb9aff1e2014-06-15 21:55:33 +00007296 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
sewardjecde6972014-02-05 11:01:19 +00007297 IRTemp x3210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007298 IRTemp x3232 = newTempV128();
7299 IRTemp x1010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007300 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7301 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
sewardj8e91fd42014-07-11 12:05:47 +00007302 IRTemp x3333 = newTempV128();
7303 IRTemp x2222 = newTempV128();
7304 IRTemp x1111 = newTempV128();
7305 IRTemp x0000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007306 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7307 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7308 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7309 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
sewardj8e91fd42014-07-11 12:05:47 +00007310 IRTemp max32 = newTempV128();
7311 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007312 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7313 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007314 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007315 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007316 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007317 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7318 return res;
7319 }
sewardja5a6b752014-06-30 07:33:56 +00007320 case Iop_Add64x2: {
7321 IRTemp x10 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007322 IRTemp x00 = newTempV128();
7323 IRTemp x11 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007324 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7325 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
sewardj8e91fd42014-07-11 12:05:47 +00007326 IRTemp max10 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007327 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
sewardj8e91fd42014-07-11 12:05:47 +00007328 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007329 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7330 return res;
7331 }
sewardjecde6972014-02-05 11:01:19 +00007332 default:
7333 vassert(0);
7334 }
7335}
7336
7337
sewardj92d0ae32014-04-03 13:48:54 +00007338/* Generate IR for TBL and TBX. This deals with the 128 bit case
7339 only. */
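/* Semantics recap (illustrative): each byte of |src| indexes into the
   concatenation of the |len|+1 table registers tab[0..len], viewed as
   a flat array of 16*(len+1) bytes. In-range indices select a table
   byte; out-of-range indices take the corresponding byte of
   |oor_values|, which the caller is expected to set to all-zeroes for
   TBL and to the old destination value for TBX. */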
7340static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7341 IRTemp oor_values )
7342{
7343 vassert(len <= 3); /* len is unsigned, so >= 0 is implicit */
7344
7345 /* Generate some useful constants as concisely as possible. */
7346 IRTemp half15 = newTemp(Ity_I64);
7347 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7348 IRTemp half16 = newTemp(Ity_I64);
7349 assign(half16, mkU64(0x1010101010101010ULL));
7350
7351 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00007352 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007353 assign(allZero, mkV128(0x0000));
7354 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007355 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007356 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7357 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007358 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007359 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7360 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007361 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007362 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7363 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007364 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007365 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7366 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007367 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007368 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7369
7370 /* Group the 16/32/48/64 vectors so as to be indexable. */
7371 IRTemp allXX[4] = { all16, all32, all48, all64 };
7372
7373 /* Compute the result for each table vector, with zeroes in places
7374 where the index values are out of range, and OR them into the
7375 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00007376 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007377 assign(running_result, mkV128(0));
7378
7379 UInt tabent;
7380 for (tabent = 0; tabent <= len; tabent++) {
7381 vassert(tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00007382 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007383 assign(bias,
7384 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00007385 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007386 assign(biased_indices,
7387 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00007388 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007389 assign(valid_mask,
7390 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007391 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007392 assign(safe_biased_indices,
7393 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00007394 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007395 assign(results_or_junk,
7396 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7397 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007398 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007399 assign(results_or_zero,
7400 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7401 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00007402 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007403 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7404 mkexpr(running_result)));
7405 running_result = tmp;
7406 }
7407
7408 /* So now running_result holds the overall result where the indices
7409 are in range, and zero in out-of-range lanes. Now we need to
7410 compute an overall validity mask and use this to copy in the
7411 lanes in the oor_values for out of range indices. This is
7412 unnecessary for TBL but will get folded out by iropt, so we lean
7413 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00007414 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007415 assign(overall_valid_mask,
7416 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00007417 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007418 assign(result,
7419 binop(Iop_OrV128,
7420 mkexpr(running_result),
7421 binop(Iop_AndV128,
7422 mkexpr(oor_values),
7423 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7424 return result;
7425}
7426
7427
sewardj31b5a952014-06-26 07:41:14 +00007428/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7429 an op which takes two I64s and produces a V128. That is, a widening
7430 operator. Generate IR which applies |opI64x2toV128| to either the
7431 lower (if |is2| is False) or upper (if |is2| is True) halves of
7432 |argL| and |argR|, and return the value in a new IRTemp.
7433*/
7434static
7435IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7436 IRExpr* argL, IRExpr* argR )
7437{
sewardj8e91fd42014-07-11 12:05:47 +00007438 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00007439 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7440 assign(res, binop(opI64x2toV128, unop(slice, argL),
7441 unop(slice, argR)));
7442 return res;
7443}
7444
7445
sewardjdf9d6d52014-06-27 10:43:22 +00007446/* Generate signed/unsigned absolute difference vector IR. */
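/* The selection is branchless: |msk| is all-ones in lanes where
   argL > argR, so the result picks (argL - argR) in those lanes and
   (argR - argL) in the others, i.e. the absolute difference per lane. */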
7447static
7448IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7449{
sewardj6f312d02014-06-28 12:21:37 +00007450 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00007451 IRTemp argL = newTempV128();
7452 IRTemp argR = newTempV128();
7453 IRTemp msk = newTempV128();
7454 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00007455 assign(argL, argLE);
7456 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00007457 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00007458 mkexpr(argL), mkexpr(argR)));
7459 assign(res,
7460 binop(Iop_OrV128,
7461 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007462 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00007463 mkexpr(msk)),
7464 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007465 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00007466 unop(Iop_NotV128, mkexpr(msk)))));
7467 return res;
7468}
7469
7470
sewardj6f312d02014-06-28 12:21:37 +00007471/* Generate IR that takes a V128 and sign- or zero-widens
7472 either the lower or upper set of lanes to twice-as-wide,
7473 resulting in a new V128 value. */
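/* The trick used here: interleaving the source with itself duplicates
   each selected narrow lane within its wide lane, and a subsequent
   shift right by the narrow width -- logical to zero-widen, arithmetic
   to sign-widen -- turns each doubled lane into a correctly extended
   wide lane. */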
7474static
sewardja5a6b752014-06-30 07:33:56 +00007475IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7476 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00007477{
sewardj8e91fd42014-07-11 12:05:47 +00007478 IRTemp src = newTempV128();
7479 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00007480 assign(src, srcE);
7481 switch (sizeNarrow) {
7482 case X10:
7483 assign(res,
7484 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7485 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7486 : Iop_InterleaveLO32x4,
7487 mkexpr(src),
7488 mkexpr(src)),
7489 mkU8(32)));
7490 break;
7491 case X01:
7492 assign(res,
7493 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7494 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7495 : Iop_InterleaveLO16x8,
7496 mkexpr(src),
7497 mkexpr(src)),
7498 mkU8(16)));
7499 break;
7500 case X00:
7501 assign(res,
7502 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7503 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7504 : Iop_InterleaveLO8x16,
7505 mkexpr(src),
7506 mkexpr(src)),
7507 mkU8(8)));
7508 break;
7509 default:
7510 vassert(0);
7511 }
7512 return res;
7513}
7514
7515
sewardja5a6b752014-06-30 07:33:56 +00007516/* Generate IR that takes a V128 and sign- or zero-widens
7517 either the even or odd lanes to twice-as-wide,
7518 resulting in a new V128 value. */
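/* Each wide destination lane is exactly a pair of narrow source lanes,
   so widening the odd lanes needs just one sign- or zero-extending
   right shift by the narrow width, while widening the even lanes first
   shifts left by that amount to move each even lane to the top half. */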
7519static
7520IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7521 UInt sizeNarrow, IRExpr* srcE )
7522{
sewardj8e91fd42014-07-11 12:05:47 +00007523 IRTemp src = newTempV128();
7524 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007525 IROp opSAR = mkVecSARN(sizeNarrow+1);
7526 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7527 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7528 IROp opSxR = zWiden ? opSHR : opSAR;
7529 UInt amt = 0;
7530 switch (sizeNarrow) {
7531 case X10: amt = 32; break;
7532 case X01: amt = 16; break;
7533 case X00: amt = 8; break;
7534 default: vassert(0);
7535 }
7536 assign(src, srcE);
7537 if (fromOdd) {
7538 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7539 } else {
7540 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7541 mkU8(amt)));
7542 }
7543 return res;
7544}
7545
7546
7547/* Generate IR that takes two V128s and narrows (takes lower half)
7548 of each lane, producing a single V128 value. */
7549static
7550IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7551{
sewardj8e91fd42014-07-11 12:05:47 +00007552 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007553 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7554 mkexpr(argHi), mkexpr(argLo)));
7555 return res;
7556}
7557
7558
sewardj487559e2014-07-10 14:22:45 +00007559/* Return a temp which holds the vector dup of the lane of width
7560 (1 << size) obtained from src[laneNo]. */
7561static
7562IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7563{
7564 vassert(size <= 3);
7565 /* Normalise |laneNo| so it is of the form
7566 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7567 This puts the bits we want to inspect at constant offsets
7568 regardless of the value of |size|.
7569 */
7570 UInt ix = laneNo << size;
7571 vassert(ix <= 15);
7572 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7573 switch (size) {
7574 case 0: /* B */
7575 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7576 /* fallthrough */
7577 case 1: /* H */
7578 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7579 /* fallthrough */
7580 case 2: /* S */
7581 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7582 /* fallthrough */
7583 case 3: /* D */
7584 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7585 break;
7586 default:
7587 vassert(0);
7588 }
sewardj8e91fd42014-07-11 12:05:47 +00007589 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007590 assign(res, src);
7591 Int i;
7592 for (i = 3; i >= 0; i--) {
7593 if (ops[i] == Iop_INVALID)
7594 break;
sewardj8e91fd42014-07-11 12:05:47 +00007595 IRTemp tmp = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007596 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7597 res = tmp;
7598 }
7599 return res;
7600}
7601
7602
7603/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7604 selector encoded as shown below. Return a new V128 holding the
7605 selected lane from |srcV| dup'd out to V128, and also return the
7606 lane number, log2 of the lane size in bytes, and width-character via
7607 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7608 is an invalid selector, in which case return
7609 IRTemp_INVALID, 0, 0 and '?' respectively.
7610
7611 imm5 = xxxx1 signifies .b[xxxx]
7612 = xxx10 .h[xxx]
7613 = xx100 .s[xx]
7614 = x1000 .d[x]
7615 otherwise invalid
7616*/
7617static
7618IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7619 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7620 IRExpr* srcV, UInt imm5 )
7621{
7622 *laneNo = 0;
7623 *laneSzLg2 = 0;
7624 *laneCh = '?';
7625
7626 if (imm5 & 1) {
7627 *laneNo = (imm5 >> 1) & 15;
7628 *laneSzLg2 = 0;
7629 *laneCh = 'b';
7630 }
7631 else if (imm5 & 2) {
7632 *laneNo = (imm5 >> 2) & 7;
7633 *laneSzLg2 = 1;
7634 *laneCh = 'h';
7635 }
7636 else if (imm5 & 4) {
7637 *laneNo = (imm5 >> 3) & 3;
7638 *laneSzLg2 = 2;
7639 *laneCh = 's';
7640 }
7641 else if (imm5 & 8) {
7642 *laneNo = (imm5 >> 4) & 1;
7643 *laneSzLg2 = 3;
7644 *laneCh = 'd';
7645 }
7646 else {
7647 /* invalid */
7648 return IRTemp_INVALID;
7649 }
7650
7651 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7652}
7653
7654
7655/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
7656static
7657IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7658{
7659 IRType ty = Ity_INVALID;
7660 IRTemp rcS = IRTemp_INVALID;
7661 switch (size) {
7662 case X01:
7663 vassert(imm <= 0xFFFFULL);
7664 ty = Ity_I16;
7665 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7666 break;
7667 case X10:
7668 vassert(imm <= 0xFFFFFFFFULL);
7669 ty = Ity_I32;
7670 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7671 break;
7672 case X11:
7673 ty = Ity_I64;
7674 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7675 default:
7676 vassert(0);
7677 }
7678 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7679 return rcV;
7680}
7681
7682
sewardj25523c42014-06-15 19:36:29 +00007683/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7684 and the upper can contain any value -- it is ignored. If |is2| is False,
7685 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7686 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7687 half of vector reg |dd| and leave the lower half unchanged. This
7688 simulates the behaviour of the "foo/foo2" instructions in which the
7689 destination is half the width of sources, for example addhn/addhn2.
7690*/
7691static
7692void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7693{
7694 if (is2) {
7695 /* Get the old contents of Vdd, zero the upper half, and replace
7696 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00007697 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007698 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00007699 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007700 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7701 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007702 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007703 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7704 mkexpr(t_newHI_zero)));
7705 putQReg128(dd, mkexpr(res));
7706 } else {
7707 /* This is simple. */
7708 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7709 }
7710}
7711
7712
sewardj8e91fd42014-07-11 12:05:47 +00007713/* Compute vector SQABS at lane size |size| for |srcE|, returning
7714 the q result in |*qabs| and the normal result in |*nabs|. */
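/* Note (illustrative): the two results differ only for the most
   negative lane value, e.g. 0x80 in a byte lane: 0 - 0x80 wraps back
   to 0x80 in |*nabs|, whereas the saturating subtract clamps to 0x7F
   in |*qabs|. That difference is what callers use to set QC. */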
7715static
7716void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7717 IRExpr* srcE, UInt size )
7718{
7719 IRTemp src, mask, maskn, nsub, qsub;
7720 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7721 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7722 assign(src, srcE);
7723 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7724 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7725 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7726 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7727 assign(*nabs, binop(Iop_OrV128,
7728 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7729 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7730 assign(*qabs, binop(Iop_OrV128,
7731 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7732 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7733}
7734
7735
sewardj51d012a2014-07-21 09:19:50 +00007736/* Compute vector SQNEG at lane size |size| for |srcE|, returning
7737 the q result in |*qneg| and the normal result in |*nneg|. */
7738static
7739void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7740 IRExpr* srcE, UInt size )
7741{
7742 IRTemp src = IRTemp_INVALID;
7743 newTempsV128_3(&src, nneg, qneg);
7744 assign(src, srcE);
7745 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7746 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7747}
7748
7749
sewardjecedd982014-08-11 14:02:47 +00007750/* Zero all except the least significant lane of |srcE|, where |size|
7751 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00007752static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00007753{
7754 vassert(size < 4);
7755 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00007756 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00007757 return t;
7758}
7759
7760
sewardj51d012a2014-07-21 09:19:50 +00007761/* Generate IR to compute vector widening MULL from either the lower
7762 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7763 widening multiplies are unsigned when isU==True and signed when
7764 isU==False. |size| is the narrow lane size indication. Optionally,
7765 the product may be added to or subtracted from vecD, at the wide lane
7766 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7767 is 'm' (only multiply) then the accumulate part does not happen, and
7768 |vecD| is expected to == IRTemp_INVALID.
7769
7770 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7771 are allowed. The result is returned in a new IRTemp, which is
7772 returned in *res. */
7773static
7774void math_MULL_ACC ( /*OUT*/IRTemp* res,
7775 Bool is2, Bool isU, UInt size, HChar mas,
7776 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7777{
7778 vassert(res && *res == IRTemp_INVALID);
7779 vassert(size <= 2);
7780 vassert(mas == 'm' || mas == 'a' || mas == 's');
7781 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7782 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7783 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7784 : (mas == 's' ? mkVecSUB(size+1)
7785 : Iop_INVALID);
7786 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7787 mkexpr(vecN), mkexpr(vecM));
7788 *res = newTempV128();
7789 assign(*res, mas == 'm' ? mkexpr(mul)
7790 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7791}
7792
7793
7794/* Same as math_MULL_ACC, except the multiply is signed widening,
7795 the multiplied value is then doubled, before being added to or
7796 subtracted from the accumulated value. And everything is
7797 saturated. In all cases, saturation residuals are returned
7798 via (sat1q, sat1n), and in the accumulate cases,
7799 via (sat2q, sat2n) too. All results are returned in new temporaries.
7800 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
7801 so the caller can tell this has happened. */
7802static
7803void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
7804 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7805 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
7806 Bool is2, UInt size, HChar mas,
7807 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7808{
7809 vassert(size <= 2);
7810 vassert(mas == 'm' || mas == 'a' || mas == 's');
7811 /* Compute
7812 sat1q = vecN.D[is2] *sq vecM.D[is2] *q 2
7813 sat1n = vecN.D[is2] *s vecM.D[is2] * 2
7814 IOW take either the low or high halves of vecN and vecM, signed widen,
7815 multiply, double that, and signedly saturate. Also compute the same
7816 but without saturation.
7817 */
7818 vassert(sat2q && *sat2q == IRTemp_INVALID);
7819 vassert(sat2n && *sat2n == IRTemp_INVALID);
7820 newTempsV128_3(sat1q, sat1n, res);
7821 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
7822 mkexpr(vecN), mkexpr(vecM));
7823 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
7824 mkexpr(vecN), mkexpr(vecM));
7825 assign(*sat1q, mkexpr(tq));
7826 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
7827
7828 /* If there is no accumulation, the final result is sat1q,
7829 and there's no assignment to sat2q or sat2n. */
7830 if (mas == 'm') {
7831 assign(*res, mkexpr(*sat1q));
7832 return;
7833 }
7834
7835 /* Compute
7836 sat2q = vecD +sq/-sq sat1q
7837 sat2n = vecD +/- sat1n
7838 result = sat2q
7839 */
7840 newTempsV128_2(sat2q, sat2n);
7841 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
7842 mkexpr(vecD), mkexpr(*sat1q)));
7843 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
7844 mkexpr(vecD), mkexpr(*sat1n)));
7845 assign(*res, mkexpr(*sat2q));
7846}
7847
7848
sewardj54ffa1d2014-07-22 09:27:49 +00007849/* Generate IR for widening signed vector multiplies. The operands
7850 have their lane width signedly widened, and they are then multiplied
7851 at the wider width, returning results in two new IRTemps. */
sewardja5a6b752014-06-30 07:33:56 +00007852static
sewardj54ffa1d2014-07-22 09:27:49 +00007853void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
7854 UInt sizeNarrow, IRTemp argL, IRTemp argR )
7855{
7856 vassert(sizeNarrow <= 2);
7857 newTempsV128_2(resHI, resLO);
7858 IRTemp argLhi = newTemp(Ity_I64);
7859 IRTemp argLlo = newTemp(Ity_I64);
7860 IRTemp argRhi = newTemp(Ity_I64);
7861 IRTemp argRlo = newTemp(Ity_I64);
7862 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
7863 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
7864 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
7865 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
7866 IROp opMulls = mkVecMULLS(sizeNarrow);
7867 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
7868 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
7869}
7870
7871
sewardj257e99f2014-08-03 12:45:19 +00007872/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
7873 double that, possibly add a rounding constant (R variants), and take
7874 the high half. */
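/* In scalar terms, per lane of width w (illustrative):
   sqdmulh(a,b)  == sat((2*a*b) >> w)
   sqrdmulh(a,b) == sat((2*a*b + (1 << (w-1))) >> w)
   hence the rounding constant below of 1 << 15 for H lanes and
   1 << 31 for S lanes. */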
sewardj54ffa1d2014-07-22 09:27:49 +00007875static
7876void math_SQDMULH ( /*OUT*/IRTemp* res,
7877 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7878 Bool isR, UInt size, IRTemp vN, IRTemp vM )
7879{
7880 vassert(size == X01 || size == X10); /* s or h only */
7881
7882 newTempsV128_3(res, sat1q, sat1n);
7883
7884 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
7885 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
7886
7887 IRTemp addWide = mkVecADD(size+1);
7888
7889 if (isR) {
7890 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7891
7892 Int rcShift = size == X01 ? 15 : 31;
7893 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
7894 assign(*sat1n,
7895 binop(mkVecCATODDLANES(size),
7896 binop(addWide,
7897 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7898 mkexpr(roundConst)),
7899 binop(addWide,
7900 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
7901 mkexpr(roundConst))));
7902 } else {
7903 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
7904
7905 assign(*sat1n,
7906 binop(mkVecCATODDLANES(size),
7907 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
7908 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
7909 }
7910
7911 assign(*res, mkexpr(*sat1q));
7912}
7913
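/* A scalar sketch of the h-size rounding case handled above, assuming
   the usual architectural definition of SQRDMULH (comment only; the
   helper name is invented):

      static Short sqrdmulh16 ( Short a, Short b )
      {
         Long v  = 2 * (Long)a * (Long)b + 0x8000;  // the R rounding const
         Long hi = v >> 16;                         // take the high half
         if (hi >  0x7FFF) hi =  0x7FFF;            // signed saturation
         if (hi < -0x8000) hi = -0x8000;
         return (Short)hi;
      }

   For example sqrdmulh16(0x4000, 0x4000) == 0x2000, and the only
   saturating input pair is a == b == 0x8000 (-32768). */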

/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}

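/* Worked example of the Q-difference scheme above (illustrative only):
   for "uqshl" with 8-bit lanes and shift == 2, a source lane 0x48
   saturates, since 0x48 << 2 == 0x120 does not fit in 8 bits.  The
   detection computes qDiff1 = 0x48 >> (8-2) = 0x01 and qDiff2 = 0x00,
   which differ, so QCFLAG gets set; a lane of 0x28 gives
   qDiff1 = 0x00 == qDiff2 and leaves QCFLAG unchanged. */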

/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))
                                  ),
                                  mkexpr(vecOne)
                            ),
                            mkexpr(scaOne)
                      )
                )
          )
   );
   return res;
}

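/* A quick sanity check of the identity (unsigned 8-bit lanes, a == 255,
   b == 254):
      (a >> 1) + (b >> 1) + (((a & 1) + (b & 1) + 1) >> 1)
    = 127 + 127 + ((1 + 0 + 1) >> 1) = 255,
   which matches the full-precision (a + b + 1) >> 1 = 510 >> 1 = 255
   without ever needing a 9th bit. */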

/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}

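/* For instance, a scalar 'h'-sized saturating op would pass
   opZHI == Iop_ZeroHI112ofV128, so that junk in the ignored upper 112
   bits of |qres| and |nres| cannot spuriously set QCFLAG. */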

/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0] --> [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0] --> [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}

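/* Illustration (not used by the decoder): for the 4s case, feeding
      vecM.4s = [m3 m2 m1 m0],  vecN.4s = [n3 n2 n1 n0]
   through this function gives
      rearrL = [m3 m1 n3 n1],  rearrR = [m2 m0 n2 n0]
   so that a lanewise add of rearrL and rearrR yields exactly the
   FADDP result [m3+m2 m1+m0 n3+n2 n1+n0]. */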

/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}

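/* Example of the recursion: two_to_the_minus(5) computes
   two_to_the_minus(2) * two_to_the_minus(3) = 0.25 * 0.125 = 0.03125,
   i.e. 2^-5.  Every intermediate value is a power of two, so each
   multiply is exact in double precision. */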

/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}

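/* A concrete reading of the EXT case above (illustrative only):
   "ext v0.16b, v1.16b, v2.16b, #3" sets
      v0.b[i] = (i + 3 <= 15) ? v1.b[i+3] : v2.b[i+3-16]
   i.e. v0 receives bytes 3 .. 18 of the 32-byte value v2:v1, with v1
   supplying the low 16 bytes.  That is exactly what the Iop_SliceV128
   of (sHi, sLo, imm4) computes. */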

static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}

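/* For example (illustrative only), with len == 0 and bitQ == 1,
   "tbl v0.16b, {v5.16b}, v6.16b" sets each byte of v0 to
      v0.b[i] = (v6.b[i] <= 15) ? v5.b[v6.b[i]] : 0
   whereas "tbx" leaves v0.b[i] unchanged for out-of-range indices.
   Larger |len| values simply extend the table to 32, 48 or 64 bytes
   drawn from consecutive (mod 32) registers. */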

static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      IROp op     = isUZP1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      IROp op1    = isTRN1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IROp op2    = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op     = isZIP1 ? mkVecINTERLEAVELO(size)
                           : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

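/* For instance (illustrative only), with 32-bit lanes and bitQ == 1,
   "uzp1 v0.4s, v1.4s, v2.4s" yields
      v0 = [v2.s[2] v2.s[0] v1.s[2] v1.s[0]]
   and "uzp2" the corresponding odd-numbered lanes; "trn1"/"trn2" and
   "zip1"/"zip2" are the analogous transpose and interleave selections
   built from the same primitives above. */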

static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

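   /* For example (illustrative only): "saddlv h0, v1.8b" (bitQ == 0,
      size == X00) sign-extends the eight byte lanes in the low half of
      v1 to 16 bits, folds them together with 16-bit adds, and leaves
      the sum in h0, with the rest of q0 zeroed.  The widening
      guarantees the fold itself cannot overflow 16 bits, since
      |sum| <= 8 * 128. */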
   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20   14     9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* FMOV (vector, immediate, single precision) */

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 FADDP s_2s -------- */
      /* -------- 1,01,01101 FADDP d_2d -------- */
      Bool   isD   = sz == X01;
      IROp   opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp   opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src   = newTempV128();
      IRTemp argL  = newTempV128();
      IRTemp argR  = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (sh == 64 && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
      }
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op   = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-sh)));
      IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

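   /* The SRI/SLI insert masks above are easiest to see with a worked
      example (illustrative only).  For "sri d0, d1, #8", nmask is
      0x8000000000000000 arithmetically shifted right by 7, giving
      0xFF00000000000000: the top 8 bits of d0 are kept and d1 >> 8 is
      inserted below them.  Dually, for "sli d0, d1, #8", nmask is
      (1 << 8) - 1 = 0xFF, keeping the bottom 8 bits of d0 underneath
      d1 << 8. */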
   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res = IRTemp_INVALID;
      IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";   op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";   op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn";  op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn";  op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun";  op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     11 9 4
      01 U  11110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101  SQDMULL -------- */ // 0 (ks)
      /* -------- 0,1001  SQDMLAL -------- */ // 1
      /* -------- 0,1011  SQDMLSL -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, %c%u\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}

9362
9363static
9364Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9365{
9366 /* 31 29 28 23 21 20 15 10 9 4
9367 01 U 11110 size 1 m opcode 1 n d
sewardj51d012a2014-07-21 09:19:50 +00009368 Decode fields: u,size,opcode
sewardjdf1628c2014-06-10 22:52:05 +00009369 */
9370# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9371 if (INSN(31,30) != BITS2(0,1)
9372 || INSN(28,24) != BITS5(1,1,1,1,0)
9373 || INSN(21,21) != 1
9374 || INSN(10,10) != 1) {
9375 return False;
9376 }
9377 UInt bitU = INSN(29,29);
9378 UInt size = INSN(23,22);
9379 UInt mm = INSN(20,16);
9380 UInt opcode = INSN(15,11);
9381 UInt nn = INSN(9,5);
9382 UInt dd = INSN(4,0);
9383 vassert(size < 4);
9384
sewardj51d012a2014-07-21 09:19:50 +00009385 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9386 /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
9387 /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
9388 /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
9389 /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
9390 Bool isADD = opcode == BITS5(0,0,0,0,1);
9391 Bool isU = bitU == 1;
9392 IROp qop = Iop_INVALID;
9393 IROp nop = Iop_INVALID;
9394 if (isADD) {
9395 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9396 nop = mkVecADD(size);
9397 } else {
9398 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9399 nop = mkVecSUB(size);
9400 }
9401 IRTemp argL = newTempV128();
9402 IRTemp argR = newTempV128();
9403 IRTemp qres = newTempV128();
9404 IRTemp nres = newTempV128();
9405 assign(argL, getQReg128(nn));
9406 assign(argR, getQReg128(mm));
9407 assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
sewardj257e99f2014-08-03 12:45:19 +00009408 size, binop(qop, mkexpr(argL), mkexpr(argR)))));
sewardj51d012a2014-07-21 09:19:50 +00009409 assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
sewardj257e99f2014-08-03 12:45:19 +00009410 size, binop(nop, mkexpr(argL), mkexpr(argR)))));
sewardj51d012a2014-07-21 09:19:50 +00009411 putQReg128(dd, mkexpr(qres));
9412 updateQCFLAGwithDifference(qres, nres);
9413 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9414 : (isU ? "uqsub" : "sqsub");
9415 const HChar arr = "bhsd"[size];
sewardj12972182014-08-04 08:09:47 +00009416 DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
sewardj51d012a2014-07-21 09:19:50 +00009417 return True;
9418 }
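   /* Note on the scheme above: saturation is detected by computing the
      result twice, once with the saturating op (qop) and once with the
      plain wrapping op (nop).  If the two disagree in any lane then
      saturation must have occurred, and updateQCFLAGwithDifference
      records that in the cumulative QC (saturation) flag. */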

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL  d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL  d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isR ? (isU ? "urshl" : "srshl")
                            : (isU ? "ushl"  : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
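   /* Note on the V256 convention above: the shift op returns a pair of
      V128s, of which the lower (Iop_V256toV128_0) is the shifted
      result and the upper (Iop_V256toV128_1) indicates, per lane,
      whether saturation occurred.  Since the unused lanes were zeroed
      beforehand, comparing the upper half against an all-zero vector
      updates QC exactly when the lowest lane saturated. */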

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
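   /* Note: SQDMULH returns the high half of the doubled product,
      sat((2 * n * m) >> lanebits), and the rounding form SQRDMULH
      adds 1 << (lanebits-1) to the doubled product before the shift.
      As with SQDMULL, n = m = 0x8000 (H lanes) is the saturating
      case: 2 * n * m = 0x80000000, whose top half (+32768) does not
      fit in 16 bits and so saturates to 0x7FFF. */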

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp opN = Iop_INVALID;
      Bool zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }
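   /* Note on the QC logic above: the narrowed result is re-widened
      (zero- or sign-extended, per zWiden) and compared with the
      original source lanes.  If narrowing lost information, the
      re-widened value differs and QC gets set; the lanes zeroed out
      beforehand re-widen to zero and so can never perturb the flag. */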

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23   21 20 19 15     11 9 4
      01 U 11111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH  s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%u, %c%u, v%u.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
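   /* Note on the "nudge" above: the underlying shift IROps cannot
      express a shift of exactly lanebits.  An unsigned shift by
      lanebits just produces zero, handled specially; a signed shift by
      lanebits gives the same lane as a shift by lanebits-1 (all copies
      of the sign bit), so the amount is nudged down by one instead. */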

   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp tmp = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }
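   /* Worked example for the SRI masking above: with H lanes
      (lanebits == 16) and shift == 4, the inserted field is src >> 4
      and nmask is (all ones) << 12 in each lane, so the top 4 bits of
      each destination lane are retained and the bottom 12 come from
      the shifted source. */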

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      IROp    op  = mkVecSHLN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == 0) {
         assign(res, src);
      } else {
         assign(res, binop(op, src, mkU8(shift)));
         if (isSLI) {
            IRExpr* nmask = binop(mkVecSHRN(size),
                                  mkV128(0xFFFF), mkU8(lanebits - shift));
            IRTemp tmp = newTempV128();
            assign(tmp, binop(Iop_OrV128,
                              mkexpr(res),
                              binop(Iop_AndV128, getQReg128(dd), nmask)));
            res = tmp;
         }
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isSLI ? "sli" : "shl";
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110 SQSHL  std7_std7_#imm -------- */
      /* -------- 1,01110 UQSHL  std7_std7_#imm -------- */
      /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = newTempV128();
      assign(src, getQReg128(nn));
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
                                    isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
      /* -------- 0,10000  SHRN{,2} #imm -------- */
      /* -------- 0,10001 RSHRN{,2} #imm -------- */
      /* Narrows, and size is the narrow size. */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool isR   = opcode == BITS5(1,0,0,0,1);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1);
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      IRTemp t3 = newTempV128();
      assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
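   /* Note: the rounding form adds 1 << (shift-1) to each wide lane
      before shifting, giving round-to-nearest with ties rounded up.
      E.g. for shift == 3, a wide lane holding 21 (0x15) narrows to
      (21 + 4) >> 3 == 3 under RSHRN, versus 21 >> 3 == 2 under plain
      SHRN. */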

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN{,2} #imm -------- */
      /* -------- 1,10010   UQSHRN{,2} #imm -------- */
      /* -------- 0,10011  SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011  UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000  SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }
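   /* Note on the Q-flag computation above: 'pair' carries the 64 bits
      of narrowed data in its lower half and per-lane saturation
      indications in its upper half.  Interleaving the upper half with
      itself replicates those indications across all 128 bits, so the
      comparison against zero sets QC iff any participating lane
      saturated. */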

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTempV128();
      IRTemp  zero = newTempV128();
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%u\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }
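   /* Note on the idiom above: interleaving the source with zero parks
      each narrow lane in the top half of a wide lane, in effect a left
      shift by the narrow lane width.  The subsequent right shift by
      (narrow lane width - sh), arithmetic for SSHLL and logical for
      USHLL, then both widens (sign- or zero-extends) the lane and
      applies the requested left shift of sh in a single operation. */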

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
                              : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
                              : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
                              : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }
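   /* Note: ADDHN/SUBHN keep only the top half of each wide sum or
      difference, i.e. (n op m) >> lanebits, then narrow.  The rounding
      forms add 1 << (lanebits-1) first; e.g. RADDHN narrowing 32-bit
      lanes to 16 computes (n + m + 0x8000) >> 16 per lane. */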

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
                              : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100 UMULL{2} -------- */ // 0
      /* -------- 0,1000 SMLAL{2} -------- */ // 1
      /* -------- 1,1000 UMLAL{2} -------- */ // 1
      /* -------- 0,1010 SMLSL{2} -------- */ // 2
      /* -------- 1,1010 UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      const HChar* nm = ks == 0 ? "sqdmull"
                                : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110 PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES(resHi, resLo, size);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
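   /* Note: the halving ops above avoid overflow by working at twice
      the lane width: widen both operands (in two halves), add or
      subtract, shift right by one (arithmetic for signed, logical for
      unsigned), and narrow the two halves back together.  E.g. UHADD
      on byte lanes computes (a + b) >> 1 exactly, even when a + b
      overflows 8 bits. */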

   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar       = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr    = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
10696
   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
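      /* There is no >= primop; a >= b is computed as NOT(b > a). */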
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl"  : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
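      /* As used here, these Q-shift primops return a V256 whose lower
         V128 is the shifted result and whose upper V128 is the
         saturation indicator; the latter is compared against zero to
         update the QC flag. */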
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isSUB = bitU == 1;
      IROp   op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t     = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
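      /* CMTST sets a lane to all ones iff (argL & argR) is nonzero in
         that lane, hence the NOT of an equality comparison against
         zero. */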
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS    = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res    = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
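      /* Pairwise op trick: apply the base op to the even-lane and
         odd-lane concatenations of M:N, so that lane i of the result
         is op(lane 2i, lane 2i+1) of the concatenated sources. */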
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
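      /* Same even/odd-lane concatenation trick as for SMAXP etc
         above. */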
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE  = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
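      /* preL and preR now hold the paired-up lanes of the two sources,
         so the single vector FADD below produces all of the pairwise
         sums at once. */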
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
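      /* Pairwise long add: widen the odd- and even-numbered lanes
         separately to double width and add them, so that result lane i
         is src[2i] + src[2i+1]. */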
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isGT  = bitU == 0;
      IRExpr* argL  = getQReg128(nn);
      IRExpr* argR  = mkV128(0x0000);
      IRTemp  res   = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
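      /* There is no <= primop; a <= 0 is computed as NOT(a > 0). */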
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01011: ABS std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, unop(mkVecABS(size), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,xx,01011: NEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp res = newTempV128();
      assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
      /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isFNEG = bitU == 1;
      IROp   op     = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
                             : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
      IRTemp res    = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010: XTN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool   is2  = bitQ == 1;
      IROp   opN  = mkVecNARROWUN(size);
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
      putLO64andZUorPutHI64(is2, dd, resN);
      const HChar* nm        = "xtn";
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN{,2} -------- */
      /* -------- 1,xx,10100: UQXTN{,2} -------- */
      /* -------- 1,xx,10010: SQXTUN{,2} -------- */
      if (size == X11) return False;
      vassert(size < 3);
      Bool  is2    = bitQ == 1;
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp resN = newTempV128();
      assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putLO64andZUorPutHI64(is2, dd, resN);
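      /* Saturation detection: re-widen the narrowed result and compare
         it with the original; any lane that differs must have
         saturated, and the difference updates the QC flag. */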
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
      /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
      /* Widens, and size is the narrow size. */
      if (size == X11) return False;
      Bool is2   = bitQ == 1;
      IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
      IROp opSHL = mkVecSHLN(size+1);
      IRTemp src = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
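      /* Interleaving src with itself gives double-width lanes holding
         the value in both halves; shifting those lanes left by the
         narrow lane width then leaves exactly src << (8 << size) in
         each wide lane, which is what SHLL computes. */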
      assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
                               mkU8(8 << size)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
      return True;
   }

   if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
      IRTemp  rm    = mk_get_IR_rounding_mode();
      IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
      IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
      putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
      putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
      if (bitQ == 0) {
         putQRegLane(dd, 1, mkU64(0));
      }
      DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
          nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
      return True;
   }

   if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,10,11100: URECPE  4s_4s, 2s_2s -------- */
      /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
      Bool isREC = bitU == 0;
      IROp op    = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
      IRTemp res = newTempV128();
      assign(res, unop(op, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isREC ? "urecpe" : "ursqrte";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 0,0x,11101: SCVTF -------- */
      /* -------- 1,0x,11101: UCVTF -------- */
      /* 31  28        22 21       15     9 4
         0q0 01110 0 sz 1  00001 110110   n d  SCVTF Vd, Vn
         0q1 01110 0 sz 1  00001 110110   n d  UCVTF Vd, Vn
         with laneage:
         case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
      */
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isF64 = (size & 1) == 1;
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                    isQ, isF64 );
         IROp   iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm  = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 19 15     11 9 4
      0  Q  U  01111 size L  M  m  opcode H  0 n d
      Decode fields are: u,size,opcode
      M is really part of the mm register number.  Individual
      cases need to inspect L and H though.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt bitL   = INSN(21,21);
   UInt bitM   = INSN(20,20);
   UInt mmLO4  = INSN(19,16);
   UInt opcode = INSN(15,12);
   UInt bitH   = INSN(11,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   vassert(bitH < 2 && bitM < 2 && bitL < 2);

   if (bitU == 0 && size >= X10
       && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
      /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD   = (size & 1) == 1;
      Bool isSUB = opcode == BITS4(0,1,0,1);
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp   opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      IRTemp t2    = newTempV128();
      // FIXME: double rounding; use FMA primops instead
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
          isD ? 'd' : 's', index);
      return True;
   }

   if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isD = (size & 1) == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity  = isD ? Ity_F64 : Ity_F32;
      IRTemp elem = newTemp(ity);
      UInt   mm   = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd = math_DUP_TO_V128(elem, ity);
      IRTemp res  = newTempV128();
      assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
      return True;
   }

   if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
       || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
      /* -------- 1,xx,0000 MLA s/h variants only -------- */
      /* -------- 1,xx,0100 MLS s/h variants only -------- */
      /* -------- 0,xx,1000 MUL s/h variants only -------- */
      Bool isMLA = opcode == BITS4(0,0,0,0);
      Bool isMLS = opcode == BITS4(0,1,0,0);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // d case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
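      /* For the h variants, M is part of the 3-bit index H:L:M and
         only mmLO4 names the register; for the s variants the index is
         H:L and M extends mm to 5 bits. */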
      IROp   opMUL = mkVecMUL(size);
      IROp   opADD = mkVecADD(size);
      IROp   opSUB = mkVecSUB(size);
      HChar  ch    = size == X01 ? 'h' : 's';
      IRTemp vecM  = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD  = newTempV128();
      IRTemp vecN  = newTempV128();
      IRTemp res   = newTempV128();
      assign(vecD, getQReg128(dd));
      assign(vecN, getQReg128(nn));
      IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
      if (isMLA || isMLS) {
         assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
      } else {
         assign(res, prod);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
                                        : (isMLS ? "mls" : "mul"),
          nameQReg128(dd), arr,
          nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
      return True;
   }

   if (opcode == BITS4(1,0,1,0)
       || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
      /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
      /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
      /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,0): ks = 0; break;
         case BITS4(0,0,1,0): ks = 1; break;
         case BITS4(0,1,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      Bool isU = bitU == 1;
      Bool is2 = bitQ == 1;
      UInt mm  = 32; // invalid
      UInt ix  = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN = newTempV128();
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1, size+1);
      HChar ch = size == X01 ? 'h' : 's';
      DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
          isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
      return True;
   }

sewardj51d012a2014-07-21 09:19:50 +000011781 if (bitU == 0
11782 && (opcode == BITS4(1,0,1,1)
11783 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
11784 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
11785 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
11786 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
11787 /* Widens, and size refers to the narrowed lanes. */
11788 UInt ks = 3;
11789 switch (opcode) {
11790 case BITS4(1,0,1,1): ks = 0; break;
11791 case BITS4(0,0,1,1): ks = 1; break;
11792 case BITS4(0,1,1,1): ks = 2; break;
11793 default: vassert(0);
11794 }
11795 vassert(ks >= 0 && ks <= 2);
11796 Bool is2 = bitQ == 1;
11797 UInt mm = 32; // invalid
11798 UInt ix = 16; // invalid
11799 switch (size) {
11800 case X00:
11801 return False; // h_b_b[] case is not allowed
11802 case X01:
11803 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11804 case X10:
11805 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11806 case X11:
11807 return False; // q_d_d[] case is not allowed
11808 default:
11809 vassert(0);
11810 }
11811 vassert(mm < 32 && ix < 16);
11812 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
11813 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
11814 newTempsV128_2(&vecN, &vecD);
11815 assign(vecN, getQReg128(nn));
11816 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11817 assign(vecD, getQReg128(dd));
11818 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
11819 is2, size, "mas"[ks],
11820 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
11821 putQReg128(dd, mkexpr(res));
11822 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
11823 updateQCFLAGwithDifference(sat1q, sat1n);
11824 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
11825 updateQCFLAGwithDifference(sat2q, sat2n);
11826 }
sewardj54ffa1d2014-07-22 09:27:49 +000011827 const HChar* nm = ks == 0 ? "sqdmull"
sewardj51d012a2014-07-21 09:19:50 +000011828 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
11829 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11830 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11831 HChar ch = size == X01 ? 'h' : 's';
11832 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
11833 nm, is2 ? "2" : "",
11834 nameQReg128(dd), arrWide,
11835 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
11836 return True;
11837 }
11838
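   /* Sketch of the saturation tracking above (my reading, stated as an
      assumption): updateQCFLAGwithDifference compares the saturated and
      unsaturated results and accumulates any lane difference into the
      guest QC flag, conceptually
         if (sat1q != sat1n) FPSR.QC |= 1;
      so QC ends up set iff some lane actually saturated. */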
sewardj257e99f2014-08-03 12:45:19 +000011839 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
11840 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
11841 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
11842 UInt mm = 32; // invalid
11843 UInt ix = 16; // invalid
11844 switch (size) {
11845 case X00:
11846 return False; // b case is not allowed
11847 case X01:
11848 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
11849 case X10:
11850 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
11851 case X11:
11852 return False; // q case is not allowed
11853 default:
11854 vassert(0);
11855 }
11856 vassert(mm < 32 && ix < 16);
11857 Bool isR = opcode == BITS4(1,1,0,1);
11858 IRTemp res, sat1q, sat1n, vN, vM;
11859 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
11860 vN = newTempV128();
11861 assign(vN, getQReg128(nn));
11862 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
11863 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
11864 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11865 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
11866 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
11867 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
11868 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11869 HChar ch = size == X01 ? 'h' : 's';
11870 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
11871 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
11872 return True;
11873 }
11874
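   /* Worked example for SQDMULH on 16-bit lanes (illustrative only):
         0x4000 * 0x4000 -> (2 * 0x10000000) >> 16 = 0x2000
         0x8000 * 0x8000 -> 2 * 0x40000000 overflows 32 bits, so the
      result saturates to 0x7FFF and QC is set. SQRDMULH is the same
      but adds a rounding constant (1 << 15) before the shift. */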
sewardjdf1628c2014-06-10 22:52:05 +000011875 return False;
11876# undef INSN
11877}
11878
sewardjfc83d2c2014-06-12 10:15:46 +000011879
sewardjdf1628c2014-06-10 22:52:05 +000011880static
11881Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
11882{
11883# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11884 return False;
11885# undef INSN
11886}
11887
sewardjfc83d2c2014-06-12 10:15:46 +000011888
sewardjdf1628c2014-06-10 22:52:05 +000011889static
11890Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
11891{
11892# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11893 return False;
11894# undef INSN
11895}
11896
sewardjfc83d2c2014-06-12 10:15:46 +000011897
sewardjdf1628c2014-06-10 22:52:05 +000011898static
11899Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
11900{
11901# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11902 return False;
11903# undef INSN
11904}
11905
sewardj5747c4a2014-06-11 20:57:23 +000011906
sewardjdf1628c2014-06-10 22:52:05 +000011907static
11908Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
11909{
sewardj5747c4a2014-06-11 20:57:23 +000011910 /* 31 28 23 21 20 15 13 9 4
11911 000 11110 ty 1 m op 1000 n opcode2
11912 The first 3 bits are really "M 0 S", but M and S are always zero.
11913 Decode fields are: ty,op,opcode2
11914 */
sewardjdf1628c2014-06-10 22:52:05 +000011915# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000011916 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
11917 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
11918 return False;
11919 }
11920 UInt ty = INSN(23,22);
11921 UInt mm = INSN(20,16);
11922 UInt op = INSN(15,14);
11923 UInt nn = INSN(9,5);
11924 UInt opcode2 = INSN(4,0);
11925 vassert(ty < 4);
11926
11927 if (ty <= X01 && op == X00
11928 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
11929 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
11930 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
11931 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
11932 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
11933 /* 31 23 20 15 9 4
11934 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
11935 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
11936 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
11937 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
11938
11939 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
11940 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
11941 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
11942 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
11943
11944 FCMPE generates Invalid Operation exn if either arg is any kind
11945 of NaN. FCMP generates Invalid Operation exn if either arg is a
11946 signalling NaN. We ignore this detail here and produce the same
11947 IR for both.
11948 */
11949 Bool isD = (ty & 1) == 1;
11950 Bool isCMPE = (opcode2 & 16) == 16;
11951 Bool cmpZero = (opcode2 & 8) == 8;
11952 IRType ity = isD ? Ity_F64 : Ity_F32;
11953 Bool valid = True;
11954 if (cmpZero && mm != 0) valid = False;
11955 if (valid) {
11956 IRTemp argL = newTemp(ity);
11957 IRTemp argR = newTemp(ity);
11958 IRTemp irRes = newTemp(Ity_I32);
11959 assign(argL, getQRegLO(nn, ity));
11960 assign(argR,
11961 cmpZero
11962 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
11963 : getQRegLO(mm, ity));
11964 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
11965 mkexpr(argL), mkexpr(argR)));
11966 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
11967 IRTemp nzcv_28x0 = newTemp(Ity_I64);
11968 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
11969 setFlags_COPY(nzcv_28x0);
11970 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
11971 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
11972 return True;
11973 }
11974 return False;
11975 }
11976
sewardjdf1628c2014-06-10 22:52:05 +000011977 return False;
11978# undef INSN
11979}
11980
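/* A note on the FCMP/FCMPE lowering above (my reading, not from the
   original): Iop_CmpF64/32 produce an IRCmpF64Result, which
   mk_convert_IRCmpF64Result_to_NZCV maps to the AArch64 flag quartet
      EQ -> 0110 (Z,C)    LT -> 1000 (N)
      GT -> 0010 (C)      UN -> 0011 (C,V)
   in bits 3:0; the shift left by 28 then parks it in bits 31:28, which
   is where setFlags_COPY expects NZCV to live. */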
sewardj5747c4a2014-06-11 20:57:23 +000011981
sewardjdf1628c2014-06-10 22:52:05 +000011982static
11983Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
11984{
11985# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
11986 return False;
11987# undef INSN
11988}
11989
sewardjfc83d2c2014-06-12 10:15:46 +000011990
sewardjdf1628c2014-06-10 22:52:05 +000011991static
11992Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
11993{
sewardje23ec112014-11-15 16:07:14 +000011994 /* 31 23 21 20 15 11 9 5
11995 000 11110 ty 1 m cond 11 n d
11996 The first 3 bits are really "M 0 S", but M and S are always zero.
11997 Decode fields: ty
11998 */
sewardjdf1628c2014-06-10 22:52:05 +000011999# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardje23ec112014-11-15 16:07:14 +000012000 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12001 || INSN(11,10) != BITS2(1,1)) {
12002 return False;
12003 }
12004 UInt ty = INSN(23,22);
12005 UInt mm = INSN(20,16);
12006 UInt cond = INSN(15,12);
12007 UInt nn = INSN(9,5);
12008 UInt dd = INSN(4,0);
12009 if (ty <= X01) {
12010 /* -------- 00: FCSEL s_s -------- */
12011 /* -------- 01: FCSEL d_d -------- */
12012 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12013 IRTemp srcT = newTemp(ity);
12014 IRTemp srcF = newTemp(ity);
12015 IRTemp res = newTemp(ity);
12016 assign(srcT, getQRegLO(nn, ity));
12017 assign(srcF, getQRegLO(mm, ity));
12018 assign(res, IRExpr_ITE(
12019 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12020 mkexpr(srcT), mkexpr(srcF)));
12021 putQReg128(dd, mkV128(0x0000));
12022 putQRegLO(dd, mkexpr(res));
12023 DIP("fcsel %s, %s, %s, %s\n",
12024 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12025 nameCC(cond));
12026 return True;
12027 }
sewardjdf1628c2014-06-10 22:52:05 +000012028 return False;
12029# undef INSN
12030}
12031
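/* Usage example for the FCSEL case above (illustrative):
      fcsel d0, d1, d2, gt
   computes d0 = (GT holds) ? d1 : d2, then zeroes the upper 64 bits of
   q0 -- the same write-low-and-clear pattern used by the other scalar
   FP cases in this file. */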
sewardj5747c4a2014-06-11 20:57:23 +000012032
sewardjdf1628c2014-06-10 22:52:05 +000012033static
12034Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12035{
12036 /* 31 28 23 21 20 14 9 4
12037 000 11110 ty 1 opcode 10000 n d
12038 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000012039 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000012040 */
12041# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12042 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12043 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12044 return False;
12045 }
12046 UInt ty = INSN(23,22);
12047 UInt opcode = INSN(20,15);
12048 UInt nn = INSN(9,5);
12049 UInt dd = INSN(4,0);
12050
12051 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12052 /* -------- 0x,000000: FMOV d_d, s_s -------- */
12053 /* -------- 0x,000001: FABS d_d, s_s -------- */
12054 /* -------- 0x,000010: FNEG d_d, s_s -------- */
12055 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12056 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12057 IRTemp src = newTemp(ity);
12058 IRTemp res = newTemp(ity);
12059 const HChar* nm = "??";
12060 assign(src, getQRegLO(nn, ity));
12061 switch (opcode) {
12062 case BITS6(0,0,0,0,0,0):
12063 nm = "fmov"; assign(res, mkexpr(src)); break;
12064 case BITS6(0,0,0,0,0,1):
12065 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12066 case BITS6(0,0,0,0,1,0):
12067 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12068 case BITS6(0,0,0,0,1,1):
12069 nm = "fsqrt";
12070 assign(res, binop(mkSQRTF(ity),
12071 mkexpr(mk_get_IR_rounding_mode()),
12072 mkexpr(src))); break;
12073 default:
12074 vassert(0);
12075 }
12076 putQReg128(dd, mkV128(0x0000));
12077 putQRegLO(dd, mkexpr(res));
12078 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12079 return True;
12080 }
12081
sewardj5747c4a2014-06-11 20:57:23 +000012082 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
12083 || opcode == BITS6(0,0,0,1,0,1)))
12084 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
12085 || opcode == BITS6(0,0,0,1,0,1)))
12086 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
12087 || opcode == BITS6(0,0,0,1,0,0)))) {
12088 /* -------- 11,000100: FCVT s_h -------- */
12089 /* -------- 11,000101: FCVT d_h -------- */
12090 /* -------- 00,000111: FCVT h_s -------- */
12091 /* -------- 00,000101: FCVT d_s -------- */
12092 /* -------- 01,000111: FCVT h_d -------- */
12093 /* -------- 01,000100: FCVT s_d -------- */
12094 /* 31 23 21 16 14 9 4
12095 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
12096 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
12097 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
12098 --------- 00 ----- 01 --------- FCVT Dd, Sn
12099 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
12100 --------- 01 ----- 00 --------- FCVT Sd, Dn
12101 Rounding, when dst is smaller than src, is per the FPCR.
12102 */
12103 UInt b2322 = ty;
12104 UInt b1615 = opcode & BITS2(1,1);
12105 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
12106 /* Convert S to D */
12107 IRTemp res = newTemp(Ity_F64);
12108 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
12109 putQReg128(dd, mkV128(0x0000));
12110 putQRegLO(dd, mkexpr(res));
12111 DIP("fcvt %s, %s\n",
12112 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
12113 return True;
12114 }
12115 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
12116 /* Convert D to S */
12117 IRTemp res = newTemp(Ity_F32);
12118 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
12119 getQRegLO(nn, Ity_F64)));
12120 putQReg128(dd, mkV128(0x0000));
12121 putQRegLO(dd, mkexpr(res));
12122 DIP("fcvt %s, %s\n",
12123 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
12124 return True;
12125 }
12126 /* else unhandled */
12127 return False;
12128 }
12129
12130 if (ty <= X01
12131 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
12132 && opcode != BITS6(0,0,1,1,0,1)) {
12133 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
12134 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
12135 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
12136 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
12137 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
12138 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
12139 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
12140 /* 31 23 21 17 14 9 4
12141 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
12142 rm
12143 x==0 => S-registers, x==1 => D-registers
12144 rm (17:15) encodings:
12145 111 per FPCR (FRINTI)
12146 001 +inf (FRINTP)
12147 010 -inf (FRINTM)
12148 011 zero (FRINTZ)
12149 000 tieeven (FRINTN -- not handled below)
12150 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjd8ad76a2014-10-30 15:37:16 +000012151 110 per FPCR + "exact = TRUE" (FRINTX)
sewardj5747c4a2014-06-11 20:57:23 +000012152 101 unallocated
12153 */
12154 Bool isD = (ty & 1) == 1;
12155 UInt rm = opcode & BITS6(0,0,0,1,1,1);
12156 IRType ity = isD ? Ity_F64 : Ity_F32;
12157 IRExpr* irrmE = NULL;
12158 UChar ch = '?';
12159 switch (rm) {
12160 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
12161 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
12162 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
12163 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12164 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjd8ad76a2014-10-30 15:37:16 +000012165 // I am unsure about the following, due to the "integral exact"
sewardj9e1c2b02014-11-25 17:42:52 +000012166 // description in the manual. What does it mean? (frintx, that is)
sewardjd8ad76a2014-10-30 15:37:16 +000012167 case BITS3(1,1,0):
12168 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj9e1c2b02014-11-25 17:42:52 +000012169 case BITS3(1,1,1):
12170 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj5747c4a2014-06-11 20:57:23 +000012171 default: break;
12172 }
12173 if (irrmE) {
12174 IRTemp src = newTemp(ity);
12175 IRTemp dst = newTemp(ity);
12176 assign(src, getQRegLO(nn, ity));
12177 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12178 irrmE, mkexpr(src)));
12179 putQReg128(dd, mkV128(0x0000));
12180 putQRegLO(dd, mkexpr(dst));
12181 DIP("frint%c %s, %s\n",
12182 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12183 return True;
12184 }
12185 return False;
12186 }
12187
sewardjdf1628c2014-06-10 22:52:05 +000012188 return False;
12189# undef INSN
12190}
12191
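/* Worked examples for the FRINT family above (illustrative only):
      frintz s0, s1   with s1 =  1.7  ->  1.0   (toward zero)
      frintm s0, s1   with s1 = -1.2  -> -2.0   (toward -inf)
      frintp s0, s1   with s1 =  1.2  ->  2.0   (toward +inf)
   The FRINTA kludge matters only on ties: for 2.5, ties-away gives 3.0
   but the Irrm_NEAREST (ties-to-even) actually used gives 2.0. */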
12192
12193static
12194Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
12195{
12196 /* 31 28 23 21 20 15 11 9 4
12197 000 11110 ty 1 m opcode 10 n d
12198 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj76927e62014-11-17 11:21:21 +000012199 Decode fields: ty, opcode
sewardjdf1628c2014-06-10 22:52:05 +000012200 */
12201# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12202 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12203 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
12204 return False;
12205 }
12206 UInt ty = INSN(23,22);
12207 UInt mm = INSN(20,16);
12208 UInt opcode = INSN(15,12);
12209 UInt nn = INSN(9,5);
12210 UInt dd = INSN(4,0);
12211
sewardj76927e62014-11-17 11:21:21 +000012212 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
sewardjdf1628c2014-06-10 22:52:05 +000012213 /* ------- 0x,0000: FMUL d_d, s_s ------- */
12214 /* ------- 0x,0001: FDIV d_d, s_s ------- */
12215 /* ------- 0x,0010: FADD d_d, s_s ------- */
12216 /* ------- 0x,0011: FSUB d_d, s_s ------- */
sewardj76927e62014-11-17 11:21:21 +000012217 /* ------- 0x,0100: FMAX d_d, s_s ------- */
12218 /* ------- 0x,0101: FMIN d_d, s_s ------- */
12219 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
12220 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
sewardjdf1628c2014-06-10 22:52:05 +000012221 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12222 IROp iop = Iop_INVALID;
12223 const HChar* nm = "???";
12224 switch (opcode) {
sewardj76927e62014-11-17 11:21:21 +000012225 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
12226 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
12227 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
12228 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
12229 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
12230 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
12231 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
12232 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
sewardjdf1628c2014-06-10 22:52:05 +000012233 default: vassert(0);
12234 }
sewardj76927e62014-11-17 11:21:21 +000012235 if (opcode <= BITS4(0,0,1,1)) {
12236 // This is really not good code. TODO: avoid width-changing
sewardjb963eef2014-11-17 14:16:56 +000012237 IRTemp res = newTemp(ity);
12238 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12239 getQRegLO(nn, ity), getQRegLO(mm, ity)));
sewardj76927e62014-11-17 11:21:21 +000012240 putQReg128(dd, mkV128(0));
sewardjb963eef2014-11-17 14:16:56 +000012241 putQRegLO(dd, mkexpr(res));
sewardj76927e62014-11-17 11:21:21 +000012242 } else {
12243 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
12244 binop(iop, getQReg128(nn), getQReg128(mm))));
12245 }
sewardjdf1628c2014-06-10 22:52:05 +000012246 DIP("%s %s, %s, %s\n",
12247 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12248 return True;
12249 }
12250
12251 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
12252 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
12253 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12254 IROp iop = mkMULF(ity);
12255 IROp iopn = mkNEGF(ity);
12256 const HChar* nm = "fnmul";
12257 IRExpr* resE = unop(iopn,
12258 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12259 getQRegLO(nn, ity), getQRegLO(mm, ity)));
12260 IRTemp res = newTemp(ity);
12261 assign(res, resE);
12262 putQReg128(dd, mkV128(0));
12263 putQRegLO(dd, mkexpr(res));
12264 DIP("%s %s, %s, %s\n",
12265 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12266 return True;
12267 }
12268
sewardjdf1628c2014-06-10 22:52:05 +000012269 return False;
12270# undef INSN
12271}
12272
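/* Example for the FNMUL case above (illustrative): fnmul d0, d1, d2
   with d1 = 2.0, d2 = 3.0 gives d0 = -6.0, i.e. -(n * m). For finite
   values this equals (-n) * m exactly, since IEEE negation is exact;
   the two differ only in NaN sign handling. */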
12273
12274static
12275Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
12276{
sewardj5747c4a2014-06-11 20:57:23 +000012277 /* 31 28 23 21 20 15 14 9 4
12278 000 11111 ty o1 m o0 a n d
12279 The first 3 bits are really "M 0 S", but M and S are always zero.
12280 Decode fields: ty,o1,o0
12281 */
sewardjdf1628c2014-06-10 22:52:05 +000012282# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012283 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
12284 return False;
12285 }
12286 UInt ty = INSN(23,22);
12287 UInt bitO1 = INSN(21,21);
12288 UInt mm = INSN(20,16);
12289 UInt bitO0 = INSN(15,15);
12290 UInt aa = INSN(14,10);
12291 UInt nn = INSN(9,5);
12292 UInt dd = INSN(4,0);
12293 vassert(ty < 4);
12294
12295 if (ty <= X01) {
12296 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
12297 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
12298 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
12299 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
12300 /* -------------------- F{N}M{ADD,SUB} -------------------- */
12301 /* 31 22 20 15 14 9 4 ix
12302 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
12303 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
12304 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
12305 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
12306 where Fx=Dx when sz=1, Fx=Sx when sz=0
12307
12308 -----SPEC------ ----IMPL----
12309 fmadd a + n * m a + n * m
12310 fmsub a + (-n) * m a - n * m
12311 fnmadd (-a) + (-n) * m -(a + n * m)
12312 fnmsub (-a) + n * m -(a - n * m)
12313 */
12314 Bool isD = (ty & 1) == 1;
12315 UInt ix = (bitO1 << 1) | bitO0;
12316 IRType ity = isD ? Ity_F64 : Ity_F32;
12317 IROp opADD = mkADDF(ity);
12318 IROp opSUB = mkSUBF(ity);
12319 IROp opMUL = mkMULF(ity);
12320 IROp opNEG = mkNEGF(ity);
12321 IRTemp res = newTemp(ity);
12322 IRExpr* eA = getQRegLO(aa, ity);
12323 IRExpr* eN = getQRegLO(nn, ity);
12324 IRExpr* eM = getQRegLO(mm, ity);
12325 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
12326 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
12327 switch (ix) {
12328 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
12329 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
12330 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
12331 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
12332 default: vassert(0);
12333 }
12334 putQReg128(dd, mkV128(0x0000));
12335 putQRegLO(dd, mkexpr(res));
12336 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
12337 DIP("%s %s, %s, %s, %s\n",
12338 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
12339 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
12340 return True;
12341 }
12342
sewardjdf1628c2014-06-10 22:52:05 +000012343 return False;
12344# undef INSN
12345}
12346
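/* Sketch of the split evaluation used above versus a fused one
   (illustrative; fma() is the C99 <math.h> fused multiply-add):

      double split = a + n * m;      // rounds n*m, then rounds the add
      double fused = fma(n, m, a);   // one rounding, as FMADD requires

   The two can differ in the last mantissa bit whenever the intermediate
   product needs more precision than the working format carries. */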
12347
12348static
12349Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
12350{
12351 /* 31 28 23 21 20 12 9 4
12352 000 11110 ty 1 imm8 100 imm5 d
12353 The first 3 bits are really "M 0 S", but M and S are always zero.
12354 */
12355# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12356 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12357 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
12358 return False;
12359 }
12360 UInt ty = INSN(23,22);
12361 UInt imm8 = INSN(20,13);
12362 UInt imm5 = INSN(9,5);
12363 UInt dd = INSN(4,0);
12364
12365 /* ------- 00,00000: FMOV s_imm ------- */
12366 /* ------- 01,00000: FMOV d_imm ------- */
12367 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
12368 Bool isD = (ty & 1) == 1;
12369 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
12370 if (!isD) {
12371 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
12372 }
12373 putQReg128(dd, mkV128(0));
12374 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
12375 DIP("fmov %s, #0x%llx\n",
12376 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
12377 return True;
12378 }
12379
12380 return False;
12381# undef INSN
12382}
12383
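/* Example for the FMOV immediate case above (my understanding of
   VFPExpandImm, stated as an assumption): imm8 = 0x70 expands to 1.0
   in both sizes, so "fmov d7, #1.0" makes the code above write
   0x3FF0000000000000 to d7's low 64 bits and zero the rest of q7. */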
12384
12385static
sewardj1aff76b2014-11-20 10:14:06 +000012386Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000012387{
sewardj1aff76b2014-11-20 10:14:06 +000012389 /* 31 30 29 28 23 21 20 18 15 9 4
12390 sf 0 0 11110 type 0 rmode opcode scale n d
12391 The first 3 bits are really "sf 0 S", but S is always zero.
12392 Decode fields: sf,type,rmode,opcode
12393 */
12394# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12395 if (INSN(30,29) != BITS2(0,0)
12396 || INSN(28,24) != BITS5(1,1,1,1,0)
12397 || INSN(21,21) != 0) {
12398 return False;
12399 }
12400 UInt bitSF = INSN(31,31);
12401 UInt ty = INSN(23,22); // type
12402 UInt rm = INSN(20,19); // rmode
12403 UInt op = INSN(18,16); // opcode
12404 UInt sc = INSN(15,10); // scale
12405 UInt nn = INSN(9,5);
12406 UInt dd = INSN(4,0);
12407
12408 // op = 010, 011
12409 /* -------------- {S,U}CVTF (scalar, fixedpt) -------------- */
12410 /* (ix) sf S 28 ty rm op 15 9 4
12411 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
12412 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
12413 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
12414 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
12415
12416 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
12417 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
12418 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
12419 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
12420
12421 These are signed/unsigned conversion from integer registers to
12422 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
12423 scaled per |scale|.
12424 */
12425 if (ty <= X01 && rm == X00
12426 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
12427 && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
12428 Bool isI64 = bitSF == 1;
12429 Bool isF64 = (ty & 1) == 1;
12430 Bool isU = (op & 1) == 1;
12431 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
12432
12433 Int fbits = 64 - sc;
12434 vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));
12435
12436 Double scale = two_to_the_minus(fbits);
12437 IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
12438 : IRExpr_Const(IRConst_F32( (Float)scale ));
12439 IROp opMUL = isF64 ? Iop_MulF64 : Iop_MulF32;
12440
12441 const IROp ops[8]
12442 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
12443 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
12444 IRExpr* src = getIRegOrZR(isI64, nn);
12445 IRExpr* res = (isF64 && !isI64)
12446 ? unop(ops[ix], src)
12447 : binop(ops[ix],
12448 mkexpr(mk_get_IR_rounding_mode()), src);
12449 putQReg128(dd, mkV128(0));
12450 putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));
12451
12452 DIP("%ccvtf %s, %s, #%d\n",
12453 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
12454 nameIRegOrZR(isI64, nn), fbits);
12455 return True;
12456 }
12457
sewardjdf1628c2014-06-10 22:52:05 +000012458 return False;
12459# undef INSN
12460}
12461
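/* Worked example for the fixed-point path above (illustrative): for
      scvtf s0, w1, #4
   the scale field is 60, so fbits = 64 - 60 = 4 and the result is
   (float)w1 * 2^-4, e.g. w1 = 48 -> 3.0. two_to_the_minus(4) is
   presumably just ldexp(1.0, -4) = 0.0625. */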
12462
12463static
sewardj5747c4a2014-06-11 20:57:23 +000012464Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000012465{
12466 /* 31 30 29 28 23 21 20 18 15 9 4
sewardj5747c4a2014-06-11 20:57:23 +000012467 sf 0 0 11110 type 1 rmode opcode 000000 n d
12468 The first 3 bits are really "sf 0 S", but S is always zero.
sewardjf67fcb92014-10-30 23:10:45 +000012469 Decode fields: sf,type,rmode,opcode
sewardjdf1628c2014-06-10 22:52:05 +000012470 */
12471# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012472 if (INSN(30,29) != BITS2(0,0)
sewardjdf1628c2014-06-10 22:52:05 +000012473 || INSN(28,24) != BITS5(1,1,1,1,0)
12474 || INSN(21,21) != 1
12475 || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
12476 return False;
12477 }
12478 UInt bitSF = INSN(31,31);
sewardjdf1628c2014-06-10 22:52:05 +000012479 UInt ty = INSN(23,22); // type
12480 UInt rm = INSN(20,19); // rmode
12481 UInt op = INSN(18,16); // opcode
12482 UInt nn = INSN(9,5);
12483 UInt dd = INSN(4,0);
12484
sewardj5747c4a2014-06-11 20:57:23 +000012485 // op = 000, 001
sewardjf67fcb92014-10-30 23:10:45 +000012486 /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
sewardj5747c4a2014-06-11 20:57:23 +000012487 /* 30 23 20 18 15 9 4
12488 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
12489 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
12490 ---------------- 01 -------------- FCVTP-------- (round to +inf)
12491 ---------------- 10 -------------- FCVTM-------- (round to -inf)
12492 ---------------- 11 -------------- FCVTZ-------- (round to zero)
12493
sewardjf67fcb92014-10-30 23:10:45 +000012494 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
12495 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
12496
sewardj5747c4a2014-06-11 20:57:23 +000012497 Rd is Xd when sf==1, Wd when sf==0
12498 Fn is Dn when x==1, Sn when x==0
12499 20:19 carry the rounding mode, using the same encoding as FPCR
12500 */
sewardjf67fcb92014-10-30 23:10:45 +000012501 if (ty <= X01
12502 && ( ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
12503 || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
12504 )
12505 ) {
sewardj5747c4a2014-06-11 20:57:23 +000012506 Bool isI64 = bitSF == 1;
12507 Bool isF64 = (ty & 1) == 1;
12508 Bool isU = (op & 1) == 1;
12509 /* Decide on the IR rounding mode to use. */
12510 IRRoundingMode irrm = 8; /*impossible*/
12511 HChar ch = '?';
sewardjf67fcb92014-10-30 23:10:45 +000012512 if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
12513 switch (rm) {
12514 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
12515 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
12516 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
12517 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
12518 default: vassert(0);
12519 }
12520 } else {
12521 vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
12522 switch (rm) {
12523 case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
12524 default: vassert(0);
12525 }
sewardj5747c4a2014-06-11 20:57:23 +000012526 }
12527 vassert(irrm != 8);
12528 /* Decide on the conversion primop, based on the source size,
12529 dest size and signedness (8 possibilities). Case coding:
12530 F32 ->s I32 0
12531 F32 ->u I32 1
12532 F32 ->s I64 2
12533 F32 ->u I64 3
12534 F64 ->s I32 4
12535 F64 ->u I32 5
12536 F64 ->s I64 6
12537 F64 ->u I64 7
12538 */
12539 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
12540 vassert(ix < 8);
12541 const IROp iops[8]
12542 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
12543 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
12544 IROp iop = iops[ix];
12545 // A bit of ATCery: bounce all cases we haven't seen an example of.
12546 if (/* F32toI32S */
12547 (iop == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
12548 || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
12549 || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
sewardjf67fcb92014-10-30 23:10:45 +000012550 || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
sewardj5747c4a2014-06-11 20:57:23 +000012551 /* F32toI32U */
12552 || (iop == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
12553 || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
sewardj0728a522014-11-15 22:24:18 +000012554 || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
sewardjf67fcb92014-10-30 23:10:45 +000012555 || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
sewardj5747c4a2014-06-11 20:57:23 +000012556 /* F32toI64S */
12557 || (iop == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
sewardj266d5962014-11-20 11:30:41 +000012558 || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
sewardj5747c4a2014-06-11 20:57:23 +000012559 /* F32toI64U */
12560 || (iop == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
sewardjefe536b2014-09-06 08:08:47 +000012561 || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
sewardj5747c4a2014-06-11 20:57:23 +000012562 /* F64toI32S */
12563 || (iop == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
12564 || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
12565 || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
sewardj76927e62014-11-17 11:21:21 +000012566 || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
sewardj5747c4a2014-06-11 20:57:23 +000012567 /* F64toI32U */
12568 || (iop == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
12569 || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
12570 || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
12571 /* F64toI64S */
12572 || (iop == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
12573 || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
12574 || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
sewardj76927e62014-11-17 11:21:21 +000012575 || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
sewardj5747c4a2014-06-11 20:57:23 +000012576 /* F64toI64U */
12577 || (iop == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
sewardj31b29af2014-10-30 15:54:53 +000012578 || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
sewardj5747c4a2014-06-11 20:57:23 +000012579 || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
12580 ) {
12581 /* validated */
12582 } else {
12583 return False;
12584 }
12585 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
12586 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
12587 IRTemp src = newTemp(srcTy);
12588 IRTemp dst = newTemp(dstTy);
12589 assign(src, getQRegLO(nn, srcTy));
12590 assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
12591 putIRegOrZR(isI64, dd, mkexpr(dst));
12592 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
12593 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
12594 return True;
12595 }
12596
12597 // op = 010, 011
sewardjdf1628c2014-06-10 22:52:05 +000012598 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
12599 /* (ix) sf S 28 ty rm op 15 9 4
12600 0 0 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
12601 1 0 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
12602 2 1 0 0 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
12603 3 1 0 0 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
12604
12605 4 0 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
12606 5 0 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
12607 6 1 0 0 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
12608 7 1 0 0 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
12609
12610 These are signed/unsigned conversion from integer registers to
12611 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
12612 */
sewardj5747c4a2014-06-11 20:57:23 +000012613 if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
sewardjdf1628c2014-06-10 22:52:05 +000012614 Bool isI64 = bitSF == 1;
12615 Bool isF64 = (ty & 1) == 1;
12616 Bool isU = (op & 1) == 1;
12617 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
12618 const IROp ops[8]
12619 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
12620 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
12621 IRExpr* src = getIRegOrZR(isI64, nn);
12622 IRExpr* res = (isF64 && !isI64)
12623 ? unop(ops[ix], src)
sewardj1aff76b2014-11-20 10:14:06 +000012624 : binop(ops[ix],
12625 mkexpr(mk_get_IR_rounding_mode()), src);
sewardjdf1628c2014-06-10 22:52:05 +000012626 putQReg128(dd, mkV128(0));
12627 putQRegLO(dd, res);
12628 DIP("%ccvtf %s, %s\n",
12629 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
12630 nameIRegOrZR(isI64, nn));
12631 return True;
12632 }
12633
sewardj5747c4a2014-06-11 20:57:23 +000012634 // op = 110, 111
sewardjdf1628c2014-06-10 22:52:05 +000012635 /* -------- FMOV (general) -------- */
12636 /* case sf S ty rm op 15 9 4
12637 (1) 0 0 0 11110 00 1 00 111 000000 n d FMOV Sd, Wn
12638 (2) 1 0 0 11110 01 1 00 111 000000 n d FMOV Dd, Xn
12639 (3) 1 0 0 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
12640
12641 (4) 0 0 0 11110 00 1 00 110 000000 n d FMOV Wd, Sn
12642 (5) 1 0 0 11110 01 1 00 110 000000 n d FMOV Xd, Dn
12643 (6) 1 0 0 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
12644 */
sewardj5747c4a2014-06-11 20:57:23 +000012645 if (1) {
sewardjbbcf1882014-01-12 12:49:10 +000012646 UInt ix = 0; // case
sewardjdf1628c2014-06-10 22:52:05 +000012647 if (bitSF == 0) {
sewardjbbcf1882014-01-12 12:49:10 +000012648 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
12649 ix = 1;
12650 else
12651 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
12652 ix = 4;
12653 } else {
sewardjdf1628c2014-06-10 22:52:05 +000012654 vassert(bitSF == 1);
sewardjbbcf1882014-01-12 12:49:10 +000012655 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
12656 ix = 2;
12657 else
12658 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
12659 ix = 5;
12660 else
12661 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
12662 ix = 3;
12663 else
12664 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
12665 ix = 6;
12666 }
12667 if (ix > 0) {
12668 switch (ix) {
12669 case 1:
12670 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +000012671 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000012672 DIP("fmov s%u, w%u\n", dd, nn);
12673 break;
12674 case 2:
12675 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +000012676 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000012677 DIP("fmov d%u, x%u\n", dd, nn);
12678 break;
12679 case 3:
sewardj606c4ba2014-01-26 19:11:14 +000012680 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +000012681 DIP("fmov v%u.d[1], x%u\n", dd, nn);
12682 break;
12683 case 4:
sewardj606c4ba2014-01-26 19:11:14 +000012684 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +000012685 DIP("fmov w%u, s%u\n", dd, nn);
12686 break;
12687 case 5:
sewardj606c4ba2014-01-26 19:11:14 +000012688 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +000012689 DIP("fmov x%u, d%u\n", dd, nn);
12690 break;
12691 case 6:
sewardj606c4ba2014-01-26 19:11:14 +000012692 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +000012693 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
12694 break;
12695 default:
12696 vassert(0);
12697 }
12698 return True;
12699 }
12700 /* undecodable; fall through */
12701 }
12702
sewardjdf1628c2014-06-10 22:52:05 +000012703 return False;
12704# undef INSN
12705}
12706
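/* Examples for the FCVT{N,P,M,Z,A}{S,U} path above (illustrative):
      fcvtzs w0, s0   with s0 = -1.7  ->  w0 = -1   (toward zero)
      fcvtms w0, s0   with s0 = -1.7  ->  w0 = -2   (toward -inf)
      fcvtas w0, s0   with s0 =  2.5  ->  w0 =  3 per the spec, but 2
   under the Irrm_NEAREST approximation used above -- the same
   ties-to-even-for-ties-away substitution as FRINTA. */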
12707
12708static
12709Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
12710{
12711 Bool ok;
12712 ok = dis_AdvSIMD_EXT(dres, insn);
12713 if (UNLIKELY(ok)) return True;
12714 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
12715 if (UNLIKELY(ok)) return True;
12716 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
12717 if (UNLIKELY(ok)) return True;
12718 ok = dis_AdvSIMD_across_lanes(dres, insn);
12719 if (UNLIKELY(ok)) return True;
12720 ok = dis_AdvSIMD_copy(dres, insn);
12721 if (UNLIKELY(ok)) return True;
12722 ok = dis_AdvSIMD_modified_immediate(dres, insn);
12723 if (UNLIKELY(ok)) return True;
12724 ok = dis_AdvSIMD_scalar_copy(dres, insn);
12725 if (UNLIKELY(ok)) return True;
12726 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
12727 if (UNLIKELY(ok)) return True;
12728 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
12729 if (UNLIKELY(ok)) return True;
12730 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
12731 if (UNLIKELY(ok)) return True;
12732 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
12733 if (UNLIKELY(ok)) return True;
12734 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
12735 if (UNLIKELY(ok)) return True;
12736 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
12737 if (UNLIKELY(ok)) return True;
12738 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
12739 if (UNLIKELY(ok)) return True;
12740 ok = dis_AdvSIMD_three_different(dres, insn);
12741 if (UNLIKELY(ok)) return True;
12742 ok = dis_AdvSIMD_three_same(dres, insn);
12743 if (UNLIKELY(ok)) return True;
12744 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
12745 if (UNLIKELY(ok)) return True;
12746 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
12747 if (UNLIKELY(ok)) return True;
12748 ok = dis_AdvSIMD_crypto_aes(dres, insn);
12749 if (UNLIKELY(ok)) return True;
12750 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
12751 if (UNLIKELY(ok)) return True;
12752 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
12753 if (UNLIKELY(ok)) return True;
12754 ok = dis_AdvSIMD_fp_compare(dres, insn);
12755 if (UNLIKELY(ok)) return True;
12756 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
12757 if (UNLIKELY(ok)) return True;
12758 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
12759 if (UNLIKELY(ok)) return True;
12760 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
12761 if (UNLIKELY(ok)) return True;
12762 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
12763 if (UNLIKELY(ok)) return True;
12764 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
12765 if (UNLIKELY(ok)) return True;
12766 ok = dis_AdvSIMD_fp_immediate(dres, insn);
12767 if (UNLIKELY(ok)) return True;
sewardj1aff76b2014-11-20 10:14:06 +000012768 ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000012769 if (UNLIKELY(ok)) return True;
sewardj5747c4a2014-06-11 20:57:23 +000012770 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000012771 if (UNLIKELY(ok)) return True;
12772 return False;
12773}
12774
sewardjbbcf1882014-01-12 12:49:10 +000012775
12776/*------------------------------------------------------------*/
12777/*--- Disassemble a single ARM64 instruction ---*/
12778/*------------------------------------------------------------*/
12779
12780/* Disassemble a single ARM64 instruction into IR. The instruction
12781 is located at |guest_instr| and has guest IP of
12782 |guest_PC_curr_instr|, which will have been set before the call
12783 here. Returns True iff the instruction was decoded, in which case
12784 *dres will be set accordingly, or False, in which case *dres should
12785 be ignored by the caller. */
12786
12787static
12788Bool disInstr_ARM64_WRK (
12789 /*MB_OUT*/DisResult* dres,
12790 Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
12791 Bool resteerCisOk,
12792 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000012793 const UChar* guest_instr,
sewardjbbcf1882014-01-12 12:49:10 +000012794 VexArchInfo* archinfo,
12795 VexAbiInfo* abiinfo
12796 )
12797{
12798 // A macro to fish bits out of 'insn'.
12799# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12800
12801//ZZ DisResult dres;
12802//ZZ UInt insn;
12803//ZZ //Bool allow_VFP = False;
12804//ZZ //UInt hwcaps = archinfo->hwcaps;
12805//ZZ IRTemp condT; /* :: Ity_I32 */
12806//ZZ UInt summary;
12807//ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
12808//ZZ
12809//ZZ /* What insn variants are we supporting today? */
12810//ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
12811//ZZ // etc etc
12812
12813 /* Set result defaults. */
12814 dres->whatNext = Dis_Continue;
12815 dres->len = 4;
12816 dres->continueAt = 0;
12817 dres->jk_StopHere = Ijk_INVALID;
12818
12819 /* At least this is simple on ARM64: insns are all 4 bytes long, and
12820 4-aligned. So just fish the whole thing out of memory right now
12821 and have done. */
12822 UInt insn = getUIntLittleEndianly( guest_instr );
12823
12824 if (0) vex_printf("insn: 0x%x\n", insn);
12825
12826 DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
12827
12828 vassert(0 == (guest_PC_curr_instr & 3ULL));
12829
12830 /* ----------------------------------------------------------- */
12831
12832 /* Spot "Special" instructions (see comment at top of file). */
12833 {
florian8462d112014-09-24 15:18:09 +000012834 const UChar* code = guest_instr;
sewardjbbcf1882014-01-12 12:49:10 +000012835 /* Spot the 16-byte preamble:
12836 93CC0D8C ror x12, x12, #3
12837 93CC358C ror x12, x12, #13
12838 93CCCD8C ror x12, x12, #51
12839 93CCF58C ror x12, x12, #61
12840 */
12841 UInt word1 = 0x93CC0D8C;
12842 UInt word2 = 0x93CC358C;
12843 UInt word3 = 0x93CCCD8C;
12844 UInt word4 = 0x93CCF58C;
12845 if (getUIntLittleEndianly(code+ 0) == word1 &&
12846 getUIntLittleEndianly(code+ 4) == word2 &&
12847 getUIntLittleEndianly(code+ 8) == word3 &&
12848 getUIntLittleEndianly(code+12) == word4) {
12849 /* Got a "Special" instruction preamble. Which one is it? */
12850 if (getUIntLittleEndianly(code+16) == 0xAA0A014A
12851 /* orr x10,x10,x10 */) {
12852 /* X3 = client_request ( X4 ) */
12853 DIP("x3 = client_request ( x4 )\n");
12854 putPC(mkU64( guest_PC_curr_instr + 20 ));
12855 dres->jk_StopHere = Ijk_ClientReq;
12856 dres->whatNext = Dis_StopHere;
12857 return True;
12858 }
12859 else
12860 if (getUIntLittleEndianly(code+16) == 0xAA0B016B
12861 /* orr x11,x11,x11 */) {
12862 /* X3 = guest_NRADDR */
12863 DIP("x3 = guest_NRADDR\n");
12864 dres->len = 20;
12865 putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
12866 return True;
12867 }
12868 else
12869 if (getUIntLittleEndianly(code+16) == 0xAA0C018C
12870 /* orr x12,x12,x12 */) {
12871 /* branch-and-link-to-noredir X8 */
12872 DIP("branch-and-link-to-noredir x8\n");
12873 putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
12874 putPC(getIReg64orZR(8));
12875 dres->jk_StopHere = Ijk_NoRedir;
12876 dres->whatNext = Dis_StopHere;
12877 return True;
12878 }
12879 else
12880 if (getUIntLittleEndianly(code+16) == 0xAA090129
12881 /* orr x9,x9,x9 */) {
12882 /* IR injection */
12883 DIP("IR injection\n");
12884 vex_inject_ir(irsb, Iend_LE);
12885 // Invalidate the current insn. The reason is that the IR we're
12886 // injecting here can change; in that case the translation has to
12887 // be redone. For ease of handling, we simply invalidate every
12888 // time.
sewardj05f5e012014-05-04 10:52:11 +000012889 stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
12890 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(20)));
sewardjbbcf1882014-01-12 12:49:10 +000012891 putPC(mkU64( guest_PC_curr_instr + 20 ));
12892 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +000012893 dres->jk_StopHere = Ijk_InvalICache;
sewardjbbcf1882014-01-12 12:49:10 +000012894 return True;
12895 }
12896 /* We don't know what it is. */
12897 return False;
12898 /*NOTREACHED*/
12899 }
12900 }
12901
12902 /* ----------------------------------------------------------- */
12903
12904 /* Main ARM64 instruction decoder starts here. */
12905
12906 Bool ok = False;
12907
12908 /* insn[28:25] determines the top-level grouping, so let's start
12909 off with that.
12910
12911 For all of these dis_ARM64_ functions, we pass *dres with the
12912 normal default results "insn OK, 4 bytes long, keep decoding" so
12913 they don't need to change it. However, decodes of control-flow
12914 insns may cause *dres to change.
12915 */
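   /* For reference (illustrative, assuming the obvious shift-and-mask
      definition of SLICE_UInt): INSN(28,25) is just
         (insn >> 25) & 0xF
      i.e. the four-bit top-level opcode group tested below. */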
12916 switch (INSN(28,25)) {
12917 case BITS4(1,0,0,0): case BITS4(1,0,0,1):
12918 // Data processing - immediate
12919 ok = dis_ARM64_data_processing_immediate(dres, insn);
12920 break;
12921 case BITS4(1,0,1,0): case BITS4(1,0,1,1):
12922 // Branch, exception generation and system instructions
sewardj65902992014-05-03 21:20:56 +000012923 ok = dis_ARM64_branch_etc(dres, insn, archinfo);
sewardjbbcf1882014-01-12 12:49:10 +000012924 break;
12925 case BITS4(0,1,0,0): case BITS4(0,1,1,0):
12926 case BITS4(1,1,0,0): case BITS4(1,1,1,0):
12927 // Loads and stores
12928 ok = dis_ARM64_load_store(dres, insn);
12929 break;
12930 case BITS4(0,1,0,1): case BITS4(1,1,0,1):
12931 // Data processing - register
12932 ok = dis_ARM64_data_processing_register(dres, insn);
12933 break;
12934 case BITS4(0,1,1,1): case BITS4(1,1,1,1):
12935 // Data processing - SIMD and floating point
12936 ok = dis_ARM64_simd_and_fp(dres, insn);
12937 break;
12938 case BITS4(0,0,0,0): case BITS4(0,0,0,1):
12939 case BITS4(0,0,1,0): case BITS4(0,0,1,1):
12940 // UNALLOCATED
12941 break;
12942 default:
12943 vassert(0); /* Can't happen */
12944 }
12945
12946 /* If the next-level down decoders failed, make sure |dres| didn't
12947 get changed. */
12948 if (!ok) {
12949 vassert(dres->whatNext == Dis_Continue);
12950 vassert(dres->len == 4);
12951 vassert(dres->continueAt == 0);
12952 vassert(dres->jk_StopHere == Ijk_INVALID);
12953 }
12954
12955 return ok;
12956
12957# undef INSN
12958}
12959
12960
12961/*------------------------------------------------------------*/
12962/*--- Top-level fn ---*/
12963/*------------------------------------------------------------*/
12964
12965/* Disassemble a single instruction into IR. The instruction
12966 is located in host memory at &guest_code[delta]. */
12967
12968DisResult disInstr_ARM64 ( IRSB* irsb_IN,
12969 Bool (*resteerOkFn) ( void*, Addr64 ),
12970 Bool resteerCisOk,
12971 void* callback_opaque,
florian8462d112014-09-24 15:18:09 +000012972 const UChar* guest_code_IN,
sewardjbbcf1882014-01-12 12:49:10 +000012973 Long delta_IN,
12974 Addr64 guest_IP,
12975 VexArch guest_arch,
12976 VexArchInfo* archinfo,
12977 VexAbiInfo* abiinfo,
sewardj9b769162014-07-24 12:42:03 +000012978 VexEndness host_endness_IN,
sewardjbbcf1882014-01-12 12:49:10 +000012979 Bool sigill_diag_IN )
12980{
12981 DisResult dres;
12982 vex_bzero(&dres, sizeof(dres));
12983
12984 /* Set globals (see top of this file) */
12985 vassert(guest_arch == VexArchARM64);
12986
12987 irsb = irsb_IN;
sewardj9b769162014-07-24 12:42:03 +000012988 host_endness = host_endness_IN;
sewardjbbcf1882014-01-12 12:49:10 +000012989 guest_PC_curr_instr = (Addr64)guest_IP;
12990
sewardj65902992014-05-03 21:20:56 +000012991 /* Sanity checks */
12992 /* (x::UInt - 2) <= 15 === x >= 2 && x <= 17 (I hope) */
12993 vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
12994 vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);
12995
sewardjbbcf1882014-01-12 12:49:10 +000012996 /* Try to decode */
12997 Bool ok = disInstr_ARM64_WRK( &dres,
12998 resteerOkFn, resteerCisOk, callback_opaque,
florian8462d112014-09-24 15:18:09 +000012999 &guest_code_IN[delta_IN],
sewardjbbcf1882014-01-12 12:49:10 +000013000 archinfo, abiinfo );
13001 if (ok) {
13002 /* All decode successes end up here. */
sewardjdc9259c2014-02-27 11:10:19 +000013003 vassert(dres.len == 4 || dres.len == 20);
sewardjbbcf1882014-01-12 12:49:10 +000013004 switch (dres.whatNext) {
13005 case Dis_Continue:
13006 putPC( mkU64(dres.len + guest_PC_curr_instr) );
13007 break;
13008 case Dis_ResteerU:
13009 case Dis_ResteerC:
13010 putPC(mkU64(dres.continueAt));
13011 break;
13012 case Dis_StopHere:
13013 break;
13014 default:
13015 vassert(0);
13016 }
13017 DIP("\n");
13018 } else {
13019 /* All decode failures end up here. */
13020 if (sigill_diag_IN) {
13021 Int i, j;
13022 UChar buf[64];
13023 UInt insn
florian8462d112014-09-24 15:18:09 +000013024 = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
sewardjbbcf1882014-01-12 12:49:10 +000013025 vex_bzero(buf, sizeof(buf));
13026 for (i = j = 0; i < 32; i++) {
13027 if (i > 0) {
13028 if ((i & 7) == 0) buf[j++] = ' ';
13029 else if ((i & 3) == 0) buf[j++] = '\'';
13030 }
13031 buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
13032 }
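         /* buf now holds the bits grouped for readability, e.g.
            "0001'1110 0010'0000 0100'0000 0010'0001" -- a space between
            bytes, a tick between nibbles (illustrative formatting). */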
13033 vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
13034 vex_printf("disInstr(arm64): %s\n", buf);
13035 }
13036
13037 /* Tell the dispatcher that this insn cannot be decoded, and so
13038 has not been executed, and (is currently) the next to be
13039 executed. PC should be up-to-date since it is made so at the
13040 start of each insn, but nevertheless be paranoid and update
13041 it again right now. */
13042 putPC( mkU64(guest_PC_curr_instr) );
sewardjbbcf1882014-01-12 12:49:10 +000013043 dres.len = 0;
philippe2faf5912014-08-11 22:45:47 +000013044 dres.whatNext = Dis_StopHere;
sewardjbbcf1882014-01-12 12:49:10 +000013045 dres.jk_StopHere = Ijk_NoDecode;
philippe2faf5912014-08-11 22:45:47 +000013046 dres.continueAt = 0;
sewardjbbcf1882014-01-12 12:49:10 +000013047 }
13048 return dres;
13049}
13050
sewardjecde6972014-02-05 11:01:19 +000013051
sewardjbbcf1882014-01-12 12:49:10 +000013052/*--------------------------------------------------------------------*/
13053/*--- end guest_arm64_toIR.c ---*/
13054/*--------------------------------------------------------------------*/