blob: e9ad82af8eb198e6953f36a1341455e01091222f [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
   Following that, one of the following 4 are allowed
87 (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian? We need to know this in order to do
123 sub-register accesses to the SIMD/FP registers correctly. */
static Bool host_is_bigendian;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code.  The stmt()/
   assign() helpers below all append to this block. */
static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output ---*/
136/*------------------------------------------------------------*/
137
/* Print trace output for the front end, only when front-end tracing
   is enabled.  NOTE(review): expands to a bare 'if' with no braces,
   so do not use immediately before an 'else'. */
#define DIP(format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_printf(format, ## args)

/* As DIP, but formats into 'buf' instead of printing.  Same bare-'if'
   caveat applies. */
#define DIS(buf, format, args...) \
   if (vex_traceflags & VEX_TRACE_FE) \
      vex_sprintf(buf, format, ## args)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the ---*/
149/*--- arm insn stream. ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153 endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156 UInt w = 0;
157 w = (w << 8) | p[3];
158 w = (w << 8) | p[2];
159 w = (w << 8) | p[1];
160 w = (w << 8) | p[0];
161 return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165 bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168 vassert(n > 1 && n < 64);
169 Long r = (Long)x;
170 r = (r << (64-n)) >> (64-n);
171 return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ UShort w = 0;
179//ZZ w = (w << 8) | p[1];
180//ZZ w = (w << 8) | p[0];
181//ZZ return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ vassert(sh >= 0 && sh < 32);
186//ZZ if (sh == 0)
187//ZZ return x;
188//ZZ else
189//ZZ return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ Int res = 0, i;
195//ZZ for (i = 0; i < 32; i++) {
196//ZZ res += (x & 1);
197//ZZ x >>= 1;
198//ZZ }
199//ZZ return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ UInt mask = 1 << ix;
205//ZZ x &= ~mask;
206//ZZ x |= ((b << ix) & mask);
207//ZZ return x;
208//ZZ }
209
/* BITSn: concatenate n single-bit values (each must be 0 or 1) into a
   small integer, most significant bit first.  These make the
   bit-pattern matches in the instruction decoder legible. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* 5..7 bit variants, built by zero-padding BITS8. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10) \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11) \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

// produces _uint[_bMax:_bMin], i.e. the inclusive bitfield of _uint
// from bit _bMin up to bit _bMax, right-justified.
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments. ---*/
254/*------------------------------------------------------------*/
255
/* Make a V128 IR constant from the 16-bit descriptor 'w'
   (passed straight through to IRConst_V128). */
static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}
260
/* Make a 64-bit integer IR constant. */
static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}
265
/* Make a 32-bit integer IR constant. */
static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}
270
/* Make an 8-bit integer IR constant.  'i' must fit in 8 bits. */
static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}
276
/* Read the value of an IR temporary as an expression. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}
281
/* Build a unary-operator IR expression. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}
286
/* Build a binary-operator IR expression. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}
291
/* Build a ternary-operator IR expression. */
static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}
296
/* Build a little-endian load expression of the given type. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}
301
/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}
307
/* Emit a statement assigning expression 'e' to temporary 'dst'. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}
312
/* Emit a little-endian store of 'data' to 'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ if (guardT == IRTemp_INVALID) {
321//ZZ /* unconditional */
322//ZZ storeLE(addr, data);
323//ZZ } else {
324//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ IRExpr* addr, IRExpr* alt,
331//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ if (guardT == IRTemp_INVALID) {
334//ZZ /* unconditional */
335//ZZ IRExpr* loaded = NULL;
336//ZZ switch (cvt) {
337//ZZ case ILGop_Ident32:
338//ZZ loaded = loadLE(Ity_I32, addr); break;
339//ZZ case ILGop_8Uto32:
340//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ case ILGop_8Sto32:
342//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ case ILGop_16Uto32:
344//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ case ILGop_16Sto32:
346//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ default:
348//ZZ vassert(0);
349//ZZ }
350//ZZ vassert(loaded != NULL);
351//ZZ assign(dst, loaded);
352//ZZ } else {
353//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ loaded data before putting the data in 'dst'. If the load
355//ZZ does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ }
359//ZZ }
360
/* Generate a new temporary of the given type, registered in the
   type environment of the IRSB currently under construction. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ vassert(rot >= 0 && rot < 32);
379//ZZ if (rot == 0)
380//ZZ return mkexpr(src);
381//ZZ return
382//ZZ binop(Iop_Or32,
383//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ the given condition is true. Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ if (b)
397//ZZ return binop(Iop_And32, e, mkU32(~3));
398//ZZ else
399//ZZ return e;
400//ZZ }
401
402/* Other IR construction helpers. */
403static IROp mkAND ( IRType ty ) {
404 switch (ty) {
405 case Ity_I32: return Iop_And32;
406 case Ity_I64: return Iop_And64;
407 default: vpanic("mkAND");
408 }
409}
410
411static IROp mkOR ( IRType ty ) {
412 switch (ty) {
413 case Ity_I32: return Iop_Or32;
414 case Ity_I64: return Iop_Or64;
415 default: vpanic("mkOR");
416 }
417}
418
419static IROp mkXOR ( IRType ty ) {
420 switch (ty) {
421 case Ity_I32: return Iop_Xor32;
422 case Ity_I64: return Iop_Xor64;
423 default: vpanic("mkXOR");
424 }
425}
426
427static IROp mkSHL ( IRType ty ) {
428 switch (ty) {
429 case Ity_I32: return Iop_Shl32;
430 case Ity_I64: return Iop_Shl64;
431 default: vpanic("mkSHL");
432 }
433}
434
435static IROp mkSHR ( IRType ty ) {
436 switch (ty) {
437 case Ity_I32: return Iop_Shr32;
438 case Ity_I64: return Iop_Shr64;
439 default: vpanic("mkSHR");
440 }
441}
442
443static IROp mkSAR ( IRType ty ) {
444 switch (ty) {
445 case Ity_I32: return Iop_Sar32;
446 case Ity_I64: return Iop_Sar64;
447 default: vpanic("mkSAR");
448 }
449}
450
451static IROp mkNOT ( IRType ty ) {
452 switch (ty) {
453 case Ity_I32: return Iop_Not32;
454 case Ity_I64: return Iop_Not64;
455 default: vpanic("mkNOT");
456 }
457}
458
459static IROp mkADD ( IRType ty ) {
460 switch (ty) {
461 case Ity_I32: return Iop_Add32;
462 case Ity_I64: return Iop_Add64;
463 default: vpanic("mkADD");
464 }
465}
466
467static IROp mkSUB ( IRType ty ) {
468 switch (ty) {
469 case Ity_I32: return Iop_Sub32;
470 case Ity_I64: return Iop_Sub64;
471 default: vpanic("mkSUB");
472 }
473}
474
475static IROp mkADDF ( IRType ty ) {
476 switch (ty) {
477 case Ity_F32: return Iop_AddF32;
478 case Ity_F64: return Iop_AddF64;
479 default: vpanic("mkADDF");
480 }
481}
482
483static IROp mkSUBF ( IRType ty ) {
484 switch (ty) {
485 case Ity_F32: return Iop_SubF32;
486 case Ity_F64: return Iop_SubF64;
487 default: vpanic("mkSUBF");
488 }
489}
490
491static IROp mkMULF ( IRType ty ) {
492 switch (ty) {
493 case Ity_F32: return Iop_MulF32;
494 case Ity_F64: return Iop_MulF64;
495 default: vpanic("mkMULF");
496 }
497}
498
499static IROp mkDIVF ( IRType ty ) {
500 switch (ty) {
501 case Ity_F32: return Iop_DivF32;
502 case Ity_F64: return Iop_DivF64;
503 default: vpanic("mkMULF");
504 }
505}
506
507static IROp mkNEGF ( IRType ty ) {
508 switch (ty) {
509 case Ity_F32: return Iop_NegF32;
510 case Ity_F64: return Iop_NegF64;
511 default: vpanic("mkNEGF");
512 }
513}
514
515static IROp mkABSF ( IRType ty ) {
516 switch (ty) {
517 case Ity_F32: return Iop_AbsF32;
518 case Ity_F64: return Iop_AbsF64;
519 default: vpanic("mkNEGF");
520 }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524 switch (ty) {
525 case Ity_F32: return Iop_SqrtF32;
526 case Ity_F64: return Iop_SqrtF64;
527 default: vpanic("mkNEGF");
528 }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532 switch (ty) {
533 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534 case Ity_I64: return mkU64(imm);
535 default: vpanic("mkU");
536 }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540 of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543 UInt w = 0;
544 if (ty == Ity_I64) {
545 w = 64;
546 } else {
547 vassert(ty == Ity_I32);
548 w = 32;
549 }
550 vassert(w != 0);
551 vassert(imm < w);
552 if (imm == 0) {
553 return arg;
554 }
555 IRTemp res = newTemp(ty);
556 assign(res, binop(mkOR(ty),
557 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559 return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563 all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566 UInt w = 0;
567 if (ty == Ity_I64) {
568 w = 64;
569 } else {
570 vassert(ty == Ity_I32);
571 w = 32;
572 }
573 vassert(w != 0);
574 vassert(imm < w);
575 IRTemp res = newTemp(ty);
576 assign(res, binop(mkSAR(ty),
577 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578 mkU8(w - 1)));
579 return res;
580}
581
sewardj7d009132014-02-20 17:43:38 +0000582/* U-widen 8/16/32/64 bit int expr to 64. */
583static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
584{
585 switch (srcTy) {
586 case Ity_I64: return e;
587 case Ity_I32: return unop(Iop_32Uto64, e);
588 case Ity_I16: return unop(Iop_16Uto64, e);
589 case Ity_I8: return unop(Iop_8Uto64, e);
590 default: vpanic("widenUto64(arm64)");
591 }
592}
593
594/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
595 of these combinations make sense. */
596static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
597{
598 switch (dstTy) {
599 case Ity_I64: return e;
600 case Ity_I32: return unop(Iop_64to32, e);
601 case Ity_I16: return unop(Iop_64to16, e);
602 case Ity_I8: return unop(Iop_64to8, e);
603 default: vpanic("narrowFrom64(arm64)");
604 }
605}
606
sewardjbbcf1882014-01-12 12:49:10 +0000607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers. ---*/
610/*------------------------------------------------------------*/
611
/* Offsets into VexGuestARM64State for the integer registers X0..X30.
   Register 31 has no slot here: it encodes XSP or XZR depending on
   context (see OFFB_XSP and the getIReg*/putIReg* helpers below). */
#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

/* Stack pointer and program counter slots. */
#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

/* Thunk fields from which the condition flags are recomputed. */
#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

/* SIMD/FP vector registers Q0..Q31. */
#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29      offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

/* FP control and status registers. */
#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_FPSR     offsetof(VexGuestARM64State,guest_FPSR)
//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
//ZZ #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
//ZZ #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
//ZZ #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
//ZZ #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
//ZZ #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
//ZZ #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* Translation-invalidation range fields. */
#define OFFB_TISTART  offsetof(VexGuestARM64State,guest_TISTART)
#define OFFB_TILEN    offsetof(VexGuestARM64State,guest_TILEN)
700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706 /* Do we care about endianness here? We do if sub-parts of integer
707 registers are accessed. */
708 switch (iregNo) {
709 case 0: return OFFB_X0;
710 case 1: return OFFB_X1;
711 case 2: return OFFB_X2;
712 case 3: return OFFB_X3;
713 case 4: return OFFB_X4;
714 case 5: return OFFB_X5;
715 case 6: return OFFB_X6;
716 case 7: return OFFB_X7;
717 case 8: return OFFB_X8;
718 case 9: return OFFB_X9;
719 case 10: return OFFB_X10;
720 case 11: return OFFB_X11;
721 case 12: return OFFB_X12;
722 case 13: return OFFB_X13;
723 case 14: return OFFB_X14;
724 case 15: return OFFB_X15;
725 case 16: return OFFB_X16;
726 case 17: return OFFB_X17;
727 case 18: return OFFB_X18;
728 case 19: return OFFB_X19;
729 case 20: return OFFB_X20;
730 case 21: return OFFB_X21;
731 case 22: return OFFB_X22;
732 case 23: return OFFB_X23;
733 case 24: return OFFB_X24;
734 case 25: return OFFB_X25;
735 case 26: return OFFB_X26;
736 case 27: return OFFB_X27;
737 case 28: return OFFB_X28;
738 case 29: return OFFB_X29;
739 case 30: return OFFB_X30;
740 /* but not 31 */
741 default: vassert(0);
742 }
743}
744
/* As offsetIReg64, but register number 31 denotes the stack
   pointer (XSP). */
static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}
749
750static const HChar* nameIReg64orZR ( UInt iregNo )
751{
752 vassert(iregNo < 32);
753 static const HChar* names[32]
754 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
755 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
756 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
757 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
758 return names[iregNo];
759}
760
761static const HChar* nameIReg64orSP ( UInt iregNo )
762{
763 if (iregNo == 31) {
764 return "sp";
765 }
766 vassert(iregNo < 31);
767 return nameIReg64orZR(iregNo);
768}
769
/* Read a 64-bit integer register, where 31 denotes XSP. */
static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}
775
776static IRExpr* getIReg64orZR ( UInt iregNo )
777{
778 if (iregNo == 31) {
779 return mkU64(0);
780 }
781 vassert(iregNo < 31);
782 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
783}
784
/* Write a 64-bit value to an integer register, where 31 denotes XSP. */
static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}
790
791static void putIReg64orZR ( UInt iregNo, IRExpr* e )
792{
793 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
794 if (iregNo == 31) {
795 return;
796 }
797 vassert(iregNo < 31);
798 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
799}
800
801static const HChar* nameIReg32orZR ( UInt iregNo )
802{
803 vassert(iregNo < 32);
804 static const HChar* names[32]
805 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
806 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
807 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
808 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
809 return names[iregNo];
810}
811
812static const HChar* nameIReg32orSP ( UInt iregNo )
813{
814 if (iregNo == 31) {
815 return "wsp";
816 }
817 vassert(iregNo < 31);
818 return nameIReg32orZR(iregNo);
819}
820
/* Read the 32-bit view of an integer register, where 31 denotes WSP.
   Implemented by narrowing the full 64-bit guest slot. */
static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}
827
828static IRExpr* getIReg32orZR ( UInt iregNo )
829{
830 if (iregNo == 31) {
831 return mkU32(0);
832 }
833 vassert(iregNo < 31);
834 return unop(Iop_64to32,
835 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
836}
837
/* Write a 32-bit value to an integer register, where 31 denotes WSP.
   The value is zero-extended into the full 64-bit guest slot. */
static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}
843
844static void putIReg32orZR ( UInt iregNo, IRExpr* e )
845{
846 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
847 if (iregNo == 31) {
848 return;
849 }
850 vassert(iregNo < 31);
851 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
852}
853
854static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
855{
856 vassert(is64 == True || is64 == False);
857 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
858}
859
860static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
861{
862 vassert(is64 == True || is64 == False);
863 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
864}
865
866static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
867{
868 vassert(is64 == True || is64 == False);
869 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
870}
871
872static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
873{
874 vassert(is64 == True || is64 == False);
875 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
876}
877
/* Write a 64-bit value to the guest program counter. */
static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889 /* We don't care about endianness at this point. It only becomes
890 relevant when dealing with sections of these registers.*/
891 switch (qregNo) {
892 case 0: return OFFB_Q0;
893 case 1: return OFFB_Q1;
894 case 2: return OFFB_Q2;
895 case 3: return OFFB_Q3;
896 case 4: return OFFB_Q4;
897 case 5: return OFFB_Q5;
898 case 6: return OFFB_Q6;
899 case 7: return OFFB_Q7;
900 case 8: return OFFB_Q8;
901 case 9: return OFFB_Q9;
902 case 10: return OFFB_Q10;
903 case 11: return OFFB_Q11;
904 case 12: return OFFB_Q12;
905 case 13: return OFFB_Q13;
906 case 14: return OFFB_Q14;
907 case 15: return OFFB_Q15;
908 case 16: return OFFB_Q16;
909 case 17: return OFFB_Q17;
910 case 18: return OFFB_Q18;
911 case 19: return OFFB_Q19;
912 case 20: return OFFB_Q20;
913 case 21: return OFFB_Q21;
914 case 22: return OFFB_Q22;
915 case 23: return OFFB_Q23;
916 case 24: return OFFB_Q24;
917 case 25: return OFFB_Q25;
918 case 26: return OFFB_Q26;
919 case 27: return OFFB_Q27;
920 case 28: return OFFB_Q28;
921 case 29: return OFFB_Q29;
922 case 30: return OFFB_Q30;
923 case 31: return OFFB_Q31;
924 default: vassert(0);
925 }
926}
927
/* Write to a complete Qreg.  'e' must be a V128-typed expression. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}
935
/* Read a complete Qreg as a V128 expression. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}
942
943/* Produce the IR type for some sub-part of a vector. For 32- and 64-
944 bit sub-parts we can choose either integer or float types, and
945 choose float on the basis that that is the common use case and so
946 will give least interference with Put-to-Get forwarding later
947 on. */
948static IRType preferredVectorSubTypeFromSize ( UInt szB )
949{
950 switch (szB) {
951 case 1: return Ity_I8;
952 case 2: return Ity_I16;
953 case 4: return Ity_I32; //Ity_F32;
954 case 8: return Ity_F64;
955 case 16: return Ity_V128;
956 default: vassert(0);
957 }
958}
959
sewardj606c4ba2014-01-26 19:11:14 +0000960/* Find the offset of the laneNo'th lane of type laneTy in the given
961 Qreg. Since the host is little-endian, the least significant lane
962 has the lowest offset. */
963static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardjbbcf1882014-01-12 12:49:10 +0000964{
965 vassert(!host_is_bigendian);
966 Int base = offsetQReg128(qregNo);
sewardj606c4ba2014-01-26 19:11:14 +0000967 /* Since the host is little-endian, the least significant lane
968 will be at the lowest address. */
969 /* Restrict this to known types, so as to avoid silently accepting
970 stupid types. */
971 UInt laneSzB = 0;
972 switch (laneTy) {
sewardj5860ec72014-03-01 11:19:45 +0000973 case Ity_I8: laneSzB = 1; break;
974 case Ity_I16: laneSzB = 2; break;
sewardj606c4ba2014-01-26 19:11:14 +0000975 case Ity_F32: case Ity_I32: laneSzB = 4; break;
976 case Ity_F64: case Ity_I64: laneSzB = 8; break;
977 case Ity_V128: laneSzB = 16; break;
978 default: break;
sewardjbbcf1882014-01-12 12:49:10 +0000979 }
sewardj606c4ba2014-01-26 19:11:14 +0000980 vassert(laneSzB > 0);
981 UInt minOff = laneNo * laneSzB;
982 UInt maxOff = minOff + laneSzB - 1;
983 vassert(maxOff < 16);
984 return base + minOff;
sewardjbbcf1882014-01-12 12:49:10 +0000985}
986
sewardj606c4ba2014-01-26 19:11:14 +0000987/* Put to the least significant lane of a Qreg. */
988static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +0000989{
990 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +0000991 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +0000992 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +0000993 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
994 case Ity_F32: case Ity_F64: case Ity_V128:
995 break;
996 default:
997 vassert(0); // Other cases are probably invalid
sewardjbbcf1882014-01-12 12:49:10 +0000998 }
999 stmt(IRStmt_Put(off, e));
1000}
1001
sewardj606c4ba2014-01-26 19:11:14 +00001002/* Get from the least significant lane of a Qreg. */
1003static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardjbbcf1882014-01-12 12:49:10 +00001004{
sewardj606c4ba2014-01-26 19:11:14 +00001005 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001006 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001007 case Ity_I32: case Ity_I64:
1008 case Ity_F32: case Ity_F64: case Ity_V128:
1009 break;
1010 default:
1011 vassert(0); // Other cases are ATC
sewardjbbcf1882014-01-12 12:49:10 +00001012 }
1013 return IRExpr_Get(off, ty);
1014}
1015
sewardj606c4ba2014-01-26 19:11:14 +00001016static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
sewardjbbcf1882014-01-12 12:49:10 +00001017{
1018 static const HChar* namesQ[32]
1019 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1020 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1021 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1022 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1023 static const HChar* namesD[32]
1024 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1025 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1026 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1027 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1028 static const HChar* namesS[32]
1029 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1030 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1031 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1032 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1033 static const HChar* namesH[32]
1034 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1035 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1036 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1037 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1038 static const HChar* namesB[32]
1039 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1040 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1041 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1042 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1043 vassert(qregNo < 32);
sewardj606c4ba2014-01-26 19:11:14 +00001044 switch (sizeofIRType(laneTy)) {
sewardjbbcf1882014-01-12 12:49:10 +00001045 case 1: return namesB[qregNo];
1046 case 2: return namesH[qregNo];
1047 case 4: return namesS[qregNo];
1048 case 8: return namesD[qregNo];
1049 case 16: return namesQ[qregNo];
1050 default: vassert(0);
1051 }
1052 /*NOTREACHED*/
1053}
1054
sewardj606c4ba2014-01-26 19:11:14 +00001055static const HChar* nameQReg128 ( UInt qregNo )
1056{
1057 return nameQRegLO(qregNo, Ity_V128);
1058}
1059
sewardjbbcf1882014-01-12 12:49:10 +00001060/* Find the offset of the most significant half (8 bytes) of the given
1061 Qreg. This requires knowing the endianness of the host. */
sewardj606c4ba2014-01-26 19:11:14 +00001062static Int offsetQRegHI64 ( UInt qregNo )
sewardjbbcf1882014-01-12 12:49:10 +00001063{
sewardj606c4ba2014-01-26 19:11:14 +00001064 return offsetQRegLane(qregNo, Ity_I64, 1);
sewardjbbcf1882014-01-12 12:49:10 +00001065}
1066
sewardj606c4ba2014-01-26 19:11:14 +00001067static IRExpr* getQRegHI64 ( UInt qregNo )
sewardjbbcf1882014-01-12 12:49:10 +00001068{
sewardj606c4ba2014-01-26 19:11:14 +00001069 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
sewardjbbcf1882014-01-12 12:49:10 +00001070}
1071
sewardj606c4ba2014-01-26 19:11:14 +00001072static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001073{
1074 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001075 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001076 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001077 case Ity_I64: case Ity_F64:
1078 break;
1079 default:
1080 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001081 }
1082 stmt(IRStmt_Put(off, e));
1083}
1084
sewardj606c4ba2014-01-26 19:11:14 +00001085/* Put to a specified lane of a Qreg. */
1086static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1087{
1088 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1089 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1090 switch (laneTy) {
1091 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001092 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001093 case Ity_I16:
1094 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001095 break;
1096 default:
1097 vassert(0); // Other cases are ATC
1098 }
1099 stmt(IRStmt_Put(off, e));
1100}
1101
sewardj32d86752014-03-02 12:47:18 +00001102/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001103static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1104{
1105 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1106 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001107 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1108 case Ity_F64:
sewardj606c4ba2014-01-26 19:11:14 +00001109 break;
1110 default:
1111 vassert(0); // Other cases are ATC
1112 }
1113 return IRExpr_Get(off, laneTy);
1114}
1115
1116
sewardjbbcf1882014-01-12 12:49:10 +00001117//ZZ /* ---------------- Misc registers ---------------- */
1118//ZZ
1119//ZZ static void putMiscReg32 ( UInt gsoffset,
1120//ZZ IRExpr* e, /* :: Ity_I32 */
1121//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1122//ZZ {
1123//ZZ switch (gsoffset) {
1124//ZZ case OFFB_FPSCR: break;
1125//ZZ case OFFB_QFLAG32: break;
1126//ZZ case OFFB_GEFLAG0: break;
1127//ZZ case OFFB_GEFLAG1: break;
1128//ZZ case OFFB_GEFLAG2: break;
1129//ZZ case OFFB_GEFLAG3: break;
1130//ZZ default: vassert(0); /* awaiting more cases */
1131//ZZ }
1132//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1133//ZZ
1134//ZZ if (guardT == IRTemp_INVALID) {
1135//ZZ /* unconditional write */
1136//ZZ stmt(IRStmt_Put(gsoffset, e));
1137//ZZ } else {
1138//ZZ stmt(IRStmt_Put(
1139//ZZ gsoffset,
1140//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1141//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1142//ZZ ));
1143//ZZ }
1144//ZZ }
1145//ZZ
1146//ZZ static IRTemp get_ITSTATE ( void )
1147//ZZ {
1148//ZZ ASSERT_IS_THUMB;
1149//ZZ IRTemp t = newTemp(Ity_I32);
1150//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1151//ZZ return t;
1152//ZZ }
1153//ZZ
1154//ZZ static void put_ITSTATE ( IRTemp t )
1155//ZZ {
1156//ZZ ASSERT_IS_THUMB;
1157//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1158//ZZ }
1159//ZZ
1160//ZZ static IRTemp get_QFLAG32 ( void )
1161//ZZ {
1162//ZZ IRTemp t = newTemp(Ity_I32);
1163//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1164//ZZ return t;
1165//ZZ }
1166//ZZ
1167//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1168//ZZ {
1169//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1170//ZZ }
1171//ZZ
1172//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1173//ZZ Status Register) to indicate that overflow or saturation occurred.
1174//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1175//ZZ value to indicate saturation. */
1176//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1177//ZZ {
1178//ZZ IRTemp old = get_QFLAG32();
1179//ZZ IRTemp nyu = newTemp(Ity_I32);
1180//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1181//ZZ put_QFLAG32(nyu, condT);
1182//ZZ }
1183
1184
1185/* ---------------- FPCR stuff ---------------- */
1186
1187/* Generate IR to get hold of the rounding mode bits in FPCR, and
1188 convert them to IR format. Bind the final result to the
1189 returned temp. */
1190static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1191{
1192 /* The ARMvfp encoding for rounding mode bits is:
1193 00 to nearest
1194 01 to +infinity
1195 10 to -infinity
1196 11 to zero
1197 We need to convert that to the IR encoding:
1198 00 to nearest (the default)
1199 10 to +infinity
1200 01 to -infinity
1201 11 to zero
1202 Which can be done by swapping bits 0 and 1.
1203 The rmode bits are at 23:22 in FPSCR.
1204 */
1205 IRTemp armEncd = newTemp(Ity_I32);
1206 IRTemp swapped = newTemp(Ity_I32);
1207 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1208 we don't zero out bits 24 and above, since the assignment to
1209 'swapped' will mask them out anyway. */
1210 assign(armEncd,
1211 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1212 /* Now swap them. */
1213 assign(swapped,
1214 binop(Iop_Or32,
1215 binop(Iop_And32,
1216 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1217 mkU32(2)),
1218 binop(Iop_And32,
1219 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1220 mkU32(1))
1221 ));
1222 return swapped;
1223}
1224
1225
1226/*------------------------------------------------------------*/
1227/*--- Helpers for flag handling and conditional insns ---*/
1228/*------------------------------------------------------------*/
1229
1230static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1231{
1232 switch (cond) {
1233 case ARM64CondEQ: return "eq";
1234 case ARM64CondNE: return "ne";
1235 case ARM64CondCS: return "cs"; // or 'hs'
1236 case ARM64CondCC: return "cc"; // or 'lo'
1237 case ARM64CondMI: return "mi";
1238 case ARM64CondPL: return "pl";
1239 case ARM64CondVS: return "vs";
1240 case ARM64CondVC: return "vc";
1241 case ARM64CondHI: return "hi";
1242 case ARM64CondLS: return "ls";
1243 case ARM64CondGE: return "ge";
1244 case ARM64CondLT: return "lt";
1245 case ARM64CondGT: return "gt";
1246 case ARM64CondLE: return "le";
1247 case ARM64CondAL: return "al";
1248 case ARM64CondNV: return "nv";
1249 default: vpanic("name_ARM64Condcode");
1250 }
1251}
1252
1253/* and a handy shorthand for it */
1254static const HChar* nameCC ( ARM64Condcode cond ) {
1255 return nameARM64Condcode(cond);
1256}
1257
1258
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.
      (Mask bits 0 and 3 correspond to args[0] (the OP|cond word) and
      args[3] (NDEP) above.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1300
1301
1302/* Build IR to calculate some particular condition from stored
1303 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1304 Ity_I64, suitable for narrowing. Although the return type is
1305 Ity_I64, the returned value is either 0 or 1.
1306*/
1307static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1308{
1309 /* First arg is "(cond << 4) | condition". This requires that the
1310 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1311 (COND, OP) pair in the lowest 8 bits of the first argument. */
1312 vassert(cond >= 0 && cond <= 15);
1313 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1314}
1315
1316
1317//ZZ /* Build IR to calculate just the carry flag from stored
1318//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1319//ZZ Ity_I32. */
1320//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
1321//ZZ {
1322//ZZ IRExpr** args
1323//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1324//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1325//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1326//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1327//ZZ IRExpr* call
1328//ZZ = mkIRExprCCall(
1329//ZZ Ity_I32,
1330//ZZ 0/*regparm*/,
1331//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
1332//ZZ args
1333//ZZ );
1334//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1335//ZZ interested in DEP1 and DEP2. */
1336//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1337//ZZ return call;
1338//ZZ }
1339//ZZ
1340//ZZ
1341//ZZ /* Build IR to calculate just the overflow flag from stored
1342//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1343//ZZ Ity_I32. */
1344//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1345//ZZ {
1346//ZZ IRExpr** args
1347//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1348//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1349//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1350//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1351//ZZ IRExpr* call
1352//ZZ = mkIRExprCCall(
1353//ZZ Ity_I32,
1354//ZZ 0/*regparm*/,
1355//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1356//ZZ args
1357//ZZ );
1358//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1359//ZZ interested in DEP1 and DEP2. */
1360//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1361//ZZ return call;
1362//ZZ }
1363
1364
1365/* Build IR to calculate N Z C V in bits 31:28 of the
1366 returned word. */
1367static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1368{
1369 IRExpr** args
1370 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1371 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1372 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1373 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1374 IRExpr* call
1375 = mkIRExprCCall(
1376 Ity_I64,
1377 0/*regparm*/,
1378 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1379 args
1380 );
1381 /* Exclude OP and NDEP from definedness checking. We're only
1382 interested in DEP1 and DEP2. */
1383 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1384 return call;
1385}
1386
1387
1388/* Build IR to set the flags thunk, in the most general case. */
1389static
1390void setFlags_D1_D2_ND ( UInt cc_op,
1391 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1392{
1393 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1394 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1395 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1396 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1397 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1398 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1399 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1400 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1401}
1402
1403/* Build IR to set the flags thunk after ADD or SUB. */
1404static
1405void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1406{
1407 IRTemp argL64 = IRTemp_INVALID;
1408 IRTemp argR64 = IRTemp_INVALID;
1409 IRTemp z64 = newTemp(Ity_I64);
1410 if (is64) {
1411 argL64 = argL;
1412 argR64 = argR;
1413 } else {
1414 argL64 = newTemp(Ity_I64);
1415 argR64 = newTemp(Ity_I64);
1416 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1417 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1418 }
1419 assign(z64, mkU64(0));
1420 UInt cc_op = ARM64G_CC_OP_NUMBER;
1421 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1422 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1423 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1424 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1425 else { vassert(0); }
1426 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1427}
1428
/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value.  'cond' :: Ity_I1; 'nzcv' holds the
   fallback flag values in its low 4 bits (assumes nzcv < 16, so that
   nzcv << 28 below cannot overflow — TODO confirm at call sites). */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* Widen 32-bit operands to I64, since the thunk fields are I64. */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case:
      OP_COPY takes its NZCV directly from DEP1[31:28]. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values: select True/False variants with ITEs. */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}
1489
1490/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1491static
1492void setFlags_LOGIC ( Bool is64, IRTemp res )
1493{
1494 IRTemp res64 = IRTemp_INVALID;
1495 IRTemp z64 = newTemp(Ity_I64);
1496 UInt cc_op = ARM64G_CC_OP_NUMBER;
1497 if (is64) {
1498 res64 = res;
1499 cc_op = ARM64G_CC_OP_LOGIC64;
1500 } else {
1501 res64 = newTemp(Ity_I64);
1502 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1503 cc_op = ARM64G_CC_OP_LOGIC32;
1504 }
1505 assign(z64, mkU64(0));
1506 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1507}
1508
1509/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1510 located in bits 31:28 of the supplied value. */
1511static
1512void setFlags_COPY ( IRTemp nzcv_28x0 )
1513{
1514 IRTemp z64 = newTemp(Ity_I64);
1515 assign(z64, mkU64(0));
1516 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1517}
1518
1519
1520//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1521//ZZ sets it at all) */
1522//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1523//ZZ IRTemp t_dep2,
1524//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1525//ZZ {
1526//ZZ IRTemp z32 = newTemp(Ity_I32);
1527//ZZ assign( z32, mkU32(0) );
1528//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1529//ZZ }
1530//ZZ
1531//ZZ
1532//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1533//ZZ sets it at all) */
1534//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1535//ZZ IRTemp t_ndep,
1536//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1537//ZZ {
1538//ZZ IRTemp z32 = newTemp(Ity_I32);
1539//ZZ assign( z32, mkU32(0) );
1540//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1541//ZZ }
1542//ZZ
1543//ZZ
1544//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1545//ZZ sets them at all) */
1546//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1547//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1548//ZZ {
1549//ZZ IRTemp z32 = newTemp(Ity_I32);
1550//ZZ assign( z32, mkU32(0) );
1551//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1552//ZZ }
1553
1554
1555/*------------------------------------------------------------*/
1556/*--- Misc math helpers ---*/
1557/*------------------------------------------------------------*/
1558
sewardj32d86752014-03-02 12:47:18 +00001559/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1560static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00001561{
sewardj32d86752014-03-02 12:47:18 +00001562 IRTemp maskT = newTemp(Ity_I64);
1563 IRTemp res = newTemp(Ity_I64);
1564 vassert(sh >= 1 && sh <= 63);
1565 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00001566 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00001567 binop(Iop_Or64,
1568 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00001569 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1570 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00001571 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00001572 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1573 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00001574 )
1575 );
sewardjdc9259c2014-02-27 11:10:19 +00001576 return res;
1577}
1578
sewardj32d86752014-03-02 12:47:18 +00001579/* Generates byte swaps within 32-bit lanes. */
1580static IRTemp math_UINTSWAP64 ( IRTemp src )
1581{
1582 IRTemp res;
1583 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1584 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1585 return res;
1586}
1587
1588/* Generates byte swaps within 16-bit lanes. */
1589static IRTemp math_USHORTSWAP64 ( IRTemp src )
1590{
1591 IRTemp res;
1592 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1593 return res;
1594}
1595
1596/* Generates a 64-bit byte swap. */
1597static IRTemp math_BYTESWAP64 ( IRTemp src )
1598{
1599 IRTemp res;
1600 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1601 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1602 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
1603 return res;
1604}
sewardjdc9259c2014-02-27 11:10:19 +00001605
1606/* Generates a 64-bit bit swap. */
1607static IRTemp math_BITSWAP64 ( IRTemp src )
1608{
sewardj32d86752014-03-02 12:47:18 +00001609 IRTemp res;
1610 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
1611 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
1612 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
1613 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00001614}
1615
sewardj606c4ba2014-01-26 19:11:14 +00001616/* Duplicates the bits at the bottom of the given word to fill the
1617 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
1618 except for the bottom bits. */
1619static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1620{
1621 if (srcTy == Ity_I8) {
1622 IRTemp t16 = newTemp(Ity_I64);
1623 assign(t16, binop(Iop_Or64, mkexpr(src),
1624 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1625 IRTemp t32 = newTemp(Ity_I64);
1626 assign(t32, binop(Iop_Or64, mkexpr(t16),
1627 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1628 IRTemp t64 = newTemp(Ity_I64);
1629 assign(t64, binop(Iop_Or64, mkexpr(t32),
1630 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1631 return t64;
1632 }
1633 if (srcTy == Ity_I16) {
1634 IRTemp t32 = newTemp(Ity_I64);
1635 assign(t32, binop(Iop_Or64, mkexpr(src),
1636 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1637 IRTemp t64 = newTemp(Ity_I64);
1638 assign(t64, binop(Iop_Or64, mkexpr(t32),
1639 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1640 return t64;
1641 }
1642 if (srcTy == Ity_I32) {
1643 IRTemp t64 = newTemp(Ity_I64);
1644 assign(t64, binop(Iop_Or64, mkexpr(src),
1645 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1646 return t64;
1647 }
1648 if (srcTy == Ity_I64) {
1649 return src;
1650 }
1651 vassert(0);
1652}
1653
1654
sewardjbbcf1882014-01-12 12:49:10 +00001655/*------------------------------------------------------------*/
1656/*--- FP comparison helpers ---*/
1657/*------------------------------------------------------------*/
1658
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the I32 comparison result so all arithmetic is at I64. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (irRes[6] << 1) | irRes[0], i.e. 0..3. */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   /* The superopt-derived near-NZCV value; do not "simplify". */
   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   /* termR = ix & (ix >> 1) & 1, i.e. 1 only when ix == 3 (UN). */
   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1735
1736
1737/*------------------------------------------------------------*/
1738/*--- Data processing (immediate) ---*/
1739/*------------------------------------------------------------*/
1740
1741/* Helper functions for supporting "DecodeBitMasks" */
1742
1743static ULong dbm_ROR ( Int width, ULong x, Int rot )
1744{
1745 vassert(width > 0 && width <= 64);
1746 vassert(rot >= 0 && rot < width);
1747 if (rot == 0) return x;
1748 ULong res = x >> rot;
1749 res |= (x << (width - rot));
1750 if (width < 64)
1751 res &= ((1ULL << width) - 1);
1752 return res;
1753}
1754
1755static ULong dbm_RepTo64( Int esize, ULong x )
1756{
1757 switch (esize) {
1758 case 64:
1759 return x;
1760 case 32:
1761 x &= 0xFFFFFFFF; x |= (x << 32);
1762 return x;
1763 case 16:
1764 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1765 return x;
1766 case 8:
1767 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1768 return x;
1769 case 4:
1770 x &= 0xF; x |= (x << 4); x |= (x << 8);
1771 x |= (x << 16); x |= (x << 32);
1772 return x;
1773 case 2:
1774 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1775 x |= (x << 16); x |= (x << 32);
1776 return x;
1777 default:
1778 break;
1779 }
1780 vpanic("dbm_RepTo64");
1781 /*NOTREACHED*/
1782 return 0;
1783}
1784
1785static Int dbm_highestSetBit ( ULong x )
1786{
1787 Int i;
1788 for (i = 63; i >= 0; i--) {
1789 if (x & (1ULL << i))
1790 return i;
1791 }
1792 vassert(x == 0);
1793 return -1;
1794}
1795
/* Decode an ARM64 logical-immediate bitmask encoding (immN:immr:imms),
   writing the resulting masks to *wmask and *tmask (either may be
   NULL if not wanted).  M is the register width (32 or 64);
   'immediate' selects the extra validity check used for immediate
   forms.  Returns False if the encoding is invalid.  Mirrors the
   DecodeBitMasks() pseudo-function of the ARMv8 ARM — TODO confirm
   against the current ARM ARM revision. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = position of the highest set bit of N:NOT(imms); this
      determines the element size. */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* Immediate forms may not have all element bits set. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask: the element rotated right by R, replicated to 64 bits;
      tmask: the unrotated d+1-bit run, replicated to 64 bits. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1855
1856
/* Decode and translate one instruction from the A64 "data processing
   (immediate)" group.  On success, emits IR into the current block,
   prints via DIP, and returns True.  Returns False if the instruction
   is not recognised (in which case no IR has been committed that
   matters).  |dres| is currently unused by this group. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is shifted left by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting form writes Rd as ZR, not SP. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      /* 21-bit signed offset, split across immHi:immLo. */
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         /* ADRP: page-relative; base is PC with its low 12 bits
            cleared, and the offset is scaled by 4096. */
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      /* Expand the (N,immS,immR) bitmask-immediate encoding; rejects
         reserved encodings. */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL  = getIReg64orZR(nn);
         IRExpr* argR  = mkU64(imm);
         IRTemp  res   = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR: destination may be SP, no flag update. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS: destination is ZR-encoded, flags are updated. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL  = getIReg32orZR(nn);
         IRExpr* argR  = mkU32((UInt)imm);
         IRTemp  res   = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;  /* insert into zeroes (vs into old Rd)? */
      Bool extend = False;  /* sign-extend the extracted field? */
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      /* Implements the ARM-pseudocode BFM algorithm: rotate the source,
         mask in the moved bits (wmask), then overlay the top/extension
         bits (tmask). */
      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         /* Shift of zero: result is just the low source; avoids an
            invalid shift-by-szBits below. */
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         /* Concatenate Xn:Xm and extract szBits starting at bit imm6. */
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2198
2199
2200/*------------------------------------------------------------*/
2201/*--- Data processing (register) instructions ---*/
2202/*------------------------------------------------------------*/
2203
2204static const HChar* nameSH ( UInt sh ) {
2205 switch (sh) {
2206 case 0: return "lsl";
2207 case 1: return "lsr";
2208 case 2: return "asr";
2209 case 3: return "ror";
2210 default: vassert(0);
2211 }
2212}
2213
2214/* Generate IR to get a register value, possibly shifted by an
2215 immediate. Returns either a 32- or 64-bit temporary holding the
2216 result. After the shift, the value can optionally be NOT-ed
2217 too.
2218
2219 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2220 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2221 isn't allowed, but it's the job of the caller to check that.
2222*/
2223static IRTemp getShiftedIRegOrZR ( Bool is64,
2224 UInt sh_how, UInt sh_amt, UInt regNo,
2225 Bool invert )
2226{
2227 vassert(sh_how < 4);
2228 vassert(sh_amt < (is64 ? 64 : 32));
2229 IRType ty = is64 ? Ity_I64 : Ity_I32;
2230 IRTemp t0 = newTemp(ty);
2231 assign(t0, getIRegOrZR(is64, regNo));
2232 IRTemp t1 = newTemp(ty);
2233 switch (sh_how) {
2234 case BITS2(0,0):
2235 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2236 break;
2237 case BITS2(0,1):
2238 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2239 break;
2240 case BITS2(1,0):
2241 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2242 break;
2243 case BITS2(1,1):
2244 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2245 break;
2246 default:
2247 vassert(0);
2248 }
2249 if (invert) {
2250 IRTemp t2 = newTemp(ty);
2251 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2252 return t2;
2253 } else {
2254 return t1;
2255 }
2256}
2257
2258
/* Decode and translate one instruction from the A64 "data processing
   (register)" group.  On success, emits IR, prints via DIP and returns
   True; returns False for unrecognised encodings.  |dres| is currently
   unused by this group. */
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29, 29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* rD==31 is ZR here, so the write would be discarded anyway;
            the guard merely skips emitting a dead statement. */
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         /* bN==1 selects the inverted (BIC/ORN/EON/BICS) forms. */
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9 4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)
       && INSN(23,23) == 1/*ATC*/) {
      /* NOTE: the INSN(23,23)==1 guard means only UMULH is accepted
         here; SMULH falls through as undecoded ("ATC" = awaiting a
         test case). */
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            /* Negation expressed as 0 - Rm. */
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases:
         compute everything at 64 bits and narrow at the end. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            /* Sign-extend by shifting up then arithmetic-shifting
               back down. */
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31   29        20   15   11 9    3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31   29        20 15   11 9    3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20     15   11 9 4

      1 10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0 10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1 10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0 10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1 10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0 10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1 10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         /* All four operations are implemented by 64-bit swap helpers;
            32-bit forms pre-shift the operand into the top half. */
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      if (!isCLS) { // CLS not yet supported
         /* Iop_Clz64 is undefined for a zero input, so the all-zeroes
            case is handled explicitly via ITE. */
         if (is64) {
            assign(src, getIReg64orZR(nn));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(64),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit form: shift the operand into the top half so the
               64-bit CLZ counts the right leading zeroes. */
            assign(src, binop(Iop_Shl64,
                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(32),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("cl%c %s, %s\n",
             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
         return True;
      }
   }

   /* -------------------- LSLV/LSRV/ASRV -------------------- */
   /*    30 28        20 15   11 9  4
      sf 00 1101 0110 m  0010 00 n  d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n  d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n  d   ASRV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I8);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      /* Per the architecture, the shift amount is Rm taken modulo the
         datasize (63 or 31 mask). */
      assign(srcR,
             unop(Iop_64to8,
                  binop(Iop_And64,
                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
      switch (op) {
         case BITS2(0,0): iop = mkSHL(ty); break;
         case BITS2(0,1): iop = mkSHR(ty); break;
         case BITS2(1,0): iop = mkSAR(ty); break;
         default: vassert(0);
      }
      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 3);
      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}
2876
2877
2878/*------------------------------------------------------------*/
2879/*--- Load and Store instructions ---*/
2880/*------------------------------------------------------------*/
2881
2882/* Generate the EA for a "reg + reg" style amode. This is done from
2883 parts of the insn, but for sanity checking sake it takes the whole
2884 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
2885 and S=insn[12]:
2886
2887 The possible forms, along with their opt:S values, are:
2888 011:0 Xn|SP + Xm
2889 111:0 Xn|SP + Xm
2890 011:1 Xn|SP + Xm * transfer_szB
2891 111:1 Xn|SP + Xm * transfer_szB
2892 010:0 Xn|SP + 32Uto64(Wm)
2893 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
2894 110:0 Xn|SP + 32Sto64(Wm)
2895 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
2896
2897 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
2898 the transfer size is insn[23,31,30]. For integer loads/stores,
2899 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2900
2901 If the decoding fails, it returns IRTemp_INVALID.
2902
2903 isInt is True iff this decoding is for transfers to/from integer
2904 registers. If False it is for transfers to/from vector registers.
2905*/
2906static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
2907{
2908 UInt optS = SLICE_UInt(insn, 15, 12);
2909 UInt mm = SLICE_UInt(insn, 20, 16);
2910 UInt nn = SLICE_UInt(insn, 9, 5);
2911 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
2912 | SLICE_UInt(insn, 31, 30); // Log2 of the size
2913
2914 buf[0] = 0;
2915
2916 /* Sanity checks, that this really is a load/store insn. */
2917 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
2918 goto fail;
2919
2920 if (isInt
2921 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
2922 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
2923 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
2924 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
2925 goto fail;
2926
2927 if (!isInt
2928 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
2929 goto fail;
2930
2931 /* Throw out non-verified but possibly valid cases. */
2932 switch (szLg2) {
2933 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
2934 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
2935 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
2936 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
2937 case BITS3(1,0,0): // can only ever be valid for the vector case
2938 if (isInt) goto fail; else goto fail;
2939 case BITS3(1,0,1): // these sizes are never valid
2940 case BITS3(1,1,0):
2941 case BITS3(1,1,1): goto fail;
2942
2943 default: vassert(0);
2944 }
2945
2946 IRExpr* rhs = NULL;
2947 switch (optS) {
2948 case BITS4(1,1,1,0): goto fail; //ATC
2949 case BITS4(0,1,1,0):
2950 rhs = getIReg64orZR(mm);
2951 vex_sprintf(buf, "[%s, %s]",
2952 nameIReg64orZR(nn), nameIReg64orZR(mm));
2953 break;
2954 case BITS4(1,1,1,1): goto fail; //ATC
2955 case BITS4(0,1,1,1):
2956 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
2957 vex_sprintf(buf, "[%s, %s lsl %u]",
2958 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
2959 break;
2960 case BITS4(0,1,0,0):
2961 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
2962 vex_sprintf(buf, "[%s, %s uxtx]",
2963 nameIReg64orZR(nn), nameIReg32orZR(mm));
2964 break;
2965 case BITS4(0,1,0,1):
2966 rhs = binop(Iop_Shl64,
2967 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
2968 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
2969 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2970 break;
2971 case BITS4(1,1,0,0):
2972 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
2973 vex_sprintf(buf, "[%s, %s sxtx]",
2974 nameIReg64orZR(nn), nameIReg32orZR(mm));
2975 break;
2976 case BITS4(1,1,0,1):
2977 rhs = binop(Iop_Shl64,
2978 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
2979 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
2980 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2981 break;
2982 default:
2983 /* The rest appear to be genuinely invalid */
2984 goto fail;
2985 }
2986
2987 vassert(rhs);
2988 IRTemp res = newTemp(Ity_I64);
2989 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
2990 return res;
2991
2992 fail:
2993 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
2994 return IRTemp_INVALID;
2995}
2996
2997
2998/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
2999 bits of DATAE :: Ity_I64. */
3000static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3001{
3002 IRExpr* addrE = mkexpr(addr);
3003 switch (szB) {
3004 case 8:
3005 storeLE(addrE, dataE);
3006 break;
3007 case 4:
3008 storeLE(addrE, unop(Iop_64to32, dataE));
3009 break;
3010 case 2:
3011 storeLE(addrE, unop(Iop_64to16, dataE));
3012 break;
3013 case 1:
3014 storeLE(addrE, unop(Iop_64to8, dataE));
3015 break;
3016 default:
3017 vassert(0);
3018 }
3019}
3020
3021
3022/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3023 placing the result in an Ity_I64 temporary. */
3024static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3025{
3026 IRTemp res = newTemp(Ity_I64);
3027 IRExpr* addrE = mkexpr(addr);
3028 switch (szB) {
3029 case 8:
3030 assign(res, loadLE(Ity_I64,addrE));
3031 break;
3032 case 4:
3033 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3034 break;
3035 case 2:
3036 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3037 break;
3038 case 1:
3039 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3040 break;
3041 default:
3042 vassert(0);
3043 }
3044 return res;
3045}
3046
3047
3048static
3049Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3050{
3051# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3052
3053 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3054 /* uimm12 is scaled by the transfer size
3055
3056 31 29 26 21 9 4
3057 | | | | | |
3058 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3059 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3060
3061 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3062 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3063
3064 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3065 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3066
3067 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3068 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3069 */
3070 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3071 UInt szLg2 = INSN(31,30);
3072 UInt szB = 1 << szLg2;
3073 Bool isLD = INSN(22,22) == 1;
3074 UInt offs = INSN(21,10) * szB;
3075 UInt nn = INSN(9,5);
3076 UInt tt = INSN(4,0);
3077 IRTemp ta = newTemp(Ity_I64);
3078 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3079 if (nn == 31) { /* FIXME generate stack alignment check */ }
3080 vassert(szLg2 < 4);
3081 if (isLD) {
3082 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3083 } else {
3084 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3085 }
3086 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3087 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3088 DIP("%s %s, [%s, #%u]\n",
3089 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3090 nameIReg64orSP(nn), offs);
3091 return True;
3092 }
3093
3094 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3095 /*
3096 31 29 26 20 11 9 4
3097 | | | | | | |
3098 (at-Rn-then-Rn=EA) | | |
3099 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3100 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3101
3102 (at-EA-then-Rn=EA)
3103 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3104 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3105
3106 (at-EA)
3107 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3108 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3109
3110 simm9 is unscaled.
3111
3112 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3113 load case this is because would create two competing values for
3114 Rt. In the store case the reason is unclear, but the spec
3115 disallows it anyway.
3116
3117 Stores are narrowing, loads are unsigned widening. sz encodes
3118 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3119 */
3120 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3121 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3122 UInt szLg2 = INSN(31,30);
3123 UInt szB = 1 << szLg2;
3124 Bool isLoad = INSN(22,22) == 1;
3125 UInt imm9 = INSN(20,12);
3126 UInt nn = INSN(9,5);
3127 UInt tt = INSN(4,0);
3128 Bool wBack = INSN(10,10) == 1;
3129 UInt how = INSN(11,10);
3130 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3131 /* undecodable; fall through */
3132 } else {
3133 if (nn == 31) { /* FIXME generate stack alignment check */ }
3134
3135 // Compute the transfer address TA and the writeback address WA.
3136 IRTemp tRN = newTemp(Ity_I64);
3137 assign(tRN, getIReg64orSP(nn));
3138 IRTemp tEA = newTemp(Ity_I64);
3139 Long simm9 = (Long)sx_to_64(imm9, 9);
3140 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3141
3142 IRTemp tTA = newTemp(Ity_I64);
3143 IRTemp tWA = newTemp(Ity_I64);
3144 switch (how) {
3145 case BITS2(0,1):
3146 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3147 case BITS2(1,1):
3148 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3149 case BITS2(0,0):
3150 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3151 default:
3152 vassert(0); /* NOTREACHED */
3153 }
3154
sewardje0bff8b2014-03-09 09:40:23 +00003155 /* Normally rN would be updated after the transfer. However, in
3156 the special case typifed by
3157 str x30, [sp,#-16]!
3158 it is necessary to update SP before the transfer, (1)
3159 because Memcheck will otherwise complain about a write
3160 below the stack pointer, and (2) because the segfault
3161 stack extension mechanism will otherwise extend the stack
3162 only down to SP before the instruction, which might not be
3163 far enough, if the -16 bit takes the actual access
3164 address to the next page.
3165 */
3166 Bool earlyWBack
3167 = wBack && simm9 < 0 && szB == 8
3168 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3169
3170 if (wBack && earlyWBack)
3171 putIReg64orSP(nn, mkexpr(tEA));
3172
sewardjbbcf1882014-01-12 12:49:10 +00003173 if (isLoad) {
3174 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3175 } else {
3176 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3177 }
3178
sewardje0bff8b2014-03-09 09:40:23 +00003179 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003180 putIReg64orSP(nn, mkexpr(tEA));
3181
3182 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3183 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3184 const HChar* fmt_str = NULL;
3185 switch (how) {
3186 case BITS2(0,1):
3187 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3188 break;
3189 case BITS2(1,1):
3190 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3191 break;
3192 case BITS2(0,0):
3193 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3194 break;
3195 default:
3196 vassert(0);
3197 }
3198 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3199 nameIRegOrZR(szB == 8, tt),
3200 nameIReg64orSP(nn), simm9);
3201 return True;
3202 }
3203 }
3204
3205 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3206 /* L==1 => mm==LD
3207 L==0 => mm==ST
3208 x==0 => 32 bit transfers, and zero extended loads
3209 x==1 => 64 bit transfers
3210 simm7 is scaled by the (single-register) transfer size
3211
3212 (at-Rn-then-Rn=EA)
3213 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3214
3215 (at-EA-then-Rn=EA)
3216 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3217
3218 (at-EA)
3219 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3220 */
3221
3222 UInt insn_30_23 = INSN(30,23);
3223 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3224 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3225 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3226 UInt bL = INSN(22,22);
3227 UInt bX = INSN(31,31);
3228 UInt bWBack = INSN(23,23);
3229 UInt rT1 = INSN(4,0);
3230 UInt rN = INSN(9,5);
3231 UInt rT2 = INSN(14,10);
3232 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3233 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3234 || (bL && rT1 == rT2)) {
3235 /* undecodable; fall through */
3236 } else {
3237 if (rN == 31) { /* FIXME generate stack alignment check */ }
3238
3239 // Compute the transfer address TA and the writeback address WA.
3240 IRTemp tRN = newTemp(Ity_I64);
3241 assign(tRN, getIReg64orSP(rN));
3242 IRTemp tEA = newTemp(Ity_I64);
3243 simm7 = (bX ? 8 : 4) * simm7;
3244 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3245
3246 IRTemp tTA = newTemp(Ity_I64);
3247 IRTemp tWA = newTemp(Ity_I64);
3248 switch (INSN(24,23)) {
3249 case BITS2(0,1):
3250 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3251 case BITS2(1,1):
3252 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3253 case BITS2(1,0):
3254 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3255 default:
3256 vassert(0); /* NOTREACHED */
3257 }
3258
3259 /* Normally rN would be updated after the transfer. However, in
3260 the special case typifed by
3261 stp x29, x30, [sp,#-112]!
3262 it is necessary to update SP before the transfer, (1)
3263 because Memcheck will otherwise complain about a write
3264 below the stack pointer, and (2) because the segfault
3265 stack extension mechanism will otherwise extend the stack
3266 only down to SP before the instruction, which might not be
3267 far enough, if the -112 bit takes the actual access
3268 address to the next page.
3269 */
3270 Bool earlyWBack
3271 = bWBack && simm7 < 0
3272 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3273
3274 if (bWBack && earlyWBack)
3275 putIReg64orSP(rN, mkexpr(tEA));
3276
3277 /**/ if (bL == 1 && bX == 1) {
3278 // 64 bit load
3279 putIReg64orZR(rT1, loadLE(Ity_I64,
3280 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3281 putIReg64orZR(rT2, loadLE(Ity_I64,
3282 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3283 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003284 // 32 bit load
3285 putIReg32orZR(rT1, loadLE(Ity_I32,
3286 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3287 putIReg32orZR(rT2, loadLE(Ity_I32,
3288 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3289 } else if (bL == 0 && bX == 1) {
3290 // 64 bit store
3291 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3292 getIReg64orZR(rT1));
3293 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3294 getIReg64orZR(rT2));
3295 } else {
3296 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003297 // 32 bit store
3298 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3299 getIReg32orZR(rT1));
3300 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3301 getIReg32orZR(rT2));
3302 }
3303
3304 if (bWBack && !earlyWBack)
3305 putIReg64orSP(rN, mkexpr(tEA));
3306
3307 const HChar* fmt_str = NULL;
3308 switch (INSN(24,23)) {
3309 case BITS2(0,1):
3310 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3311 break;
3312 case BITS2(1,1):
3313 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3314 break;
3315 case BITS2(1,0):
3316 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3317 break;
3318 default:
3319 vassert(0);
3320 }
3321 DIP(fmt_str, bL == 0 ? "st" : "ld",
3322 nameIRegOrZR(bX == 1, rT1),
3323 nameIRegOrZR(bX == 1, rT2),
3324 nameIReg64orSP(rN), simm7);
3325 return True;
3326 }
3327 }
3328
3329 /* ---------------- LDR (literal, int reg) ---------------- */
3330 /* 31 29 23 4
3331 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3332 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3333 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3334 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3335 Just handles the first two cases for now.
3336 */
3337 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3338 UInt imm19 = INSN(23,5);
3339 UInt rT = INSN(4,0);
3340 UInt bX = INSN(30,30);
3341 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3342 if (bX) {
3343 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3344 } else {
3345 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3346 }
3347 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3348 return True;
3349 }
3350
3351 /* -------------- {LD,ST}R (integer register) --------------- */
3352 /* 31 29 20 15 12 11 9 4
3353 | | | | | | | |
3354 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3355 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3356 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3357 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3358
3359 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3360 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3361 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3362 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3363 */
3364 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3365 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3366 HChar dis_buf[64];
3367 UInt szLg2 = INSN(31,30);
3368 Bool isLD = INSN(22,22) == 1;
3369 UInt tt = INSN(4,0);
3370 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3371 if (ea != IRTemp_INVALID) {
3372 switch (szLg2) {
3373 case 3: /* 64 bit */
3374 if (isLD) {
3375 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3376 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3377 } else {
3378 storeLE(mkexpr(ea), getIReg64orZR(tt));
3379 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3380 }
3381 break;
3382 case 2: /* 32 bit */
3383 if (isLD) {
3384 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3385 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3386 } else {
3387 storeLE(mkexpr(ea), getIReg32orZR(tt));
3388 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3389 }
3390 break;
3391 case 1: /* 16 bit */
3392 if (isLD) {
3393 putIReg64orZR(tt, unop(Iop_16Uto64,
3394 loadLE(Ity_I16, mkexpr(ea))));
3395 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3396 } else {
3397 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3398 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3399 }
3400 break;
3401 case 0: /* 8 bit */
3402 if (isLD) {
3403 putIReg64orZR(tt, unop(Iop_8Uto64,
3404 loadLE(Ity_I8, mkexpr(ea))));
3405 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
3406 } else {
3407 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3408 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3409 }
3410 break;
3411 default:
3412 vassert(0);
3413 }
3414 return True;
3415 }
3416 }
3417
3418 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3419 /* 31 29 26 23 21 9 4
3420 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3421 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3422 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3423 where
3424 Rt is Wt when x==1, Xt when x==0
3425 */
3426 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3427 /* Further checks on bits 31:30 and 22 */
3428 Bool valid = False;
3429 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3430 case BITS3(1,0,0):
3431 case BITS3(0,1,0): case BITS3(0,1,1):
3432 case BITS3(0,0,0): case BITS3(0,0,1):
3433 valid = True;
3434 break;
3435 }
3436 if (valid) {
3437 UInt szLg2 = INSN(31,30);
3438 UInt bitX = INSN(22,22);
3439 UInt imm12 = INSN(21,10);
3440 UInt nn = INSN(9,5);
3441 UInt tt = INSN(4,0);
3442 UInt szB = 1 << szLg2;
3443 IRExpr* ea = binop(Iop_Add64,
3444 getIReg64orSP(nn), mkU64(imm12 * szB));
3445 switch (szB) {
3446 case 4:
3447 vassert(bitX == 0);
3448 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
3449 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
3450 nameIReg64orSP(nn), imm12 * szB);
3451 break;
3452 case 2:
3453 if (bitX == 1) {
3454 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
3455 } else {
3456 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
3457 }
3458 DIP("ldrsh %s, [%s, #%u]\n",
3459 nameIRegOrZR(bitX == 0, tt),
3460 nameIReg64orSP(nn), imm12 * szB);
3461 break;
3462 case 1:
3463 if (bitX == 1) {
3464 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
3465 } else {
3466 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
3467 }
3468 DIP("ldrsb %s, [%s, #%u]\n",
3469 nameIRegOrZR(bitX == 0, tt),
3470 nameIReg64orSP(nn), imm12 * szB);
3471 break;
3472 default:
3473 vassert(0);
3474 }
3475 return True;
3476 }
3477 /* else fall through */
3478 }
3479
3480 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3481 /* (at-Rn-then-Rn=EA)
3482 31 29 23 21 20 11 9 4
3483 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
3484 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
3485 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
3486
3487 (at-EA-then-Rn=EA)
3488 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
3489 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
3490 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
3491 where
3492 Rt is Wt when x==1, Xt when x==0
3493 transfer-at-Rn when [11]==0, at EA when [11]==1
3494 */
3495 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3496 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3497 /* Further checks on bits 31:30 and 22 */
3498 Bool valid = False;
3499 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3500 case BITS3(1,0,0): // LDRSW Xt
3501 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3502 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3503 valid = True;
3504 break;
3505 }
3506 if (valid) {
3507 UInt szLg2 = INSN(31,30);
3508 UInt imm9 = INSN(20,12);
3509 Bool atRN = INSN(11,11) == 0;
3510 UInt nn = INSN(9,5);
3511 UInt tt = INSN(4,0);
3512 IRTemp tRN = newTemp(Ity_I64);
3513 IRTemp tEA = newTemp(Ity_I64);
3514 IRTemp tTA = IRTemp_INVALID;
3515 ULong simm9 = sx_to_64(imm9, 9);
3516 Bool is64 = INSN(22,22) == 0;
3517 assign(tRN, getIReg64orSP(nn));
3518 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3519 tTA = atRN ? tRN : tEA;
3520 HChar ch = '?';
3521 /* There are 5 cases:
3522 byte load, SX to 64
3523 byte load, SX to 32, ZX to 64
3524 halfword load, SX to 64
3525 halfword load, SX to 32, ZX to 64
3526 word load, SX to 64
3527 The ifs below handle them in the listed order.
3528 */
3529 if (szLg2 == 0) {
3530 ch = 'b';
3531 if (is64) {
3532 putIReg64orZR(tt, unop(Iop_8Sto64,
3533 loadLE(Ity_I8, mkexpr(tTA))));
3534 } else {
3535 putIReg32orZR(tt, unop(Iop_8Sto32,
3536 loadLE(Ity_I8, mkexpr(tTA))));
3537 }
3538 }
3539 else if (szLg2 == 1) {
3540 ch = 'h';
3541 if (is64) {
3542 putIReg64orZR(tt, unop(Iop_16Sto64,
3543 loadLE(Ity_I16, mkexpr(tTA))));
3544 } else {
3545 putIReg32orZR(tt, unop(Iop_16Sto32,
3546 loadLE(Ity_I16, mkexpr(tTA))));
3547 }
3548 }
3549 else if (szLg2 == 2 && is64) {
3550 ch = 'w';
3551 putIReg64orZR(tt, unop(Iop_32Sto64,
3552 loadLE(Ity_I32, mkexpr(tTA))));
3553 }
3554 else {
3555 vassert(0);
3556 }
3557 putIReg64orSP(nn, mkexpr(tEA));
3558 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3559 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3560 return True;
3561 }
3562 /* else fall through */
3563 }
3564
3565 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3566 /* 31 29 23 21 20 11 9 4
3567 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
3568 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
3569 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
3570 where
3571 Rt is Wt when x==1, Xt when x==0
3572 */
3573 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3574 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3575 /* Further checks on bits 31:30 and 22 */
3576 Bool valid = False;
3577 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3578 case BITS3(1,0,0): // LDURSW Xt
3579 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3580 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3581 valid = True;
3582 break;
3583 }
3584 if (valid) {
3585 UInt szLg2 = INSN(31,30);
3586 UInt imm9 = INSN(20,12);
3587 UInt nn = INSN(9,5);
3588 UInt tt = INSN(4,0);
3589 IRTemp tRN = newTemp(Ity_I64);
3590 IRTemp tEA = newTemp(Ity_I64);
3591 ULong simm9 = sx_to_64(imm9, 9);
3592 Bool is64 = INSN(22,22) == 0;
3593 assign(tRN, getIReg64orSP(nn));
3594 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3595 HChar ch = '?';
3596 /* There are 5 cases:
3597 byte load, SX to 64
3598 byte load, SX to 32, ZX to 64
3599 halfword load, SX to 64
3600 halfword load, SX to 32, ZX to 64
3601 word load, SX to 64
3602 The ifs below handle them in the listed order.
3603 */
3604 if (szLg2 == 0) {
3605 ch = 'b';
3606 if (is64) {
3607 putIReg64orZR(tt, unop(Iop_8Sto64,
3608 loadLE(Ity_I8, mkexpr(tEA))));
3609 } else {
3610 putIReg32orZR(tt, unop(Iop_8Sto32,
3611 loadLE(Ity_I8, mkexpr(tEA))));
3612 }
3613 }
3614 else if (szLg2 == 1) {
3615 ch = 'h';
3616 if (is64) {
3617 putIReg64orZR(tt, unop(Iop_16Sto64,
3618 loadLE(Ity_I16, mkexpr(tEA))));
3619 } else {
3620 putIReg32orZR(tt, unop(Iop_16Sto32,
3621 loadLE(Ity_I16, mkexpr(tEA))));
3622 }
3623 }
3624 else if (szLg2 == 2 && is64) {
3625 ch = 'w';
3626 putIReg64orZR(tt, unop(Iop_32Sto64,
3627 loadLE(Ity_I32, mkexpr(tEA))));
3628 }
3629 else {
3630 vassert(0);
3631 }
3632 DIP("ldurs%c %s, [%s, #%lld]",
3633 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3634 return True;
3635 }
3636 /* else fall through */
3637 }
3638
3639 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
3640 /* L==1 => mm==LD
3641 L==0 => mm==ST
3642 sz==00 => 32 bit (S) transfers
3643 sz==01 => 64 bit (D) transfers
3644 sz==10 => 128 bit (Q) transfers
3645 sz==11 isn't allowed
3646 simm7 is scaled by the (single-register) transfer size
3647
3648 31 29 22 21 14 9 4
3649 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
3650 (at-Rn-then-Rn=EA)
3651
3652 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
3653 (at-EA-then-Rn=EA)
3654
3655 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
3656 (at-EA)
3657 */
3658
3659 UInt insn_29_23 = INSN(29,23);
3660 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
3661 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
3662 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
3663 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
3664 Bool isLD = INSN(22,22) == 1;
3665 Bool wBack = INSN(23,23) == 1;
3666 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3667 UInt tt2 = INSN(14,10);
3668 UInt nn = INSN(9,5);
3669 UInt tt1 = INSN(4,0);
3670 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
3671 /* undecodable; fall through */
3672 } else {
3673 if (nn == 31) { /* FIXME generate stack alignment check */ }
3674
3675 // Compute the transfer address TA and the writeback address WA.
3676 UInt szB = 4 << szSlg2; /* szB is the per-register size */
3677 IRTemp tRN = newTemp(Ity_I64);
3678 assign(tRN, getIReg64orSP(nn));
3679 IRTemp tEA = newTemp(Ity_I64);
3680 simm7 = szB * simm7;
3681 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3682
3683 IRTemp tTA = newTemp(Ity_I64);
3684 IRTemp tWA = newTemp(Ity_I64);
3685 switch (INSN(24,23)) {
3686 case BITS2(0,1):
3687 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3688 case BITS2(1,1):
3689 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3690 case BITS2(1,0):
3691 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3692 default:
3693 vassert(0); /* NOTREACHED */
3694 }
3695
3696 IRType ty = Ity_INVALID;
3697 switch (szB) {
3698 case 4: ty = Ity_F32; break;
3699 case 8: ty = Ity_F64; break;
3700 case 16: ty = Ity_V128; break;
3701 default: vassert(0);
3702 }
3703
sewardje0bff8b2014-03-09 09:40:23 +00003704 /* Normally rN would be updated after the transfer. However, in
3705 the special case typifed by
3706 stp q0, q1, [sp,#-512]!
3707 it is necessary to update SP before the transfer, (1)
3708 because Memcheck will otherwise complain about a write
3709 below the stack pointer, and (2) because the segfault
3710 stack extension mechanism will otherwise extend the stack
3711 only down to SP before the instruction, which might not be
3712 far enough, if the -512 bit takes the actual access
3713 address to the next page.
3714 */
3715 Bool earlyWBack
3716 = wBack && simm7 < 0 && szB == 16
3717 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
3718
3719 if (wBack && earlyWBack)
3720 putIReg64orSP(nn, mkexpr(tEA));
3721
sewardjbbcf1882014-01-12 12:49:10 +00003722 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00003723 if (szB < 16) {
3724 putQReg128(tt1, mkV128(0x0000));
3725 }
sewardj606c4ba2014-01-26 19:11:14 +00003726 putQRegLO(tt1,
3727 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00003728 if (szB < 16) {
3729 putQReg128(tt2, mkV128(0x0000));
3730 }
sewardj606c4ba2014-01-26 19:11:14 +00003731 putQRegLO(tt2,
3732 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00003733 } else {
3734 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00003735 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003736 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00003737 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003738 }
3739
sewardje0bff8b2014-03-09 09:40:23 +00003740 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003741 putIReg64orSP(nn, mkexpr(tEA));
3742
3743 const HChar* fmt_str = NULL;
3744 switch (INSN(24,23)) {
3745 case BITS2(0,1):
3746 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3747 break;
3748 case BITS2(1,1):
3749 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3750 break;
3751 case BITS2(1,0):
3752 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3753 break;
3754 default:
3755 vassert(0);
3756 }
3757 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00003758 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00003759 nameIReg64orSP(nn), simm7);
3760 return True;
3761 }
3762 }
3763
3764 /* -------------- {LD,ST}R (vector register) --------------- */
3765 /* 31 29 23 20 15 12 11 9 4
3766 | | | | | | | | |
3767 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
3768 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
3769 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
3770 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
3771 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
3772
3773 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
3774 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
3775 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
3776 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
3777 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
3778 */
3779 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3780 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3781 HChar dis_buf[64];
3782 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3783 Bool isLD = INSN(22,22) == 1;
3784 UInt tt = INSN(4,0);
3785 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
3786 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
3787 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
3788 switch (szLg2) {
3789 case 0: /* 8 bit */
3790 if (isLD) {
3791 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003792 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
3793 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003794 } else {
3795 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003796 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
3797 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003798 }
3799 break;
3800 case 1:
3801 if (isLD) {
3802 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003803 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
3804 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003805 } else {
3806 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003807 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
3808 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003809 }
3810 break;
3811 case 2: /* 32 bit */
3812 if (isLD) {
3813 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003814 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
3815 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003816 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003817 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
3818 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003819 }
3820 break;
3821 case 3: /* 64 bit */
3822 if (isLD) {
3823 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003824 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
3825 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003826 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003827 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
3828 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003829 }
3830 break;
3831 case 4: return False; //ATC
3832 default: vassert(0);
3833 }
3834 return True;
3835 }
3836 after_LDR_STR_vector_register:
3837
3838 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
3839 /* 31 29 22 20 15 12 11 9 4
3840 | | | | | | | | |
3841 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
3842
3843 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
3844 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
3845
3846 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
3847 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
3848 */
3849 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3850 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3851 HChar dis_buf[64];
3852 UInt szLg2 = INSN(31,30);
3853 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
3854 UInt tt = INSN(4,0);
3855 if (szLg2 == 3) goto after_LDRS_integer_register;
3856 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3857 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
3858 /* Enumerate the 5 variants explicitly. */
3859 if (szLg2 == 2/*32 bit*/ && sxTo64) {
3860 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
3861 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
3862 return True;
3863 }
3864 else
3865 if (szLg2 == 1/*16 bit*/) {
3866 if (sxTo64) {
3867 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
3868 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
3869 } else {
3870 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
3871 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3872 }
3873 return True;
3874 }
3875 else
3876 if (szLg2 == 0/*8 bit*/) {
3877 if (sxTo64) {
3878 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
3879 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
3880 } else {
3881 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
3882 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3883 }
3884 return True;
3885 }
3886 /* else it's an invalid combination */
3887 }
3888 after_LDRS_integer_register:
3889
3890 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
3891 /* This is the Unsigned offset variant only. The Post-Index and
3892 Pre-Index variants are below.
3893
3894 31 29 23 21 9 4
3895 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
3896 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
3897 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
3898 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
3899 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
3900
3901 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
3902 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
3903 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
3904 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
3905 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
3906 */
3907 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
3908 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
3909 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3910 Bool isLD = INSN(22,22) == 1;
3911 UInt pimm12 = INSN(21,10) << szLg2;
3912 UInt nn = INSN(9,5);
3913 UInt tt = INSN(4,0);
3914 IRTemp tEA = newTemp(Ity_I64);
3915 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3916 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
3917 if (isLD) {
3918 if (szLg2 < 4) {
3919 putQReg128(tt, mkV128(0x0000));
3920 }
sewardj606c4ba2014-01-26 19:11:14 +00003921 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00003922 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003923 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003924 }
3925 DIP("%s %s, [%s, #%u]\n",
3926 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003927 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00003928 return True;
3929 }
3930
3931 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
3932 /* These are the Post-Index and Pre-Index variants.
3933
3934 31 29 23 20 11 9 4
3935 (at-Rn-then-Rn=EA)
3936 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
3937 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
3938 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
3939 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
3940 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
3941
3942 (at-EA-then-Rn=EA)
3943 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
3944 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
3945 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
3946 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
3947 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
3948
3949 Stores are the same except with bit 22 set to 0.
3950 */
3951 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3952 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3953 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3954 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3955 Bool isLD = INSN(22,22) == 1;
3956 UInt imm9 = INSN(20,12);
3957 Bool atRN = INSN(11,11) == 0;
3958 UInt nn = INSN(9,5);
3959 UInt tt = INSN(4,0);
3960 IRTemp tRN = newTemp(Ity_I64);
3961 IRTemp tEA = newTemp(Ity_I64);
3962 IRTemp tTA = IRTemp_INVALID;
3963 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3964 ULong simm9 = sx_to_64(imm9, 9);
3965 assign(tRN, getIReg64orSP(nn));
3966 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3967 tTA = atRN ? tRN : tEA;
3968 if (isLD) {
3969 if (szLg2 < 4) {
3970 putQReg128(tt, mkV128(0x0000));
3971 }
sewardj606c4ba2014-01-26 19:11:14 +00003972 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00003973 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003974 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003975 }
3976 putIReg64orSP(nn, mkexpr(tEA));
3977 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
3978 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003979 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00003980 return True;
3981 }
3982
3983 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
3984 /* 31 29 23 20 11 9 4
3985 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
3986 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
3987 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
3988 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
3989 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
3990
3991 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
3992 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
3993 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
3994 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
3995 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
3996 */
3997 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3998 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3999 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4000 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4001 Bool isLD = INSN(22,22) == 1;
4002 UInt imm9 = INSN(20,12);
4003 UInt nn = INSN(9,5);
4004 UInt tt = INSN(4,0);
4005 ULong simm9 = sx_to_64(imm9, 9);
4006 IRTemp tEA = newTemp(Ity_I64);
4007 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4008 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4009 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004010 if (szLg2 < 4) {
4011 putQReg128(tt, mkV128(0x0000));
4012 }
4013 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004014 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004015 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004016 }
4017 DIP("%s %s, [%s, #%lld]\n",
4018 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004019 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004020 return True;
4021 }
4022
4023 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4024 /* 31 29 23 4
4025 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4026 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4027 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4028 */
4029 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4030 UInt szB = 4 << INSN(31,30);
4031 UInt imm19 = INSN(23,5);
4032 UInt tt = INSN(4,0);
4033 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4034 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004035 putQReg128(tt, mkV128(0x0000));
4036 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4037 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004038 return True;
4039 }
4040
sewardj606c4ba2014-01-26 19:11:14 +00004041 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004042 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004043 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4044 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4045 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4046 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4047 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4048 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004049 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4050 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004051 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004052 */
sewardj606c4ba2014-01-26 19:11:14 +00004053 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4054 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004055 ) {
4056 Bool isLD = INSN(22,22) == 1;
4057 UInt rN = INSN(9,5);
4058 UInt vT = INSN(4,0);
4059 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004060 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4061 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004062 assign(tEA, getIReg64orSP(rN));
4063 if (rN == 31) { /* FIXME generate stack alignment check */ }
4064 if (isLD) {
4065 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4066 } else {
4067 storeLE(mkexpr(tEA), getQReg128(vT));
4068 }
4069 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004070 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004071 return True;
4072 }
4073
sewardj606c4ba2014-01-26 19:11:14 +00004074 /* 31 23
4075 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4076 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4077 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4078 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4079 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4080 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4081 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4082 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4083 FIXME does this assume that the host is little endian?
4084 */
4085 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4086 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4087 ) {
4088 Bool isLD = INSN(22,22) == 1;
4089 UInt rN = INSN(9,5);
4090 UInt vT = INSN(4,0);
4091 IRTemp tEA = newTemp(Ity_I64);
4092 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4093 const HChar* name = names[INSN(11,10)];
4094 assign(tEA, getIReg64orSP(rN));
4095 if (rN == 31) { /* FIXME generate stack alignment check */ }
4096 if (isLD) {
4097 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4098 putQRegLane(vT, 1, mkU64(0));
4099 } else {
4100 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4101 }
4102 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4103 vT, name, nameIReg64orSP(rN));
4104 return True;
4105 }
4106
4107 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4108 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004109 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4110 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4111 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4112 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4113 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4114 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4115 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004116 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004117 Note that #16 is implied and cannot be any other value.
4118 FIXME does this assume that the host is little endian?
4119 */
sewardj7d009132014-02-20 17:43:38 +00004120 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4121 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004122 ) {
4123 Bool isLD = INSN(22,22) == 1;
4124 UInt rN = INSN(9,5);
4125 UInt vT = INSN(4,0);
4126 IRTemp tEA = newTemp(Ity_I64);
4127 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4128 const HChar* name = names[INSN(11,10)];
4129 assign(tEA, getIReg64orSP(rN));
4130 if (rN == 31) { /* FIXME generate stack alignment check */ }
4131 if (isLD) {
4132 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4133 } else {
4134 storeLE(mkexpr(tEA), getQReg128(vT));
4135 }
4136 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4137 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4138 vT, name, nameIReg64orSP(rN));
4139 return True;
4140 }
4141
sewardj950ca7a2014-04-03 23:03:32 +00004142 /* 31 23
4143 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4144 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004145 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004146 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004147 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004148 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4149 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4150 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004151 Note that #8 is implied and cannot be any other value.
4152 FIXME does this assume that the host is little endian?
4153 */
sewardj950ca7a2014-04-03 23:03:32 +00004154 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4155 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004156 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004157 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004158 UInt rN = INSN(9,5);
4159 UInt vT = INSN(4,0);
4160 IRTemp tEA = newTemp(Ity_I64);
4161 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4162 const HChar* name = names[INSN(11,10)];
4163 assign(tEA, getIReg64orSP(rN));
4164 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004165 if (isLD) {
4166 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4167 putQRegLane(vT, 1, mkU64(0));
4168 } else {
4169 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4170 }
sewardj606c4ba2014-01-26 19:11:14 +00004171 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004172 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4173 vT, name, nameIReg64orSP(rN));
4174 return True;
4175 }
4176
4177 /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4178 /* Only a very few cases. */
4179 /* 31 23 11 9 4
4180 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4181 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4182 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4183 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4184 */
4185 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4186 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4187 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4188 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4189 ) {
4190 Bool isLD = INSN(22,22) == 1;
4191 UInt rN = INSN(9,5);
4192 UInt vT = INSN(4,0);
4193 IRTemp tEA = newTemp(Ity_I64);
4194 UInt sz = INSN(11,10);
4195 const HChar* name = "??";
4196 assign(tEA, getIReg64orSP(rN));
4197 if (rN == 31) { /* FIXME generate stack alignment check */ }
4198 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4199 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4200 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4201 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4202 if (sz == BITS2(1,1)) {
4203 name = "2d";
4204 if (isLD) {
4205 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4206 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4207 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4208 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4209 } else {
4210 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4211 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4212 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4213 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4214 }
4215 }
4216 else if (sz == BITS2(1,0)) {
4217 /* Uh, this is ugly. TODO: better. */
4218 name = "4s";
4219 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4220 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4221 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4222 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4223 if (isLD) {
4224 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4225 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4226 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4227 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4228 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4229 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4230 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4231 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4232 } else {
4233 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4234 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4235 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4236 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4237 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4238 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4239 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4240 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4241 }
4242 }
4243 else {
4244 vassert(0); // Can't happen.
4245 }
4246 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4247 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4248 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4249 return True;
4250 }
4251
4252 /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4253 /* Only a very few cases. */
4254 /* 31 23
4255 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4256 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4257 */
4258 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4259 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4260 ) {
4261 Bool isLD = INSN(22,22) == 1;
4262 UInt rN = INSN(9,5);
4263 UInt vT = INSN(4,0);
4264 IRTemp tEA = newTemp(Ity_I64);
4265 const HChar* name = "16b";
4266 assign(tEA, getIReg64orSP(rN));
4267 if (rN == 31) { /* FIXME generate stack alignment check */ }
4268 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4269 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4270 if (isLD) {
4271 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4272 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4273 } else {
4274 storeLE(tEA_0, getQReg128((vT+0) % 32));
4275 storeLE(tEA_16, getQReg128((vT+1) % 32));
4276 }
4277 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4278 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004279 return True;
4280 }
4281
sewardj7d009132014-02-20 17:43:38 +00004282 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4283 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4284 /* 31 29 23 20 14 9 4
4285 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
4286 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
4287 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
4288 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004289 */
sewardj7d009132014-02-20 17:43:38 +00004290 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4291 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4292 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00004293 UInt szBlg2 = INSN(31,30);
4294 Bool isLD = INSN(22,22) == 1;
4295 Bool isAcqOrRel = INSN(15,15) == 1;
4296 UInt ss = INSN(20,16);
4297 UInt nn = INSN(9,5);
4298 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00004299
sewardjdc9259c2014-02-27 11:10:19 +00004300 vassert(szBlg2 < 4);
4301 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4302 IRType ty = integerIRTypeOfSize(szB);
4303 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00004304
sewardjdc9259c2014-02-27 11:10:19 +00004305 IRTemp ea = newTemp(Ity_I64);
4306 assign(ea, getIReg64orSP(nn));
4307 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00004308
sewardjdc9259c2014-02-27 11:10:19 +00004309 if (isLD && ss == BITS5(1,1,1,1,1)) {
4310 IRTemp res = newTemp(ty);
4311 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4312 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4313 if (isAcqOrRel) {
4314 stmt(IRStmt_MBE(Imbe_Fence));
4315 }
4316 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4317 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4318 return True;
4319 }
4320 if (!isLD) {
4321 if (isAcqOrRel) {
4322 stmt(IRStmt_MBE(Imbe_Fence));
4323 }
4324 IRTemp res = newTemp(Ity_I1);
4325 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4326 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4327 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4328 Need to set rS to 1 on failure, 0 on success. */
4329 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4330 mkU64(1)));
4331 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4332 nameIRegOrZR(False, ss),
4333 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4334 return True;
4335 }
4336 /* else fall through */
4337 }
4338
4339 /* ------------------ LDA{R,RH,RB} ------------------ */
4340 /* ------------------ STL{R,RH,RB} ------------------ */
4341 /* 31 29 23 20 14 9 4
4342 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
4343 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
4344 */
4345 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4346 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4347 UInt szBlg2 = INSN(31,30);
4348 Bool isLD = INSN(22,22) == 1;
4349 UInt nn = INSN(9,5);
4350 UInt tt = INSN(4,0);
4351
4352 vassert(szBlg2 < 4);
4353 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4354 IRType ty = integerIRTypeOfSize(szB);
4355 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4356
4357 IRTemp ea = newTemp(Ity_I64);
4358 assign(ea, getIReg64orSP(nn));
4359 /* FIXME generate check that ea is szB-aligned */
4360
4361 if (isLD) {
4362 IRTemp res = newTemp(ty);
4363 assign(res, loadLE(ty, mkexpr(ea)));
4364 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4365 stmt(IRStmt_MBE(Imbe_Fence));
4366 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4367 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4368 } else {
4369 stmt(IRStmt_MBE(Imbe_Fence));
4370 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4371 storeLE(mkexpr(ea), data);
4372 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4373 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4374 }
4375 return True;
sewardjbbcf1882014-01-12 12:49:10 +00004376 }
4377
4378 vex_printf("ARM64 front end: load_store\n");
4379 return False;
4380# undef INSN
4381}
4382
4383
4384/*------------------------------------------------------------*/
4385/*--- Control flow and misc instructions ---*/
4386/*------------------------------------------------------------*/
4387
4388static
sewardj65902992014-05-03 21:20:56 +00004389Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
4390 VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00004391{
4392# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4393
4394 /* ---------------------- B cond ----------------------- */
4395 /* 31 24 4 3
4396 0101010 0 imm19 0 cond */
4397 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4398 UInt cond = INSN(3,0);
4399 ULong uimm64 = INSN(23,5) << 2;
4400 Long simm64 = (Long)sx_to_64(uimm64, 21);
4401 vassert(dres->whatNext == Dis_Continue);
4402 vassert(dres->len == 4);
4403 vassert(dres->continueAt == 0);
4404 vassert(dres->jk_StopHere == Ijk_INVALID);
4405 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4406 Ijk_Boring,
4407 IRConst_U64(guest_PC_curr_instr + simm64),
4408 OFFB_PC) );
4409 putPC(mkU64(guest_PC_curr_instr + 4));
4410 dres->whatNext = Dis_StopHere;
4411 dres->jk_StopHere = Ijk_Boring;
4412 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4413 return True;
4414 }
4415
4416 /* -------------------- B{L} uncond -------------------- */
4417 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4418 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
4419 100101 imm26 B (PC + sxTo64(imm26 << 2))
4420 */
4421 UInt bLink = INSN(31,31);
4422 ULong uimm64 = INSN(25,0) << 2;
4423 Long simm64 = (Long)sx_to_64(uimm64, 28);
4424 if (bLink) {
4425 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4426 }
4427 putPC(mkU64(guest_PC_curr_instr + simm64));
4428 dres->whatNext = Dis_StopHere;
4429 dres->jk_StopHere = Ijk_Call;
4430 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4431 guest_PC_curr_instr + simm64);
4432 return True;
4433 }
4434
4435 /* --------------------- B{L} reg --------------------- */
4436 /* 31 24 22 20 15 9 4
4437 1101011 00 10 11111 000000 nn 00000 RET Rn
4438 1101011 00 01 11111 000000 nn 00000 CALL Rn
4439 1101011 00 00 11111 000000 nn 00000 JMP Rn
4440 */
4441 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4442 && INSN(20,16) == BITS5(1,1,1,1,1)
4443 && INSN(15,10) == BITS6(0,0,0,0,0,0)
4444 && INSN(4,0) == BITS5(0,0,0,0,0)) {
4445 UInt branch_type = INSN(22,21);
4446 UInt nn = INSN(9,5);
4447 if (branch_type == BITS2(1,0) /* RET */) {
4448 putPC(getIReg64orZR(nn));
4449 dres->whatNext = Dis_StopHere;
4450 dres->jk_StopHere = Ijk_Ret;
4451 DIP("ret %s\n", nameIReg64orZR(nn));
4452 return True;
4453 }
4454 if (branch_type == BITS2(0,1) /* CALL */) {
4455 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4456 putPC(getIReg64orZR(nn));
4457 dres->whatNext = Dis_StopHere;
4458 dres->jk_StopHere = Ijk_Call;
4459 DIP("blr %s\n", nameIReg64orZR(nn));
4460 return True;
4461 }
4462 if (branch_type == BITS2(0,0) /* JMP */) {
4463 putPC(getIReg64orZR(nn));
4464 dres->whatNext = Dis_StopHere;
4465 dres->jk_StopHere = Ijk_Boring;
4466 DIP("jmp %s\n", nameIReg64orZR(nn));
4467 return True;
4468 }
4469 }
4470
4471 /* -------------------- CB{N}Z -------------------- */
4472 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4473 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4474 */
4475 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4476 Bool is64 = INSN(31,31) == 1;
4477 Bool bIfZ = INSN(24,24) == 0;
4478 ULong uimm64 = INSN(23,5) << 2;
4479 UInt rT = INSN(4,0);
4480 Long simm64 = (Long)sx_to_64(uimm64, 21);
4481 IRExpr* cond = NULL;
4482 if (is64) {
4483 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4484 getIReg64orZR(rT), mkU64(0));
4485 } else {
4486 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4487 getIReg32orZR(rT), mkU32(0));
4488 }
4489 stmt( IRStmt_Exit(cond,
4490 Ijk_Boring,
4491 IRConst_U64(guest_PC_curr_instr + simm64),
4492 OFFB_PC) );
4493 putPC(mkU64(guest_PC_curr_instr + 4));
4494 dres->whatNext = Dis_StopHere;
4495 dres->jk_StopHere = Ijk_Boring;
4496 DIP("cb%sz %s, 0x%llx\n",
4497 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4498 guest_PC_curr_instr + simm64);
4499 return True;
4500 }
4501
4502 /* -------------------- TB{N}Z -------------------- */
4503 /* 31 30 24 23 18 5 4
4504 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4505 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4506 */
4507 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4508 UInt b5 = INSN(31,31);
4509 Bool bIfZ = INSN(24,24) == 0;
4510 UInt b40 = INSN(23,19);
4511 UInt imm14 = INSN(18,5);
4512 UInt tt = INSN(4,0);
4513 UInt bitNo = (b5 << 5) | b40;
4514 ULong uimm64 = imm14 << 2;
4515 Long simm64 = sx_to_64(uimm64, 16);
4516 IRExpr* cond
4517 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4518 binop(Iop_And64,
4519 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4520 mkU64(1)),
4521 mkU64(0));
4522 stmt( IRStmt_Exit(cond,
4523 Ijk_Boring,
4524 IRConst_U64(guest_PC_curr_instr + simm64),
4525 OFFB_PC) );
4526 putPC(mkU64(guest_PC_curr_instr + 4));
4527 dres->whatNext = Dis_StopHere;
4528 dres->jk_StopHere = Ijk_Boring;
4529 DIP("tb%sz %s, #%u, 0x%llx\n",
4530 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4531 guest_PC_curr_instr + simm64);
4532 return True;
4533 }
4534
4535 /* -------------------- SVC -------------------- */
4536 /* 11010100 000 imm16 000 01
4537 Don't bother with anything except the imm16==0 case.
4538 */
4539 if (INSN(31,0) == 0xD4000001) {
4540 putPC(mkU64(guest_PC_curr_instr + 4));
4541 dres->whatNext = Dis_StopHere;
4542 dres->jk_StopHere = Ijk_Sys_syscall;
4543 DIP("svc #0\n");
4544 return True;
4545 }
4546
4547 /* ------------------ M{SR,RS} ------------------ */
4548 /* Only handles the case where the system register is TPIDR_EL0.
4549 0xD51BD0 010 Rt MSR tpidr_el0, rT
4550 0xD53BD0 010 Rt MRS rT, tpidr_el0
4551 */
4552 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4553 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4554 Bool toSys = INSN(21,21) == 0;
4555 UInt tt = INSN(4,0);
4556 if (toSys) {
4557 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4558 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4559 } else {
4560 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4561 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4562 }
4563 return True;
4564 }
4565 /* Cases for FPCR
4566 0xD51B44 000 Rt MSR fpcr, rT
4567 0xD53B44 000 Rt MSR rT, fpcr
4568 */
4569 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4570 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4571 Bool toSys = INSN(21,21) == 0;
4572 UInt tt = INSN(4,0);
4573 if (toSys) {
4574 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4575 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4576 } else {
4577 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4578 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4579 }
4580 return True;
4581 }
4582 /* Cases for FPSR
sewardj7d009132014-02-20 17:43:38 +00004583 0xD51B44 001 Rt MSR fpsr, rT
4584      0xD53B44 001 Rt  MRS rT, fpsr
sewardjbbcf1882014-01-12 12:49:10 +00004585 */
4586 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4587 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4588 Bool toSys = INSN(21,21) == 0;
4589 UInt tt = INSN(4,0);
4590 if (toSys) {
4591 stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4592 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4593 } else {
4594 putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4595 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4596 }
4597 return True;
4598 }
4599 /* Cases for NZCV
4600 D51B42 000 Rt MSR nzcv, rT
4601 D53B42 000 Rt MRS rT, nzcv
4602 */
4603 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4604 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4605 Bool toSys = INSN(21,21) == 0;
4606 UInt tt = INSN(4,0);
4607 if (toSys) {
4608 IRTemp t = newTemp(Ity_I64);
4609 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4610 setFlags_COPY(t);
4611 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4612 } else {
4613 IRTemp res = newTemp(Ity_I64);
4614 assign(res, mk_arm64g_calculate_flags_nzcv());
4615 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4616 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4617 }
4618 return True;
4619 }
sewardjd512d102014-02-21 14:49:44 +00004620 /* Cases for DCZID_EL0
4621 Don't support arbitrary reads and writes to this register. Just
4622 return the value 16, which indicates that the DC ZVA instruction
4623 is not permitted, so we don't have to emulate it.
4624 D5 3B 00 111 Rt MRS rT, dczid_el0
4625 */
4626 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4627 UInt tt = INSN(4,0);
4628 putIReg64orZR(tt, mkU64(1<<4));
4629 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4630 return True;
4631 }
sewardj65902992014-05-03 21:20:56 +00004632 /* Cases for CTR_EL0
4633 We just handle reads, and make up a value from the D and I line
4634 sizes in the VexArchInfo we are given, and patch in the following
4635 fields that the Foundation model gives ("natively"):
4636 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
4637      D5 3B 00 001 Rt  MRS rT, ctr_el0
4638 */
4639 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
4640 UInt tt = INSN(4,0);
4641 /* Need to generate a value from dMinLine_lg2_szB and
4642 dMinLine_lg2_szB. The value in the register is in 32-bit
4643 units, so need to subtract 2 from the values in the
4644 VexArchInfo. We can assume that the values here are valid --
4645 disInstr_ARM64 checks them -- so there's no need to deal with
4646 out-of-range cases. */
4647 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4648 && archinfo->arm64_dMinLine_lg2_szB <= 17
4649 && archinfo->arm64_iMinLine_lg2_szB >= 2
4650 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4651 UInt val
4652 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
4653 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
4654 putIReg64orZR(tt, mkU64(val));
4655 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
4656 return True;
4657 }
sewardjbbcf1882014-01-12 12:49:10 +00004658
sewardj65902992014-05-03 21:20:56 +00004659 /* ------------------ IC_IVAU ------------------ */
4660 /* D5 0B 75 001 Rt ic ivau, rT
4661 */
4662 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
4663 /* We will always be provided with a valid iMinLine value. */
4664 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
4665 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4666 /* Round the requested address, in rT, down to the start of the
4667 containing block. */
4668 UInt tt = INSN(4,0);
4669 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
4670 IRTemp addr = newTemp(Ity_I64);
4671 assign( addr, binop( Iop_And64,
4672 getIReg64orZR(tt),
4673 mkU64(~(lineszB - 1))) );
4674 /* Set the invalidation range, request exit-and-invalidate, with
4675 continuation at the next instruction. */
4676 stmt(IRStmt_Put(OFFB_TISTART, mkexpr(addr)));
4677 stmt(IRStmt_Put(OFFB_TILEN, mkU64(lineszB)));
4678 /* be paranoid ... */
4679 stmt( IRStmt_MBE(Imbe_Fence) );
4680 putPC(mkU64( guest_PC_curr_instr + 4 ));
4681 dres->whatNext = Dis_StopHere;
4682 dres->jk_StopHere = Ijk_TInval;
4683 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
4684 return True;
4685 }
4686
4687 /* ------------------ DC_CVAU ------------------ */
4688 /* D5 0B 7B 001 Rt dc cvau, rT
4689 */
4690 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
4691 /* Exactly the same scheme as for IC IVAU, except we observe the
4692 dMinLine size, and request an Ijk_InvalData instead of
4693 Ijk_TInval. */
4694 /* We will always be provided with a valid dMinLine value. */
4695 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4696 && archinfo->arm64_dMinLine_lg2_szB <= 17);
4697 /* Round the requested address, in rT, down to the start of the
4698 containing block. */
4699 UInt tt = INSN(4,0);
4700 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
4701 IRTemp addr = newTemp(Ity_I64);
4702 assign( addr, binop( Iop_And64,
4703 getIReg64orZR(tt),
4704 mkU64(~(lineszB - 1))) );
4705 /* Set the flush range, request exit-and-flush, with
4706 continuation at the next instruction. */
4707 stmt(IRStmt_Put(OFFB_TISTART, mkexpr(addr)));
4708 stmt(IRStmt_Put(OFFB_TILEN, mkU64(lineszB)));
4709 /* be paranoid ... */
4710 stmt( IRStmt_MBE(Imbe_Fence) );
4711 putPC(mkU64( guest_PC_curr_instr + 4 ));
4712 dres->whatNext = Dis_StopHere;
4713 dres->jk_StopHere = Ijk_FlushDCache;
4714 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
4715 return True;
4716 }
4717
4718 /* ------------------ ISB, DMB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00004719 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00004720 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004721 DIP("isb\n");
4722 return True;
4723 }
4724 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00004725 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004726 DIP("dmb ish\n");
4727 return True;
4728 }
sewardj65902992014-05-03 21:20:56 +00004729 if (INSN(31,0) == 0xD5033B9F) {
4730 stmt(IRStmt_MBE(Imbe_Fence));
4731 DIP("dsb ish\n");
4732 return True;
4733 }
sewardjbbcf1882014-01-12 12:49:10 +00004734
sewardjdc9259c2014-02-27 11:10:19 +00004735 /* -------------------- NOP -------------------- */
4736 if (INSN(31,0) == 0xD503201F) {
4737 DIP("nop\n");
4738 return True;
4739 }
4740
sewardjbbcf1882014-01-12 12:49:10 +00004741 //fail:
4742 vex_printf("ARM64 front end: branch_etc\n");
4743 return False;
4744# undef INSN
4745}
4746
4747
4748/*------------------------------------------------------------*/
4749/*--- SIMD and FP instructions ---*/
4750/*------------------------------------------------------------*/
4751
sewardjecde6972014-02-05 11:01:19 +00004752/* begin FIXME -- rm temp scaffolding */
4753static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4754static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004755
sewardjecde6972014-02-05 11:01:19 +00004756static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4757static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004758static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4759static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4760
sewardjecde6972014-02-05 11:01:19 +00004761static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4762static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004763static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4764static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4765
sewardjfab09142014-02-10 10:28:13 +00004766static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4767static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004768static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4769static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
sewardjecde6972014-02-05 11:01:19 +00004770/* end FIXME -- rm temp scaffolding */
4771
sewardjbbcf1882014-01-12 12:49:10 +00004772/* Generate N copies of |bit| in the bottom of a ULong. */
4773static ULong Replicate ( ULong bit, Int N )
4774{
sewardj606c4ba2014-01-26 19:11:14 +00004775 vassert(bit <= 1 && N >= 1 && N < 64);
4776 if (bit == 0) {
4777 return 0;
4778 } else {
4779 /* Careful. This won't work for N == 64. */
4780 return (1ULL << N) - 1;
4781 }
sewardjbbcf1882014-01-12 12:49:10 +00004782}
4783
sewardjfab09142014-02-10 10:28:13 +00004784static ULong Replicate32x2 ( ULong bits32 )
4785{
4786 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4787 return (bits32 << 32) | bits32;
4788}
4789
4790static ULong Replicate16x4 ( ULong bits16 )
4791{
4792 vassert(0 == (bits16 & ~0xFFFFULL));
4793 return Replicate32x2((bits16 << 16) | bits16);
4794}
4795
4796static ULong Replicate8x8 ( ULong bits8 )
4797{
4798 vassert(0 == (bits8 & ~0xFFULL));
4799 return Replicate16x4((bits8 << 8) | bits8);
4800}
4801
4802/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
4803 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
4804 is 64. In the former case, the upper 32 bits of the returned value
4805 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00004806static ULong VFPExpandImm ( ULong imm8, Int N )
4807{
sewardj606c4ba2014-01-26 19:11:14 +00004808 vassert(imm8 <= 0xFF);
4809 vassert(N == 32 || N == 64);
4810 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
4811 Int F = N - E - 1;
4812 ULong imm8_6 = (imm8 >> 6) & 1;
4813 /* sign: 1 bit */
4814 /* exp: E bits */
4815 /* frac: F bits */
4816 ULong sign = (imm8 >> 7) & 1;
4817 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
4818 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
4819 vassert(sign < (1ULL << 1));
4820 vassert(exp < (1ULL << E));
4821 vassert(frac < (1ULL << F));
4822 vassert(1 + E + F == N);
4823 ULong res = (sign << (E+F)) | (exp << F) | frac;
4824 return res;
sewardjbbcf1882014-01-12 12:49:10 +00004825}
4826
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.
   |op| (1 bit) and |cmode| (4 bits) select the expansion scheme for
   the 8-bit payload |imm8|.  On success, the expanded value is
   written to *res and True is returned; on failure *res is left as
   zero and False is returned.  The encodings flagged via 'testimm8'
   below are invalid when imm8 == 0. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64 = 0;
   Bool testimm8 = False;

   /* Dispatch on cmode<3:1>; cmode<0> (and op) further discriminate
      within cases 6 and 7. */
   switch (cmode >> 1) {
      case 0:
         /* 32x2: imm8 in byte 0 of each 32-bit lane */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         /* 32x2: imm8 in byte 1 of each 32-bit lane */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         /* 32x2: imm8 in byte 2 of each 32-bit lane */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         /* 32x2: imm8 in byte 3 of each 32-bit lane */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         /* 16x4: imm8 in the low byte of each 16-bit lane */
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         /* 16x4: imm8 in the high byte of each 16-bit lane */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* 32x2 "shifting ones": imm8 shifted up by 8 or 16 bits,
            vacated low bits filled with ones */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         /* 8x16: imm8 in every byte lane */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* per-bit expansion: bit i of imm8 -> 0xFF or 0x00 in byte
            lane i */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* 32-bit FP immediate (VFPExpandImm-style), replicated into
            both 32-bit lanes */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7 = (imm8 >> 7) & 1;
            ULong imm8_6 = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1) << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50 << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* 64-bit FP immediate */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7 = (imm8 >> 7) & 1;
            ULong imm8_6 = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   /* Shifted-payload encodings are invalid with a zero payload. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
4907
4908
sewardj606c4ba2014-01-26 19:11:14 +00004909/* Help a bit for decoding laneage for vector operations that can be
4910 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
4911 and SZ bits, typically for vector floating point. */
4912static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
4913 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
4914 /*OUT*/const HChar** arrSpec,
4915 Bool bitQ, Bool bitSZ )
4916{
4917 vassert(bitQ == True || bitQ == False);
4918 vassert(bitSZ == True || bitSZ == False);
4919 if (bitQ && bitSZ) { // 2x64
4920 if (tyI) *tyI = Ity_I64;
4921 if (tyF) *tyF = Ity_F64;
4922 if (nLanes) *nLanes = 2;
4923 if (zeroUpper) *zeroUpper = False;
4924 if (arrSpec) *arrSpec = "2d";
4925 return True;
4926 }
4927 if (bitQ && !bitSZ) { // 4x32
4928 if (tyI) *tyI = Ity_I32;
4929 if (tyF) *tyF = Ity_F32;
4930 if (nLanes) *nLanes = 4;
4931 if (zeroUpper) *zeroUpper = False;
4932 if (arrSpec) *arrSpec = "4s";
4933 return True;
4934 }
4935 if (!bitQ && !bitSZ) { // 2x32
4936 if (tyI) *tyI = Ity_I32;
4937 if (tyF) *tyF = Ity_F32;
4938 if (nLanes) *nLanes = 2;
4939 if (zeroUpper) *zeroUpper = True;
4940 if (arrSpec) *arrSpec = "2s";
4941 return True;
4942 }
4943 // Else impliedly 1x64, which isn't allowed.
4944 return False;
4945}
4946
4947/* Helper for decoding laneage for simple vector operations,
4948 eg integer add. */
4949static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
4950 /*OUT*/const HChar** arrSpec,
4951 Bool bitQ, UInt szBlg2 )
4952{
4953 vassert(bitQ == True || bitQ == False);
4954 vassert(szBlg2 < 4);
4955 Bool zu = False;
4956 const HChar* as = NULL;
4957 switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
4958 case 0: zu = True; as = "8b"; break;
4959 case 1: zu = False; as = "16b"; break;
4960 case 2: zu = True; as = "4h"; break;
4961 case 3: zu = False; as = "8h"; break;
4962 case 4: zu = True; as = "2s"; break;
4963 case 5: zu = False; as = "4s"; break;
4964 case 6: return False; // impliedly 1x64
4965 case 7: zu = False; as = "2d"; break;
4966 default: vassert(0);
4967 }
4968 vassert(as);
4969 if (arrSpec) *arrSpec = as;
4970 if (zeroUpper) *zeroUpper = zu;
4971 return True;
4972}
4973
4974
sewardje520bb32014-02-17 11:00:53 +00004975/* Helper for decoding laneage for shift-style vector operations
4976 that involve an immediate shift amount. */
4977static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
4978 UInt immh, UInt immb )
4979{
4980 vassert(immh < (1<<4));
4981 vassert(immb < (1<<3));
4982 UInt immhb = (immh << 3) | immb;
4983 if (immh & 8) {
4984 if (shift) *shift = 128 - immhb;
4985 if (szBlg2) *szBlg2 = 3;
4986 return True;
4987 }
4988 if (immh & 4) {
4989 if (shift) *shift = 64 - immhb;
4990 if (szBlg2) *szBlg2 = 2;
4991 return True;
4992 }
4993 if (immh & 2) {
4994 if (shift) *shift = 32 - immhb;
4995 if (szBlg2) *szBlg2 = 1;
4996 return True;
4997 }
4998 if (immh & 1) {
4999 if (shift) *shift = 16 - immhb;
5000 if (szBlg2) *szBlg2 = 0;
5001 return True;
5002 }
5003 return False;
5004}
5005
5006
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero.
   'op' must be a lanewise min/max operator of 8, 16 or 32 bit
   laneage (signed or unsigned); anything else hits vassert(0).
   Temp names encode lane contents: e.g. x76547654 holds lanes
   7,6,5,4,7,6,5,4 of the original value. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: split into two vectors, each repeating one 64-bit
            half. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: four vectors, each repeating one 32-bit quarter. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: eight vectors, each repeating one 16-bit unit. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: sixteen vectors, each repeating one byte lane. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Now fold the 16 single-lane vectors pairwise with 'op'
            down to a single vector. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the bottom 8-bit lane of the fold result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme for 8 lanes of 16 bits: clone each lane into
            a full vector, then fold pairwise. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Pairwise fold of the 8 single-lane vectors. */
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the bottom 16-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again for 4 lanes of 32 bits. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the bottom 32-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5197
5198
sewardj92d0ae32014-04-03 13:48:54 +00005199/* Generate IR for TBL and TBX. This deals with the 128 bit case
5200 only. */
5201static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5202 IRTemp oor_values )
5203{
5204 vassert(len >= 0 && len <= 3);
5205
5206 /* Generate some useful constants as concisely as possible. */
5207 IRTemp half15 = newTemp(Ity_I64);
5208 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5209 IRTemp half16 = newTemp(Ity_I64);
5210 assign(half16, mkU64(0x1010101010101010ULL));
5211
5212 /* A zero vector */
5213 IRTemp allZero = newTemp(Ity_V128);
5214 assign(allZero, mkV128(0x0000));
5215 /* A vector containing 15 in each 8-bit lane */
5216 IRTemp all15 = newTemp(Ity_V128);
5217 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5218 /* A vector containing 16 in each 8-bit lane */
5219 IRTemp all16 = newTemp(Ity_V128);
5220 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5221 /* A vector containing 32 in each 8-bit lane */
5222 IRTemp all32 = newTemp(Ity_V128);
5223 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5224 /* A vector containing 48 in each 8-bit lane */
5225 IRTemp all48 = newTemp(Ity_V128);
5226 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5227 /* A vector containing 64 in each 8-bit lane */
5228 IRTemp all64 = newTemp(Ity_V128);
5229 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5230
5231 /* Group the 16/32/48/64 vectors so as to be indexable. */
5232 IRTemp allXX[4] = { all16, all32, all48, all64 };
5233
5234 /* Compute the result for each table vector, with zeroes in places
5235 where the index values are out of range, and OR them into the
5236 running vector. */
5237 IRTemp running_result = newTemp(Ity_V128);
5238 assign(running_result, mkV128(0));
5239
5240 UInt tabent;
5241 for (tabent = 0; tabent <= len; tabent++) {
5242 vassert(tabent >= 0 && tabent < 4);
5243 IRTemp bias = newTemp(Ity_V128);
5244 assign(bias,
5245 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5246 IRTemp biased_indices = newTemp(Ity_V128);
5247 assign(biased_indices,
5248 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5249 IRTemp valid_mask = newTemp(Ity_V128);
5250 assign(valid_mask,
5251 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5252 IRTemp safe_biased_indices = newTemp(Ity_V128);
5253 assign(safe_biased_indices,
5254 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5255 IRTemp results_or_junk = newTemp(Ity_V128);
5256 assign(results_or_junk,
5257 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5258 mkexpr(safe_biased_indices)));
5259 IRTemp results_or_zero = newTemp(Ity_V128);
5260 assign(results_or_zero,
5261 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5262 /* And OR that into the running result. */
5263 IRTemp tmp = newTemp(Ity_V128);
5264 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5265 mkexpr(running_result)));
5266 running_result = tmp;
5267 }
5268
5269 /* So now running_result holds the overall result where the indices
5270 are in range, and zero in out-of-range lanes. Now we need to
5271 compute an overall validity mask and use this to copy in the
5272 lanes in the oor_values for out of range indices. This is
5273 unnecessary for TBL but will get folded out by iropt, so we lean
5274 on that and generate the same code for TBL and TBX here. */
5275 IRTemp overall_valid_mask = newTemp(Ity_V128);
5276 assign(overall_valid_mask,
5277 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5278 IRTemp result = newTemp(Ity_V128);
5279 assign(result,
5280 binop(Iop_OrV128,
5281 mkexpr(running_result),
5282 binop(Iop_AndV128,
5283 mkexpr(oor_values),
5284 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5285 return result;
5286}
5287
5288
sewardjbbcf1882014-01-12 12:49:10 +00005289static
5290Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5291{
5292# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5293
5294 /* ---------------- FMOV (general) ---------------- */
5295 /* case 30 23 20 18 15 9 4
5296 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
5297 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
5298 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
5299
5300 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
5301 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
5302 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
5303 */
5304 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5305 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5306 UInt sf = INSN(31,31);
5307 UInt ty = INSN(23,22); // type
5308 UInt rm = INSN(20,19); // rmode
5309 UInt op = INSN(18,16); // opcode
5310 UInt nn = INSN(9,5);
5311 UInt dd = INSN(4,0);
5312 UInt ix = 0; // case
5313 if (sf == 0) {
5314 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5315 ix = 1;
5316 else
5317 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5318 ix = 4;
5319 } else {
5320 vassert(sf == 1);
5321 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5322 ix = 2;
5323 else
5324 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5325 ix = 5;
5326 else
5327 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5328 ix = 3;
5329 else
5330 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5331 ix = 6;
5332 }
5333 if (ix > 0) {
5334 switch (ix) {
5335 case 1:
5336 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005337 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005338 DIP("fmov s%u, w%u\n", dd, nn);
5339 break;
5340 case 2:
5341 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005342 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005343 DIP("fmov d%u, x%u\n", dd, nn);
5344 break;
5345 case 3:
sewardj606c4ba2014-01-26 19:11:14 +00005346 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005347 DIP("fmov v%u.d[1], x%u\n", dd, nn);
5348 break;
5349 case 4:
sewardj606c4ba2014-01-26 19:11:14 +00005350 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +00005351 DIP("fmov w%u, s%u\n", dd, nn);
5352 break;
5353 case 5:
sewardj606c4ba2014-01-26 19:11:14 +00005354 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +00005355 DIP("fmov x%u, d%u\n", dd, nn);
5356 break;
5357 case 6:
sewardj606c4ba2014-01-26 19:11:14 +00005358 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005359 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5360 break;
5361 default:
5362 vassert(0);
5363 }
5364 return True;
5365 }
5366 /* undecodable; fall through */
5367 }
5368
5369 /* -------------- FMOV (scalar, immediate) -------------- */
5370 /* 31 28 23 20 12 9 4
5371 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
5372 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
5373 */
5374 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5375 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5376 Bool isD = INSN(22,22) == 1;
5377 UInt imm8 = INSN(20,13);
5378 UInt dd = INSN(4,0);
5379 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
5380 if (!isD) {
sewardjaeeb31d2014-01-12 18:23:45 +00005381 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
sewardjbbcf1882014-01-12 12:49:10 +00005382 }
5383 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005384 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5385 DIP("fmov %s, #0x%llx\n",
5386 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
sewardjbbcf1882014-01-12 12:49:10 +00005387 return True;
5388 }
5389
sewardjfab09142014-02-10 10:28:13 +00005390 /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5391 /* 31 28 18 15 11 9 4
5392 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0)
5393 MOV Vd.2d #imm (q=1)
5394 Allowable op:cmode
5395 FMOV = 1:1111
5396 MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, 11110
5397 */
5398 if (INSN(31,31) == 0
5399 && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5400 && INSN(11,10) == BITS2(0,1)) {
5401 UInt bitQ = INSN(30,30);
5402 UInt bitOP = INSN(29,29);
5403 UInt cmode = INSN(15,12);
5404 UInt imm8 = (INSN(18,16) << 5) | INSN(9,5);
5405 UInt dd = INSN(4,0);
5406 ULong imm64lo = 0;
5407 UInt op_cmode = (bitOP << 4) | cmode;
5408 Bool ok = False;
5409 switch (op_cmode) {
5410 case BITS5(1,1,1,1,1): // 1:1111
5411 case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5412 case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5413 case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5414 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5415 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5416 case BITS5(1,1,1,1,0): // 1:1110
5417 ok = True; break;
5418 default:
5419 break;
5420 }
5421 if (ok) {
5422 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5423 }
5424 if (ok) {
5425 ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
5426 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
sewardjbd83e982014-04-08 15:23:42 +00005427 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
sewardjfab09142014-02-10 10:28:13 +00005428 return True;
5429 }
5430 /* else fall through */
5431 }
sewardjfab09142014-02-10 10:28:13 +00005432
sewardjbbcf1882014-01-12 12:49:10 +00005433 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
5434 /* 31 28 23 21 20 18 15 9 4 ix
5435 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0
5436 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1
5437 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2
5438 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3
5439
5440 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4
5441 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5
5442 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6
5443 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7
5444
5445 These are signed/unsigned conversion from integer registers to
5446 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
5447 */
5448 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
5449 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5450 Bool isI64 = INSN(31,31) == 1;
5451 Bool isF64 = INSN(22,22) == 1;
5452 Bool isU = INSN(16,16) == 1;
5453 UInt nn = INSN(9,5);
5454 UInt dd = INSN(4,0);
5455 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
5456 const IROp ops[8]
5457 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
5458 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
5459 IRExpr* src = getIRegOrZR(isI64, nn);
5460 IRExpr* res = (isF64 && !isI64)
5461 ? unop(ops[ix], src)
5462 : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
5463 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005464 putQRegLO(dd, res);
sewardjbbcf1882014-01-12 12:49:10 +00005465 DIP("%ccvtf %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005466 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
sewardjbbcf1882014-01-12 12:49:10 +00005467 nameIRegOrZR(isI64, nn));
5468 return True;
5469 }
5470
sewardj5860ec72014-03-01 11:19:45 +00005471 /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
sewardjbbcf1882014-01-12 12:49:10 +00005472 /* 31 23 20 15 11 9 4
5473 ---------------- 0000 ------ FMUL --------
5474 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
5475 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
5476 ---------------- 0010 ------ FADD --------
5477 ---------------- 0011 ------ FSUB --------
5478 ---------------- 1000 ------ FNMUL --------
5479 */
5480 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5481 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5482 Bool isD = INSN(22,22) == 1;
5483 UInt mm = INSN(20,16);
5484 UInt op = INSN(15,12);
5485 UInt nn = INSN(9,5);
5486 UInt dd = INSN(4,0);
5487 IROp iop = Iop_INVALID;
5488 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005489 Bool neg = False;
5490 const HChar* nm = "???";
5491 switch (op) {
5492 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break;
5493 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break;
5494 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break;
5495 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break;
5496 case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
5497 neg = True; break;
5498 default: return False;
5499 }
5500 vassert(iop != Iop_INVALID);
5501 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005502 getQRegLO(nn, ty), getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005503 IRTemp res = newTemp(ty);
5504 assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
5505 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005506 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005507 DIP("%s %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005508 nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005509 return True;
5510 }
5511
5512 /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
5513 /* 31 23 21 16 14 9 4
5514 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5515 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5516 ------------------ 01 --------- FABS ------
5517 ------------------ 10 --------- FNEG ------
sewardjfab09142014-02-10 10:28:13 +00005518 ------------------ 11 --------- FSQRT -----
sewardjbbcf1882014-01-12 12:49:10 +00005519 */
5520 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5521 && INSN(21,17) == BITS5(1,0,0,0,0)
5522 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5523 Bool isD = INSN(22,22) == 1;
5524 UInt opc = INSN(16,15);
5525 UInt nn = INSN(9,5);
5526 UInt dd = INSN(4,0);
5527 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005528 IRTemp res = newTemp(ty);
5529 if (opc == BITS2(0,0)) {
sewardj606c4ba2014-01-26 19:11:14 +00005530 assign(res, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005531 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005532 putQRegLO(dd, mkexpr(res));
5533 DIP("fmov %s, %s\n",
5534 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005535 return True;
5536 }
5537 if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
5538 Bool isAbs = opc == BITS2(0,1);
5539 IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005540 assign(res, unop(op, getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005541 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005542 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005543 DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
sewardj606c4ba2014-01-26 19:11:14 +00005544 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005545 return True;
5546 }
5547 if (opc == BITS2(1,1)) {
5548 assign(res,
5549 binop(mkSQRTF(ty),
sewardj606c4ba2014-01-26 19:11:14 +00005550 mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005551 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005552 putQRegLO(dd, mkexpr(res));
5553 DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005554 return True;
5555 }
5556 /* else fall through; other cases are ATC */
5557 }
5558
sewardjfab09142014-02-10 10:28:13 +00005559 /* ---------------- F{ABS,NEG} (vector) ---------------- */
5560 /* 31 28 22 21 16 9 4
5561 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T
5562 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T
5563 */
5564 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
5565 && INSN(21,17) == BITS5(1,0,0,0,0)
5566 && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
5567 UInt bitQ = INSN(30,30);
5568 UInt bitSZ = INSN(22,22);
5569 Bool isFNEG = INSN(29,29) == 1;
5570 UInt nn = INSN(9,5);
5571 UInt dd = INSN(4,0);
5572 const HChar* ar = "??";
5573 IRType tyF = Ity_INVALID;
5574 Bool zeroHI = False;
5575 Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
5576 (Bool)bitQ, (Bool)bitSZ);
5577 if (ok) {
sewardj32d86752014-03-02 12:47:18 +00005578 vassert(tyF == Ity_F64 || tyF == Ity_F32);
sewardjfab09142014-02-10 10:28:13 +00005579 IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
5580 : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
5581 IRTemp res = newTemp(Ity_V128);
5582 assign(res, unop(op, getQReg128(nn)));
5583 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
5584 : mkexpr(res));
5585 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
5586 nameQReg128(dd), ar, nameQReg128(nn), ar);
5587 return True;
5588 }
5589 /* else fall through */
5590 }
5591
sewardjbbcf1882014-01-12 12:49:10 +00005592 /* -------------------- FCMP,FCMPE -------------------- */
5593 /* 31 23 20 15 9 4
5594 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
5595 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
5596 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
5597 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
5598
5599 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
5600 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
5601 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
5602 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
5603
5604 FCMPE generates Invalid Operation exn if either arg is any kind
5605 of NaN. FCMP generates Invalid Operation exn if either arg is a
5606 signalling NaN. We ignore this detail here and produce the same
5607 IR for both.
5608 */
5609 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5610 && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
5611 Bool isD = INSN(22,22) == 1;
5612 UInt mm = INSN(20,16);
5613 UInt nn = INSN(9,5);
5614 Bool isCMPE = INSN(4,4) == 1;
5615 Bool cmpZero = INSN(3,3) == 1;
5616 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005617 Bool valid = True;
5618 if (cmpZero && mm != 0) valid = False;
5619 if (valid) {
5620 IRTemp argL = newTemp(ty);
5621 IRTemp argR = newTemp(ty);
5622 IRTemp irRes = newTemp(Ity_I32);
sewardj606c4ba2014-01-26 19:11:14 +00005623 assign(argL, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005624 assign(argR,
5625 cmpZero
5626 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
sewardj606c4ba2014-01-26 19:11:14 +00005627 : getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005628 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
5629 mkexpr(argL), mkexpr(argR)));
5630 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
5631 IRTemp nzcv_28x0 = newTemp(Ity_I64);
5632 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
5633 setFlags_COPY(nzcv_28x0);
sewardj606c4ba2014-01-26 19:11:14 +00005634 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
5635 cmpZero ? "#0.0" : nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005636 return True;
5637 }
5638 }
5639
5640 /* -------------------- F{N}M{ADD,SUB} -------------------- */
5641 /* 31 22 20 15 14 9 4 ix
5642 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
5643 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
5644 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
5645 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
5646 where Fx=Dx when sz=1, Fx=Sx when sz=0
5647
5648 -----SPEC------ ----IMPL----
5649 fmadd a + n * m a + n * m
5650 fmsub a + (-n) * m a - n * m
5651 fnmadd (-a) + (-n) * m -(a + n * m)
5652 fnmsub (-a) + n * m -(a - n * m)
5653 */
5654 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
5655 Bool isD = INSN(22,22) == 1;
5656 UInt mm = INSN(20,16);
5657 UInt aa = INSN(14,10);
5658 UInt nn = INSN(9,5);
5659 UInt dd = INSN(4,0);
5660 UInt ix = (INSN(21,21) << 1) | INSN(15,15);
5661 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005662 IROp opADD = mkADDF(ty);
5663 IROp opSUB = mkSUBF(ty);
5664 IROp opMUL = mkMULF(ty);
5665 IROp opNEG = mkNEGF(ty);
5666 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005667 IRExpr* eA = getQRegLO(aa, ty);
5668 IRExpr* eN = getQRegLO(nn, ty);
5669 IRExpr* eM = getQRegLO(mm, ty);
sewardjbbcf1882014-01-12 12:49:10 +00005670 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
5671 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
5672 switch (ix) {
5673 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
5674 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
5675 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
5676 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
5677 default: vassert(0);
5678 }
5679 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005680 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005681 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
5682 DIP("%s %s, %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005683 names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
5684 nameQRegLO(mm, ty), nameQRegLO(aa, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005685 return True;
5686 }
5687
5688 /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
5689 /* 30 23 20 18 15 9 4
5690 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
5691 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
5692 ---------------- 01 -------------- FCVTP-------- (round to +inf)
5693 ---------------- 10 -------------- FCVTM-------- (round to -inf)
5694 ---------------- 11 -------------- FCVTZ-------- (round to zero)
5695
5696 Rd is Xd when sf==1, Wd when sf==0
5697 Fn is Dn when x==1, Sn when x==0
5698 20:19 carry the rounding mode, using the same encoding as FPCR
5699 */
5700 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5701 && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5702 Bool isI64 = INSN(31,31) == 1;
5703 Bool isF64 = INSN(22,22) == 1;
5704 UInt rm = INSN(20,19);
5705 Bool isU = INSN(16,16) == 1;
5706 UInt nn = INSN(9,5);
5707 UInt dd = INSN(4,0);
5708 /* Decide on the IR rounding mode to use. */
5709 IRRoundingMode irrm = 8; /*impossible*/
5710 HChar ch = '?';
5711 switch (rm) {
5712 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
5713 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
5714 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
5715 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
5716 default: vassert(0);
5717 }
5718 vassert(irrm != 8);
5719 /* Decide on the conversion primop, based on the source size,
5720 dest size and signedness (8 possibilities). Case coding:
5721 F32 ->s I32 0
5722 F32 ->u I32 1
5723 F32 ->s I64 2
5724 F32 ->u I64 3
5725 F64 ->s I32 4
5726 F64 ->u I32 5
5727 F64 ->s I64 6
5728 F64 ->u I64 7
5729 */
5730 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
5731 vassert(ix < 8);
5732 const IROp ops[8]
5733 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
5734 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
5735 IROp op = ops[ix];
5736 // A bit of ATCery: bounce all cases we haven't seen an example of.
5737 if (/* F32toI32S */
5738 (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
sewardj1eaaec22014-03-07 22:52:19 +00005739 || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
5740 || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005741 /* F32toI32U */
sewardj1eaaec22014-03-07 22:52:19 +00005742 || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
5743 || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005744 /* F32toI64S */
sewardj1eaaec22014-03-07 22:52:19 +00005745 || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005746 /* F32toI64U */
5747 || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
5748 /* F64toI32S */
5749 || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
5750 || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
5751 || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
5752 /* F64toI32U */
sewardjbbcf1882014-01-12 12:49:10 +00005753 || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005754 || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
5755 || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005756 /* F64toI64S */
5757 || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005758 || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
5759 || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005760 /* F64toI64U */
5761 || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005762 || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005763 ) {
5764 /* validated */
5765 } else {
5766 return False;
5767 }
sewardjbbcf1882014-01-12 12:49:10 +00005768 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5769 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5770 IRTemp src = newTemp(srcTy);
5771 IRTemp dst = newTemp(dstTy);
sewardj606c4ba2014-01-26 19:11:14 +00005772 assign(src, getQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005773 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5774 putIRegOrZR(isI64, dd, mkexpr(dst));
5775 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
sewardj606c4ba2014-01-26 19:11:14 +00005776 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005777 return True;
5778 }
5779
sewardj1eaaec22014-03-07 22:52:19 +00005780 /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
5781 /* 30 23 20 18 15 9 4
5782 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn
5783 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn
5784 Fn is Dn when x==1, Sn when x==0
5785 */
5786 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
5787 && INSN(21,16) == BITS6(1,0,0,1,0,0)
5788 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5789 Bool isI64 = INSN(31,31) == 1;
5790 Bool isF64 = INSN(22,22) == 1;
5791 UInt nn = INSN(9,5);
5792 UInt dd = INSN(4,0);
5793 /* Decide on the IR rounding mode to use. */
5794 /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
5795 IRRoundingMode irrm = Irrm_NEAREST;
5796 /* Decide on the conversion primop. */
5797 IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
5798 : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
5799 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5800 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5801 IRTemp src = newTemp(srcTy);
5802 IRTemp dst = newTemp(dstTy);
5803 assign(src, getQRegLO(nn, srcTy));
5804 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5805 putIRegOrZR(isI64, dd, mkexpr(dst));
5806 DIP("fcvtas %s, %s (KLUDGED)\n",
5807 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
5808 return True;
5809 }
5810
sewardjbbcf1882014-01-12 12:49:10 +00005811 /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
5812 /* 31 23 21 17 14 9 4
5813 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
5814 rm
5815 x==0 => S-registers, x==1 => D-registers
5816 rm (17:15) encodings:
5817 111 per FPCR (FRINTI)
5818 001 +inf (FRINTP)
5819 010 -inf (FRINTM)
5820 011 zero (FRINTZ)
5821 000 tieeven
sewardj1eaaec22014-03-07 22:52:19 +00005822 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjbbcf1882014-01-12 12:49:10 +00005823 110 per FPCR + "exact = TRUE"
5824 101 unallocated
5825 */
5826 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5827 && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
5828 Bool isD = INSN(22,22) == 1;
5829 UInt rm = INSN(17,15);
5830 UInt nn = INSN(9,5);
5831 UInt dd = INSN(4,0);
5832 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005833 IRExpr* irrmE = NULL;
5834 UChar ch = '?';
5835 switch (rm) {
5836 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
5837 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
5838 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
sewardj1eaaec22014-03-07 22:52:19 +00005839 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
5840 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjbbcf1882014-01-12 12:49:10 +00005841 default: break;
5842 }
5843 if (irrmE) {
5844 IRTemp src = newTemp(ty);
5845 IRTemp dst = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005846 assign(src, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005847 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
5848 irrmE, mkexpr(src)));
5849 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005850 putQRegLO(dd, mkexpr(dst));
5851 DIP("frint%c %s, %s\n",
5852 ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005853 return True;
5854 }
5855 /* else unhandled rounding mode case -- fall through */
5856 }
5857
5858 /* ------------------ FCVT (scalar) ------------------ */
5859 /* 31 23 21 16 14 9 4
5860 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
5861 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
5862 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005863 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardjbbcf1882014-01-12 12:49:10 +00005864 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005865 --------- 01 ----- 00 --------- FCVT Sd, Dn
sewardjbbcf1882014-01-12 12:49:10 +00005866 Rounding, when dst is smaller than src, is per the FPCR.
5867 */
5868 if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
5869 && INSN(21,17) == BITS5(1,0,0,0,1)
5870 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5871 UInt b2322 = INSN(23,22);
5872 UInt b1615 = INSN(16,15);
5873 UInt nn = INSN(9,5);
5874 UInt dd = INSN(4,0);
5875 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
5876 /* Convert S to D */
5877 IRTemp res = newTemp(Ity_F64);
sewardj606c4ba2014-01-26 19:11:14 +00005878 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
sewardjbbcf1882014-01-12 12:49:10 +00005879 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005880 putQRegLO(dd, mkexpr(res));
5881 DIP("fcvt %s, %s\n",
5882 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
sewardjbbcf1882014-01-12 12:49:10 +00005883 return True;
5884 }
5885 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
5886 /* Convert D to S */
5887 IRTemp res = newTemp(Ity_F32);
5888 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005889 getQRegLO(nn, Ity_F64)));
sewardjbbcf1882014-01-12 12:49:10 +00005890 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005891 putQRegLO(dd, mkexpr(res));
5892 DIP("fcvt %s, %s\n",
5893 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
sewardjbbcf1882014-01-12 12:49:10 +00005894 return True;
5895 }
5896 /* else unhandled */
5897 }
5898
5899 /* ------------------ FABD (scalar) ------------------ */
5900 /* 31 23 20 15 9 4
5901 011 11110 111 m 110101 n d FABD Dd, Dn, Dm
5902 011 11110 101 m 110101 n d FABD Sd, Sn, Sm
5903 */
5904 if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
5905 && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
5906 Bool isD = INSN(22,22) == 1;
5907 UInt mm = INSN(20,16);
5908 UInt nn = INSN(9,5);
5909 UInt dd = INSN(4,0);
5910 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005911 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005912 assign(res, unop(mkABSF(ty),
5913 triop(mkSUBF(ty),
5914 mkexpr(mk_get_IR_rounding_mode()),
5915 getQRegLO(nn,ty), getQRegLO(mm,ty))));
sewardjbbcf1882014-01-12 12:49:10 +00005916 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005917 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005918 DIP("fabd %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005919 nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005920 return True;
5921 }
5922
sewardj606c4ba2014-01-26 19:11:14 +00005923 /* -------------- {S,U}CVTF (vector, integer) -------------- */
5924 /* 31 28 22 21 15 9 4
5925 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
5926 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
5927 with laneage:
5928 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
5929 */
5930 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
5931 && INSN(21,16) == BITS6(1,0,0,0,0,1)
5932 && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
5933 Bool isQ = INSN(30,30) == 1;
5934 Bool isU = INSN(29,29) == 1;
5935 Bool isF64 = INSN(22,22) == 1;
5936 UInt nn = INSN(9,5);
5937 UInt dd = INSN(4,0);
5938 if (isQ || !isF64) {
5939 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
5940 UInt nLanes = 0;
5941 Bool zeroHI = False;
5942 const HChar* arrSpec = NULL;
5943 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
5944 isQ, isF64 );
5945 IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
5946 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
5947 IRTemp rm = mk_get_IR_rounding_mode();
5948 UInt i;
5949 vassert(ok); /* the 'if' above should ensure this */
5950 for (i = 0; i < nLanes; i++) {
5951 putQRegLane(dd, i,
5952 binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
5953 }
5954 if (zeroHI) {
5955 putQRegLane(dd, 1, mkU64(0));
5956 }
5957 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
5958 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
5959 return True;
5960 }
5961 /* else fall through */
5962 }
5963
5964 /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
5965 /* 31 28 22 21 20 15 9 4 case
5966 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1
5967 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2
5968 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3
5969 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4
5970 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5
5971 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6
sewardje520bb32014-02-17 11:00:53 +00005972 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7
sewardj606c4ba2014-01-26 19:11:14 +00005973 */
5974 if (INSN(31,31) == 0
5975 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
5976 Bool isQ = INSN(30,30) == 1;
5977 UInt b29 = INSN(29,29);
5978 UInt b23 = INSN(23,23);
5979 Bool isF64 = INSN(22,22) == 1;
5980 UInt mm = INSN(20,16);
5981 UInt b1510 = INSN(15,10);
5982 UInt nn = INSN(9,5);
5983 UInt dd = INSN(4,0);
5984 UInt ix = 0;
5985 /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
5986 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
5987 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
5988 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
5989 else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
5990 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
sewardje520bb32014-02-17 11:00:53 +00005991 else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
sewardj606c4ba2014-01-26 19:11:14 +00005992 IRType laneTy = Ity_INVALID;
5993 Bool zeroHI = False;
5994 const HChar* arr = "??";
5995 Bool ok
5996 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
5997 /* Skip MLA/MLS for the time being */
5998 if (ok && ix >= 1 && ix <= 4) {
5999 const IROp ops64[4]
6000 = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
6001 const IROp ops32[4]
6002 = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
6003 const HChar* names[4]
6004 = { "fadd", "fsub", "fmul", "fdiv" };
6005 IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
6006 IRTemp rm = mk_get_IR_rounding_mode();
6007 IRTemp t1 = newTemp(Ity_V128);
6008 IRTemp t2 = newTemp(Ity_V128);
6009 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjecde6972014-02-05 11:01:19 +00006010 assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
6011 : mkexpr(t1));
sewardj606c4ba2014-01-26 19:11:14 +00006012 putQReg128(dd, mkexpr(t2));
6013 DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
6014 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6015 return True;
6016 }
sewardjfab09142014-02-10 10:28:13 +00006017 if (ok && ix >= 5 && ix <= 6) {
6018 IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
6019 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6020 IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
6021 IRTemp rm = mk_get_IR_rounding_mode();
6022 IRTemp t1 = newTemp(Ity_V128);
6023 IRTemp t2 = newTemp(Ity_V128);
6024 // FIXME: double rounding; use FMA primops instead
6025 assign(t1, triop(opMUL,
6026 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6027 assign(t2, triop(ix == 5 ? opADD : opSUB,
6028 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
sewardje520bb32014-02-17 11:00:53 +00006029 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6030 : mkexpr(t2));
sewardjfab09142014-02-10 10:28:13 +00006031 DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
6032 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6033 return True;
6034 }
sewardje520bb32014-02-17 11:00:53 +00006035 if (ok && ix == 7) {
6036 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6037 IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6038 IRTemp rm = mk_get_IR_rounding_mode();
6039 IRTemp t1 = newTemp(Ity_V128);
6040 IRTemp t2 = newTemp(Ity_V128);
6041 // FIXME: use Abd primop instead?
6042 assign(t1, triop(opSUB,
6043 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6044 assign(t2, unop(opABS, mkexpr(t1)));
6045 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6046 : mkexpr(t2));
6047 DIP("fabd %s.%s, %s.%s, %s.%s\n",
6048 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6049 return True;
6050 }
sewardj606c4ba2014-01-26 19:11:14 +00006051 }
6052
sewardj2bd1ffe2014-03-27 18:59:00 +00006053 /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
6054 /* 31 28 22 20 15 9 4 case
6055 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm
6056 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm
6057 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm
6058 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm
6059 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm
6060 */
6061 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
6062 && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
6063 Bool isQ = INSN(30,30) == 1;
6064 UInt U = INSN(29,29);
6065 UInt E = INSN(23,23);
6066 Bool isF64 = INSN(22,22) == 1;
6067 UInt ac = INSN(11,11);
6068 UInt mm = INSN(20,16);
6069 UInt nn = INSN(9,5);
6070 UInt dd = INSN(4,0);
6071 /* */
6072 UInt EUac = (E << 2) | (U << 1) | ac;
6073 IROp opABS = Iop_INVALID;
6074 IROp opCMP = Iop_INVALID;
6075 IRType laneTy = Ity_INVALID;
6076 Bool zeroHI = False;
6077 Bool swap = True;
6078 const HChar* arr = "??";
6079 const HChar* nm = "??";
6080 Bool ok
6081 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6082 if (ok) {
6083 vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
6084 switch (EUac) {
6085 case BITS3(0,0,0):
6086 nm = "fcmeq";
6087 opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
6088 swap = False;
6089 break;
6090 case BITS3(0,1,0):
6091 nm = "fcmge";
6092 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6093 break;
6094 case BITS3(0,1,1):
6095 nm = "facge";
6096 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6097 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6098 break;
6099 case BITS3(1,1,0):
6100 nm = "fcmgt";
6101 opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6102 break;
6103 case BITS3(1,1,1):
6104 nm = "fcagt";
6105 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6106 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6107 break;
6108 default:
6109 break;
6110 }
6111 }
6112 if (opCMP != Iop_INVALID) {
6113 IRExpr* argN = getQReg128(nn);
6114 IRExpr* argM = getQReg128(mm);
6115 if (opABS != Iop_INVALID) {
6116 argN = unop(opABS, argN);
6117 argM = unop(opABS, argM);
6118 }
6119 IRExpr* res = swap ? binop(opCMP, argM, argN)
6120 : binop(opCMP, argN, argM);
6121 if (zeroHI) {
6122 res = unop(Iop_ZeroHI64ofV128, res);
6123 }
6124 putQReg128(dd, res);
6125 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6126 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6127 return True;
6128 }
6129 /* else fall through */
6130 }
6131
   /* -------------------- FCVTN -------------------- */
   /* 31  28    23  20    15     9 4
      0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
      where case q:s of 00: 16Fx4(lo) <- 32Fx4
                        01: 32Fx2(lo) <- 64Fx2
                        10: 16Fx4(hi) <- 32Fx4
                        11: 32Fx2(hi) <- 64Fx2
      Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   */
   if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
      UInt bQ = INSN(30,30);   /* 0 = FCVTN (write lo), 1 = FCVTN2 (write hi) */
      UInt bS = INSN(22,22);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (bS == 1) {
         /* Narrow both D lanes of Vn to F32 using the FPCR rounding
            mode, placing them in lanes 2*bQ+0 and 2*bQ+1 of Vd. */
         IRTemp rm = mk_get_IR_rounding_mode();
         IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
         IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
         putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
         putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
         if (bQ == 0) {
            /* FCVTN zeroes the upper 64 bits of Vd; FCVTN2 keeps them. */
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
             nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
         return True;
      }
      /* else fall through */
   }
6162
sewardj606c4ba2014-01-26 19:11:14 +00006163 /* ---------------- ADD/SUB (vector) ---------------- */
6164 /* 31 28 23 21 20 15 9 4
6165 0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T
6166 0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T
6167 */
6168 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
6169 && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6170 Bool isQ = INSN(30,30) == 1;
6171 UInt szBlg2 = INSN(23,22);
6172 Bool isSUB = INSN(29,29) == 1;
6173 UInt mm = INSN(20,16);
6174 UInt nn = INSN(9,5);
6175 UInt dd = INSN(4,0);
6176 Bool zeroHI = False;
6177 const HChar* arrSpec = "";
6178 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6179 if (ok) {
sewardjf5b08912014-02-06 12:57:58 +00006180 const IROp opsADD[4]
sewardj606c4ba2014-01-26 19:11:14 +00006181 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
sewardjf5b08912014-02-06 12:57:58 +00006182 const IROp opsSUB[4]
sewardj606c4ba2014-01-26 19:11:14 +00006183 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
6184 vassert(szBlg2 < 4);
sewardjf5b08912014-02-06 12:57:58 +00006185 IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
6186 IRTemp t = newTemp(Ity_V128);
sewardj606c4ba2014-01-26 19:11:14 +00006187 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjecde6972014-02-05 11:01:19 +00006188 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
6189 : mkexpr(t));
sewardj606c4ba2014-01-26 19:11:14 +00006190 const HChar* nm = isSUB ? "sub" : "add";
6191 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6192 nameQReg128(dd), arrSpec,
6193 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6194 return True;
6195 }
6196 /* else fall through */
6197 }
6198
sewardjecde6972014-02-05 11:01:19 +00006199 /* ---------------- ADD/SUB (scalar) ---------------- */
6200 /* 31 28 23 21 20 15 9 4
6201 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm
6202 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm
6203 */
6204 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6205 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6206 Bool isSUB = INSN(29,29) == 1;
6207 UInt mm = INSN(20,16);
6208 UInt nn = INSN(9,5);
6209 UInt dd = INSN(4,0);
6210 IRTemp res = newTemp(Ity_I64);
6211 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6212 getQRegLane(nn, 0, Ity_I64),
6213 getQRegLane(mm, 0, Ity_I64)));
6214 putQRegLane(dd, 0, mkexpr(res));
6215 putQRegLane(dd, 1, mkU64(0));
6216 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6217 nameQRegLO(dd, Ity_I64),
6218 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6219 return True;
6220 }
6221
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
      0q0 01110 size 1  m  100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29  = INSN(29,29);
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool isMLAS = INSN(11,11) == 0;   /* True for MLA/MLS, False for (P)MUL */
      /* Slot 3 (64-bit lanes) is Iop_INVALID throughout: these
         instructions have no D-lane form. */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC.  A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         /* t1 = n * m; t2 = d +/- t1 for MLA/MLS, else just t1. */
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6280
   /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  011011 n d  SMIN Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011011 n d  UMIN Vd.T, Vn.T, Vm.T
      0q0 01110 size 1  m  011001 n d  SMAX Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011001 n d  UMAX Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;   /* unsigned (U...) vs signed (S...) */
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(11,11) == 0;   /* bit 11 distinguishes MAX from MIN */
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         /* Lane-wise min/max; op table indexed by log2(lane size). */
         const IROp opMINS[4]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
         const IROp opMINU[4]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
         const IROp opMAXS[4]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
         const IROp opMAXU[4]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
         vassert(szBlg2 < 4);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp t = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                                 : (isU ? "umin" : "smin");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6326
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31  28    23   21    16 15     9 4
      0q0 01110 size 11000 1  101010 n d  SMINV Vd, Vn.T
      0q1 01110 size 11000 1  101010 n d  UMINV Vd, Vn.T
      0q0 01110 size 11000 0  101010 n d  SMAXV Vd, Vn.T
      0q1 01110 size 11000 0  101010 n d  UMAXV Vd, Vn.T
      Across-lanes reduction: folds all lanes of Vn down to a single
      scalar min/max in Vd.
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(16,16) == 0;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* No D-lane form, and the S-lane form requires Q == 1
            (a 2s reduction does not exist). */
         if (szBlg2 == 3) ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value.  In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6385
sewardjfab09142014-02-10 10:28:13 +00006386 /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
6387 /* 31 28 23 20 15 9 4
6388 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T
6389 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T
6390 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T
6391 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T
6392 T is 16b when q==1, 8b when q==0
6393 */
6394 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6395 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6396 Bool isQ = INSN(30,30) == 1;
6397 Bool isORR = INSN(23,23) == 1;
6398 Bool invert = INSN(22,22) == 1;
6399 UInt mm = INSN(20,16);
6400 UInt nn = INSN(9,5);
6401 UInt dd = INSN(4,0);
6402 IRTemp res = newTemp(Ity_V128);
6403 assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
6404 getQReg128(nn),
6405 invert ? unop(Iop_NotV128, getQReg128(mm))
6406 : getQReg128(mm)));
6407 putQReg128(dd, isQ ? mkexpr(res)
6408 : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
6409 const HChar* names[4] = { "and", "bic", "orr", "orn" };
6410 const HChar* ar = isQ ? "16b" : "8b";
6411 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
6412 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
6413 return True;
6414 }
6415
   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   /* 31  28    23   21     15     9 4                          ix
      0q1 01110 size 1  m   100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
      0q0 01110 size 1  m   100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, != 0

      0q1 01110 size 1  m   001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
      0q0 01110 size 1  m   001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s

      0q1 01110 size 1  m   001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
      0q0 01110 size 1  m   001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s

      0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0     (7) >=s 0
      0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0     (8) >s 0

      0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0     (9) <=s 0
      0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0     (10) == 0

      0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0     (11) <s 0
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ    = INSN(30,30) == 1;
      UInt bit29  = INSN(29,29);
      UInt szBlg2 = INSN(23,22);
      UInt mm     = INSN(20,16);
      UInt b1510  = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const IROp opsEQ[4]
         = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
      const IROp opsGTS[4]
         = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
      const IROp opsGTU[4]
         = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
      Bool zeroHI = False;
      const HChar* arrSpec = "??";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      UInt ix = 0;
      /* Map bits 15:10 (and, for the #0 forms, the requirement that
         the m field is zero) to a case index per the table above. */
      if (ok) {
         switch (b1510) {
            case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
            case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
            case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
            case BITS6(1,0,0,0,1,0):
               if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
            case BITS6(1,0,0,1,1,0):
               if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
            case BITS6(1,0,1,0,1,0):
               if (mm == 0 && bit29 == 0) { ix = 11; }; break;
            default: break;
         }
      }
      if (ix != 0) {
         vassert(ok && szBlg2 < 4);
         IRExpr* argL = getQReg128(nn);
         /* The #0 forms (ix 7..11) compare against a zero vector. */
         IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
         IRExpr* res = NULL;
         /* Some useful identities:
            x > y   can be expressed directly
            x < y   ==   y > x
            x <= y  ==   not (x > y)
            x >= y  ==   not (y > x)
         */
         switch (ix) {
            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMTST: lanes where (L & R) != 0, via not((L & R) == 0) */
            case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
                                     binop(Iop_AndV128, argL, argR),
                                     mkV128(0x0000)));
                    break;
            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
            /* cases 5..7: x >= y as not(y > x), per the identities above */
            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
                    break;
            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
            /* CMLE #0: x <= 0 as not(x > 0) */
            case 9: res = unop(Iop_NotV128,
                               binop(opsGTS[szBlg2], argL, argR));
                    break;
            case 10: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMLT #0: x < 0 as 0 > x */
            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
            default: vassert(0);
         }
         vassert(res);
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }
6517
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31  28    23 20 15     9 4
      0q1 01110 00 1 m  000111 n d  EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1 m  000111 n d  BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1 m  000111 n d  BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1 m  000111 n d  BIF Vd.T, Vm.T, Vn.T
      The bit-select forms are expressed as xor/and muxes:
         a ^ ((a ^ b) & sel)  selects b where sel is 1, a where sel is 0.
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt op  = INSN(23,22);
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      switch (op) {
         case BITS2(0,0): /* EOR: d = m ^ n */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: d selects: n where d==1, m where d==0 */
            res = binop(opXOR, mkexpr(argM),
                        binop(opAND,
                              binop(opXOR, mkexpr(argM), mkexpr(argN)),
                              mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: m selects: n where m==1, keep d where m==0 */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: ~m selects: n where m==0, keep d where m==1 */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6576
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx,    SHL:xxx
                         001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
                         01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                         other    -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      UInt ix = 0;
      /* bit 29 plus bits 15:11 select which of the three shifts. */
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift = 0;
         Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case.  Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         /* NOTE(review): the range test below rejects the edge
            encodings SHL #0 and SHR #(lane bits), which look like
            valid encodings; they fall through as undecoded here --
            confirm they are handled elsewhere or intentionally
            unimplemented. */
         if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = "shl"; break;
               default: vassert(0);
            }
            IRExpr* src = getQReg128(nn);
            IRExpr* res = binop(op, src, mkU8(shift));
            putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
            HChar laneCh = "bhsd"[szBlg2];
            UInt nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6639
   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31  28     22   18   15     9 4
      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                       0000 -> AdvSIMD modified immediate (???)
      Implementation trick: interleave the source lanes with zero so
      each source lane sits in the top half of a double-width lane,
      then shift right by (laneBits - sh): logical shift for the
      unsigned (USHLL) case, arithmetic for the signed (SSHLL) case.
      That zero/sign-extends and left-shifts by sh in one go.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
      Bool isQ   = INSN(30,30) == 1;   /* selects SSHLL2/USHLL2 (hi half) */
      Bool isU   = INSN(29,29) == 1;
      UInt immh  = INSN(22,19);
      UInt immb  = INSN(18,16);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTemp(Ity_V128);
      IRTemp  zero = newTemp(Ity_V128);
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      /* else fall through */
   }
6711
   /* -------------------- XTN{,2} -------------------- */
   /* 31  28    23   21     15     9 4  XTN{,2} Vd.Tb, Vn.Ta
      0q0 01110 size 100001 001010 n d
      Narrows each lane of Vn to half width.  q==0 (XTN) writes the
      result to the lower half of Vd and zeroes the upper half;
      q==1 (XTN2) writes it to the upper half, preserving the lower.
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt size = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IROp op   = Iop_INVALID;
      const HChar* tb = NULL;
      const HChar* ta = NULL;
      /* Index is size:q; each size uses one narrowing op, with q only
         changing which half of Vd is written (and the names). */
      switch ((size << 1) | (isQ ? 1 : 0)) {
         case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 6: break;   /* size == 3 is invalid */
         case 7: break;
         default: vassert(0);
      }
      if (op != Iop_INVALID) {
         if (!isQ) {
            putQRegLane(dd, 1, mkU64(0));
         }
         putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
         DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
             nameQReg128(dd), tb, nameQReg128(nn), ta);
         return True;
      }
      /* else fall through */
   }
6748
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
      The lowest set bit of imm5 selects the lane size; the bits above
      it give the source lane index.  Replicates that lane of Vn
      across all lanes of Vd.
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);   /* selected lane, zero-widened to 64 */
      const HChar* arT  = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arT  = isQ ? "16b" : "8b";
         arTs = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT  = isQ ? "8h" : "4h";
         arTs = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT  = isQ ? "4s" : "2s";
         arTs = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         /* D lanes only exist in the Q (128-bit) form. */
         arT  = "2d";
         arTs = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane within 64 bits, then across both halves
            (upper half zeroed for the non-Q forms). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
6807
   /* ---------------- DUP (general, vector) ---------------- */
   /* 31  28    23  20   15     9 4
      0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Replicates the low lane-sized chunk of Rn across all lanes of Vd.
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);   /* source chunk, zero-widened to 64 */
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         /* D lanes only exist in the Q (128-bit) form. */
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         /* Replicate within 64 bits, then across both halves (upper
            half zeroed for the non-Q forms). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* else fall through */
   }
6861
sewardjf5b08912014-02-06 12:57:58 +00006862 /* ---------------------- {S,U}MOV ---------------------- */
6863 /* 31 28 20 15 9 4
6864 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
6865 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
6866 dest is Xd when q==1, Wd when q==0
6867 UMOV:
6868 Ts,index,ops = case q:imm5 of
6869 0:xxxx1 -> B, xxxx, 8Uto64
6870 1:xxxx1 -> invalid
6871 0:xxx10 -> H, xxx, 16Uto64
6872 1:xxx10 -> invalid
6873 0:xx100 -> S, xx, 32Uto64
6874 1:xx100 -> invalid
6875 1:x1000 -> D, x, copy64
6876 other -> invalid
6877 SMOV:
6878 Ts,index,ops = case q:imm5 of
6879 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
6880 1:xxxx1 -> B, xxxx, 8Sto64
6881 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
6882 1:xxx10 -> H, xxx, 16Sto64
6883 0:xx100 -> invalid
6884 1:xx100 -> S, xx, 32Sto64
6885 1:x1000 -> invalid
6886 other -> invalid
6887 */
6888 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
6889 && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
6890 UInt bitQ = INSN(30,30) == 1;
6891 UInt imm5 = INSN(20,16);
6892 UInt nn = INSN(9,5);
6893 UInt dd = INSN(4,0);
6894 Bool isU = INSN(12,12) == 1;
6895 const HChar* arTs = "??";
6896 UInt laneNo = 16; /* invalid */
6897 // Setting 'res' to non-NULL determines valid/invalid
6898 IRExpr* res = NULL;
6899 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
6900 laneNo = (imm5 >> 1) & 15;
6901 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6902 res = isU ? unop(Iop_8Uto64, lane)
6903 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
6904 arTs = "b";
6905 }
6906 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
6907 laneNo = (imm5 >> 1) & 15;
6908 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6909 res = isU ? NULL
6910 : unop(Iop_8Sto64, lane);
6911 arTs = "b";
6912 }
6913 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
6914 laneNo = (imm5 >> 2) & 7;
6915 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6916 res = isU ? unop(Iop_16Uto64, lane)
6917 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
6918 arTs = "h";
6919 }
6920 else if (bitQ && (imm5 & 2)) { // 1:xxx10
6921 laneNo = (imm5 >> 2) & 7;
6922 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6923 res = isU ? NULL
6924 : unop(Iop_16Sto64, lane);
6925 arTs = "h";
6926 }
6927 else if (!bitQ && (imm5 & 4)) { // 0:xx100
6928 laneNo = (imm5 >> 3) & 3;
6929 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6930 res = isU ? unop(Iop_32Uto64, lane)
6931 : NULL;
6932 arTs = "s";
6933 }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
6935 laneNo = (imm5 >> 3) & 3;
6936 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6937 res = isU ? NULL
6938 : unop(Iop_32Sto64, lane);
6939 arTs = "s";
6940 }
6941 else if (bitQ && (imm5 & 8)) { // 1:x1000
6942 laneNo = (imm5 >> 4) & 1;
6943 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
6944 res = isU ? lane
6945 : NULL;
6946 arTs = "d";
6947 }
6948 /* */
6949 if (res) {
6950 vassert(laneNo < 16);
6951 putIReg64orZR(dd, res);
6952 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
6953 nameIRegOrZR(bitQ == 1, dd),
6954 nameQReg128(nn), arTs, laneNo);
6955 return True;
6956 }
6957 /* else fall through */
6958 }
6959
sewardje520bb32014-02-17 11:00:53 +00006960 /* -------------------- INS (general) -------------------- */
6961 /* 31 28 20 15 9 4
6962 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
6963 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
6964 xxx10 -> H, xxx
6965 xx100 -> S, xx
6966 x1000 -> D, x
6967 */
6968 if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
6969 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6970 UInt imm5 = INSN(20,16);
6971 UInt nn = INSN(9,5);
6972 UInt dd = INSN(4,0);
6973 HChar ts = '?';
6974 UInt laneNo = 16;
6975 IRExpr* src = NULL;
6976 if (imm5 & 1) {
6977 src = unop(Iop_64to8, getIReg64orZR(nn));
6978 laneNo = (imm5 >> 1) & 15;
6979 ts = 'b';
6980 }
6981 else if (imm5 & 2) {
6982 src = unop(Iop_64to16, getIReg64orZR(nn));
6983 laneNo = (imm5 >> 2) & 7;
6984 ts = 'h';
6985 }
6986 else if (imm5 & 4) {
6987 src = unop(Iop_64to32, getIReg64orZR(nn));
6988 laneNo = (imm5 >> 3) & 3;
6989 ts = 's';
6990 }
6991 else if (imm5 & 8) {
6992 src = getIReg64orZR(nn);
6993 laneNo = (imm5 >> 4) & 1;
6994 ts = 'd';
6995 }
6996 /* */
6997 if (src) {
6998 vassert(laneNo < 16);
6999 putQRegLane(dd, laneNo, src);
7000 DIP("ins %s.%c[%u], %s\n",
7001 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
7002 return True;
7003 }
7004 /* else invalid; fall through */
7005 }
7006
sewardj32d86752014-03-02 12:47:18 +00007007 /* -------------------- NEG (vector) -------------------- */
7008 /* 31 28 23 21 16 9 4
7009 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn
7010 sz is laneSz, q:sz == 011 is disallowed, as usual
7011 */
7012 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
7013 && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
7014 Bool isQ = INSN(30,30) == 1;
7015 UInt szBlg2 = INSN(23,22);
7016 UInt nn = INSN(9,5);
7017 UInt dd = INSN(4,0);
7018 Bool zeroHI = False;
7019 const HChar* arrSpec = "";
7020 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
7021 if (ok) {
7022 const IROp opSUB[4]
7023 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
7024 IRTemp res = newTemp(Ity_V128);
7025 vassert(szBlg2 < 4);
7026 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
7027 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
7028 : mkexpr(res));
7029 DIP("neg %s.%s, %s.%s\n",
7030 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
7031 return True;
7032 }
7033 /* else fall through */
7034 }
7035
sewardj92d0ae32014-04-03 13:48:54 +00007036 /* -------------------- TBL, TBX -------------------- */
7037 /* 31 28 20 15 14 12 9 4
7038 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7039 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7040 where Ta = 16b(q=1) or 8b(q=0)
7041 */
7042 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7043 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
7044 Bool isQ = INSN(30,30) == 1;
7045 Bool isTBX = INSN(12,12) == 1;
7046 UInt mm = INSN(20,16);
7047 UInt len = INSN(14,13);
7048 UInt nn = INSN(9,5);
7049 UInt dd = INSN(4,0);
7050 /* The out-of-range values to use. */
7051 IRTemp oor_values = newTemp(Ity_V128);
7052 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
7053 /* src value */
7054 IRTemp src = newTemp(Ity_V128);
7055 assign(src, getQReg128(mm));
7056 /* The table values */
7057 IRTemp tab[4];
7058 UInt i;
7059 for (i = 0; i <= len; i++) {
7060 vassert(i < 4);
7061 tab[i] = newTemp(Ity_V128);
7062 assign(tab[i], getQReg128((nn + i) % 32));
7063 }
7064 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
7065 putQReg128(dd, isQ ? mkexpr(res)
7066 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
7067 const HChar* Ta = isQ ? "16b" : "8b";
7068 const HChar* nm = isTBX ? "tbx" : "tbl";
7069 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
7070 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
7071 return True;
7072 }
sewardjbbcf1882014-01-12 12:49:10 +00007073 /* FIXME Temporary hacks to get through ld.so FIXME */
7074
7075 /* ------------------ movi vD.4s, #0x0 ------------------ */
7076 /* 0x4F 0x00 0x04 000 vD */
7077 if ((insn & 0xFFFFFFE0) == 0x4F000400) {
7078 UInt vD = INSN(4,0);
7079 putQReg128(vD, mkV128(0x0000));
7080 DIP("movi v%u.4s, #0x0\n", vD);
7081 return True;
7082 }
7083
sewardjbbcf1882014-01-12 12:49:10 +00007084 /* ---------------- MOV vD.16b, vN.16b ---------------- */
7085 /* 31 23 20 15 9 4
7086 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
7087 This only handles the N == M case.
7088 */
7089 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
7090 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7091 UInt mm = INSN(20,16);
7092 UInt nn = INSN(9,5);
7093 UInt dd = INSN(4,0);
7094 if (mm == nn) {
7095 putQReg128(dd, getQReg128(nn));
7096 DIP("mov v%u.16b, v%u.16b\n", dd, nn);
7097 return True;
7098 }
7099 /* else it's really an ORR; fall through. */
7100 }
7101
7102 vex_printf("ARM64 front end: simd_and_fp\n");
7103 return False;
7104# undef INSN
7105}
7106
7107
7108/*------------------------------------------------------------*/
7109/*--- Disassemble a single ARM64 instruction ---*/
7110/*------------------------------------------------------------*/
7111
7112/* Disassemble a single ARM64 instruction into IR. The instruction
7113 has is located at |guest_instr| and has guest IP of
7114 |guest_PC_curr_instr|, which will have been set before the call
7115 here. Returns True iff the instruction was decoded, in which case
7116 *dres will be set accordingly, or False, in which case *dres should
7117 be ignored by the caller. */
7118
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ 
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults: "decoded OK, 4 bytes long, keep going".
      Sub-decoders only modify these for control-flow insns. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file).
      The preamble is 4 no-op rotates (16 bytes); the following
      marker insn selects the operation, so each special sequence
      is 20 bytes in total. */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble: 
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            /* Unlike the other specials this one does not stop the
               block, so report the full 20-byte length: the caller
               advances PC by dres->len on Dis_Continue. */
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            /* LR (x30) gets the address just past the special sequence. */
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_TILEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_TInval;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1): 
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7291
7292
7293/*------------------------------------------------------------*/
7294/*--- Top-level fn ---*/
7295/*------------------------------------------------------------*/
7296
7297/* Disassemble a single instruction into IR. The instruction
7298 is located in host memory at &guest_code[delta]. */
7299
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_is_bigendian   = host_bigendian_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here.  len is 4 for a normal
         insn, or 20 for the "Special" preamble sequences recognised
         by disInstr_ARM64_WRK. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            /* The decoder has already set the PC. */
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         /* Print the insn in binary, nibbles separated by ' and
            bytes by spaces, to help with decoder debugging. */
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.whatNext    = Dis_StopHere;
      dres.len         = 0;
      dres.continueAt  = 0;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}
7382
sewardjecde6972014-02-05 11:01:19 +00007383////////////////////////////////////////////////////////////////////////
7384////////////////////////////////////////////////////////////////////////
7385
7386/* Spare code for doing reference implementations of various 128-bit
7387 SIMD interleaves/deinterleaves/concatenation ops. For 64-bit
7388 equivalents see the end of guest_arm_toIR.c. */
7389
7390////////////////////////////////////////////////////////////////
7391// 64x2 operations
7392//
7393static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7394{
7395 // returns a0 b0
7396 return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7397 unop(Iop_V128to64, mkexpr(b10)));
7398}
7399
7400static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7401{
7402 // returns a1 b1
7403 return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7404 unop(Iop_V128HIto64, mkexpr(b10)));
7405}
7406
7407
7408////////////////////////////////////////////////////////////////
7409// 32x4 operations
7410//
7411
7412// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7413// the top halves guaranteed to be zero.
7414static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7415 IRTemp* out0, IRTemp v128 )
7416{
7417 if (out3) *out3 = newTemp(Ity_I64);
7418 if (out2) *out2 = newTemp(Ity_I64);
7419 if (out1) *out1 = newTemp(Ity_I64);
7420 if (out0) *out0 = newTemp(Ity_I64);
7421 IRTemp hi64 = newTemp(Ity_I64);
7422 IRTemp lo64 = newTemp(Ity_I64);
7423 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7424 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7425 if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7426 if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7427 if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7428 if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7429}
7430
7431// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7432// IRTemp.
7433static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7434{
7435 IRTemp hi64 = newTemp(Ity_I64);
7436 IRTemp lo64 = newTemp(Ity_I64);
7437 assign(hi64,
7438 binop(Iop_Or64,
7439 binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7440 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7441 assign(lo64,
7442 binop(Iop_Or64,
7443 binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7444 binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7445 IRTemp res = newTemp(Ity_V128);
7446 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7447 return res;
7448}
7449
7450static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7451{
7452 // returns a2 a0 b2 b0
7453 IRTemp a2, a0, b2, b0;
7454 breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7455 breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7456 return mkexpr(mkV128from32s(a2, a0, b2, b0));
7457}
7458
7459static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7460{
7461 // returns a3 a1 b3 b1
7462 IRTemp a3, a1, b3, b1;
7463 breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7464 breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7465 return mkexpr(mkV128from32s(a3, a1, b3, b1));
7466}
7467
sewardje520bb32014-02-17 11:00:53 +00007468static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7469{
7470 // returns a1 b1 a0 b0
7471 IRTemp a1, a0, b1, b0;
7472 breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7473 breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7474 return mkexpr(mkV128from32s(a1, b1, a0, b0));
7475}
7476
7477static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7478{
7479 // returns a3 b3 a2 b2
7480 IRTemp a3, a2, b3, b2;
7481 breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7482 breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7483 return mkexpr(mkV128from32s(a3, b3, a2, b2));
7484}
sewardjecde6972014-02-05 11:01:19 +00007485
7486////////////////////////////////////////////////////////////////
7487// 16x8 operations
7488//
7489
7490static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7491 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7492 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7493{
7494 if (out7) *out7 = newTemp(Ity_I64);
7495 if (out6) *out6 = newTemp(Ity_I64);
7496 if (out5) *out5 = newTemp(Ity_I64);
7497 if (out4) *out4 = newTemp(Ity_I64);
7498 if (out3) *out3 = newTemp(Ity_I64);
7499 if (out2) *out2 = newTemp(Ity_I64);
7500 if (out1) *out1 = newTemp(Ity_I64);
7501 if (out0) *out0 = newTemp(Ity_I64);
7502 IRTemp hi64 = newTemp(Ity_I64);
7503 IRTemp lo64 = newTemp(Ity_I64);
7504 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7505 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7506 if (out7)
7507 assign(*out7, binop(Iop_And64,
7508 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7509 mkU64(0xFFFF)));
7510 if (out6)
7511 assign(*out6, binop(Iop_And64,
7512 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7513 mkU64(0xFFFF)));
7514 if (out5)
7515 assign(*out5, binop(Iop_And64,
7516 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7517 mkU64(0xFFFF)));
7518 if (out4)
7519 assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
7520 if (out3)
7521 assign(*out3, binop(Iop_And64,
7522 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7523 mkU64(0xFFFF)));
7524 if (out2)
7525 assign(*out2, binop(Iop_And64,
7526 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7527 mkU64(0xFFFF)));
7528 if (out1)
7529 assign(*out1, binop(Iop_And64,
7530 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7531 mkU64(0xFFFF)));
7532 if (out0)
7533 assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
7534}
7535
7536static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7537 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7538{
7539 IRTemp hi64 = newTemp(Ity_I64);
7540 IRTemp lo64 = newTemp(Ity_I64);
7541 assign(hi64,
7542 binop(Iop_Or64,
7543 binop(Iop_Or64,
7544 binop(Iop_Shl64,
7545 binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
7546 mkU8(48)),
7547 binop(Iop_Shl64,
7548 binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
7549 mkU8(32))),
7550 binop(Iop_Or64,
7551 binop(Iop_Shl64,
7552 binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
7553 mkU8(16)),
7554 binop(Iop_And64,
7555 mkexpr(in4), mkU64(0xFFFF)))));
7556 assign(lo64,
7557 binop(Iop_Or64,
7558 binop(Iop_Or64,
7559 binop(Iop_Shl64,
7560 binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
7561 mkU8(48)),
7562 binop(Iop_Shl64,
7563 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
7564 mkU8(32))),
7565 binop(Iop_Or64,
7566 binop(Iop_Shl64,
7567 binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
7568 mkU8(16)),
7569 binop(Iop_And64,
7570 mkexpr(in0), mkU64(0xFFFF)))));
7571 IRTemp res = newTemp(Ity_V128);
7572 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7573 return res;
7574}
7575
7576static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7577{
7578 // returns a6 a4 a2 a0 b6 b4 b2 b0
7579 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
7580 breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
7581 breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
7582 return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
7583}
7584
7585static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7586{
7587 // returns a7 a5 a3 a1 b7 b5 b3 b1
7588 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
7589 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
7590 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
7591 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
7592}
7593
sewardje520bb32014-02-17 11:00:53 +00007594static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
7595{
7596 // returns a3 b3 a2 b2 a1 b1 a0 b0
7597 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
7598 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
7599 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
7600 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
7601}
7602
7603static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
7604{
7605 // returns a7 b7 a6 b6 a5 b5 a4 b4
7606 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
7607 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
7608 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
7609 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
7610}
7611
sewardjfab09142014-02-10 10:28:13 +00007612////////////////////////////////////////////////////////////////
7613// 8x16 operations
7614//
7615
7616static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
7617 IRTemp* outC, IRTemp* outB, IRTemp* outA,
7618 IRTemp* out9, IRTemp* out8,
7619 IRTemp* out7, IRTemp* out6, IRTemp* out5,
7620 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7621 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7622{
7623 if (outF) *outF = newTemp(Ity_I64);
7624 if (outE) *outE = newTemp(Ity_I64);
7625 if (outD) *outD = newTemp(Ity_I64);
7626 if (outC) *outC = newTemp(Ity_I64);
7627 if (outB) *outB = newTemp(Ity_I64);
7628 if (outA) *outA = newTemp(Ity_I64);
7629 if (out9) *out9 = newTemp(Ity_I64);
7630 if (out8) *out8 = newTemp(Ity_I64);
7631 if (out7) *out7 = newTemp(Ity_I64);
7632 if (out6) *out6 = newTemp(Ity_I64);
7633 if (out5) *out5 = newTemp(Ity_I64);
7634 if (out4) *out4 = newTemp(Ity_I64);
7635 if (out3) *out3 = newTemp(Ity_I64);
7636 if (out2) *out2 = newTemp(Ity_I64);
7637 if (out1) *out1 = newTemp(Ity_I64);
7638 if (out0) *out0 = newTemp(Ity_I64);
7639 IRTemp hi64 = newTemp(Ity_I64);
7640 IRTemp lo64 = newTemp(Ity_I64);
7641 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7642 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7643 if (outF)
7644 assign(*outF, binop(Iop_And64,
7645 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
7646 mkU64(0xFF)));
7647 if (outE)
7648 assign(*outE, binop(Iop_And64,
7649 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7650 mkU64(0xFF)));
7651 if (outD)
7652 assign(*outD, binop(Iop_And64,
7653 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
7654 mkU64(0xFF)));
7655 if (outC)
7656 assign(*outC, binop(Iop_And64,
7657 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7658 mkU64(0xFF)));
7659 if (outB)
7660 assign(*outB, binop(Iop_And64,
7661 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
7662 mkU64(0xFF)));
7663 if (outA)
7664 assign(*outA, binop(Iop_And64,
7665 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7666 mkU64(0xFF)));
7667 if (out9)
7668 assign(*out9, binop(Iop_And64,
7669 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
7670 mkU64(0xFF)));
7671 if (out8)
7672 assign(*out8, binop(Iop_And64,
7673 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
7674 mkU64(0xFF)));
7675 if (out7)
7676 assign(*out7, binop(Iop_And64,
7677 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
7678 mkU64(0xFF)));
7679 if (out6)
7680 assign(*out6, binop(Iop_And64,
7681 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7682 mkU64(0xFF)));
7683 if (out5)
7684 assign(*out5, binop(Iop_And64,
7685 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
7686 mkU64(0xFF)));
7687 if (out4)
7688 assign(*out4, binop(Iop_And64,
7689 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7690 mkU64(0xFF)));
7691 if (out3)
7692 assign(*out3, binop(Iop_And64,
7693 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
7694 mkU64(0xFF)));
7695 if (out2)
7696 assign(*out2, binop(Iop_And64,
7697 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7698 mkU64(0xFF)));
7699 if (out1)
7700 assign(*out1, binop(Iop_And64,
7701 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
7702 mkU64(0xFF)));
7703 if (out0)
7704 assign(*out0, binop(Iop_And64,
7705 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
7706 mkU64(0xFF)));
7707}
7708
7709static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
7710 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
7711 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7712 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7713{
7714 IRTemp vFE = newTemp(Ity_I64);
7715 IRTemp vDC = newTemp(Ity_I64);
7716 IRTemp vBA = newTemp(Ity_I64);
7717 IRTemp v98 = newTemp(Ity_I64);
7718 IRTemp v76 = newTemp(Ity_I64);
7719 IRTemp v54 = newTemp(Ity_I64);
7720 IRTemp v32 = newTemp(Ity_I64);
7721 IRTemp v10 = newTemp(Ity_I64);
7722 assign(vFE, binop(Iop_Or64,
7723 binop(Iop_Shl64,
7724 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
7725 binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
7726 assign(vDC, binop(Iop_Or64,
7727 binop(Iop_Shl64,
7728 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
7729 binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
7730 assign(vBA, binop(Iop_Or64,
7731 binop(Iop_Shl64,
7732 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
7733 binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
7734 assign(v98, binop(Iop_Or64,
7735 binop(Iop_Shl64,
7736 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
7737 binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
7738 assign(v76, binop(Iop_Or64,
7739 binop(Iop_Shl64,
7740 binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
7741 binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
7742 assign(v54, binop(Iop_Or64,
7743 binop(Iop_Shl64,
7744 binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
7745 binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
7746 assign(v32, binop(Iop_Or64,
7747 binop(Iop_Shl64,
7748 binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
7749 binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
7750 assign(v10, binop(Iop_Or64,
7751 binop(Iop_Shl64,
7752 binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
7753 binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
7754 return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
7755}
7756
7757static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7758 IRTemp bFEDCBA9876543210 )
7759{
7760 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7761 IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
7762 breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
7763 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
7764 aFEDCBA9876543210);
7765 breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
7766 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
7767 bFEDCBA9876543210);
7768 return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
7769 bE, bC, bA, b8, b6, b4, b2, b0));
7770}
7771
7772static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7773 IRTemp bFEDCBA9876543210 )
7774{
7775 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7776 IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
7777 breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
7778 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
7779 aFEDCBA9876543210);
7780
7781 breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
7782 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
7783 aFEDCBA9876543210);
7784
7785 return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
7786 bF, bD, bB, b9, b7, b5, b3, b1));
7787}
7788
sewardje520bb32014-02-17 11:00:53 +00007789static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7790 IRTemp bFEDCBA9876543210 )
7791{
7792 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7793 IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
7794 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7795 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
7796 aFEDCBA9876543210);
7797 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7798 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
7799 bFEDCBA9876543210);
7800 return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
7801 a3, b3, a2, b2, a1, b1, a0, b0));
7802}
7803
7804static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7805 IRTemp bFEDCBA9876543210 )
7806{
7807 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7808 IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
7809 breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
7810 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7811 aFEDCBA9876543210);
7812 breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
7813 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7814 bFEDCBA9876543210);
7815 return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
7816 aB, bB, aA, bA, a9, b9, a8, b8));
7817}
sewardjecde6972014-02-05 11:01:19 +00007818
sewardjbbcf1882014-01-12 12:49:10 +00007819/*--------------------------------------------------------------------*/
7820/*--- end guest_arm64_toIR.c ---*/
7821/*--------------------------------------------------------------------*/