blob: 85b2f2dafa1aba5906e1b176289f29b4cad0a93b [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
86 Following that, one of the following 3 are allowed
87 (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian? We need to know this in order to do
123 sub-register accesses to the SIMD/FP registers correctly. */
124static Bool host_is_bigendian;
125
126/* CONST: The guest address for the instruction currently being
127 translated. */
128static Addr64 guest_PC_curr_instr;
129
130/* MOD: The IRSB* into which we're generating code. */
131static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output ---*/
136/*------------------------------------------------------------*/
137
138#define DIP(format, args...) \
139 if (vex_traceflags & VEX_TRACE_FE) \
140 vex_printf(format, ## args)
141
142#define DIS(buf, format, args...) \
143 if (vex_traceflags & VEX_TRACE_FE) \
144 vex_sprintf(buf, format, ## args)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the ---*/
149/*--- arm insn stream. ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153 endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156 UInt w = 0;
157 w = (w << 8) | p[3];
158 w = (w << 8) | p[2];
159 w = (w << 8) | p[1];
160 w = (w << 8) | p[0];
161 return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165 bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168 vassert(n > 1 && n < 64);
169 Long r = (Long)x;
170 r = (r << (64-n)) >> (64-n);
171 return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ UShort w = 0;
179//ZZ w = (w << 8) | p[1];
180//ZZ w = (w << 8) | p[0];
181//ZZ return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ vassert(sh >= 0 && sh < 32);
186//ZZ if (sh == 0)
187//ZZ return x;
188//ZZ else
189//ZZ return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ Int res = 0, i;
195//ZZ for (i = 0; i < 32; i++) {
196//ZZ res += (x & 1);
197//ZZ x >>= 1;
198//ZZ }
199//ZZ return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ UInt mask = 1 << ix;
205//ZZ x &= ~mask;
206//ZZ x |= ((b << ix) & mask);
207//ZZ return x;
208//ZZ }
209
210#define BITS2(_b1,_b0) \
211 (((_b1) << 1) | (_b0))
212
213#define BITS3(_b2,_b1,_b0) \
214 (((_b2) << 2) | ((_b1) << 1) | (_b0))
215
216#define BITS4(_b3,_b2,_b1,_b0) \
217 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
218
219#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
220 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
221 | BITS4((_b3),(_b2),(_b1),(_b0)))
222
223#define BITS5(_b4,_b3,_b2,_b1,_b0) \
224 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
225#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
226 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
227#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
228 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
229
230#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
231 (((_b8) << 8) \
232 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
233
234#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
235 (((_b9) << 9) | ((_b8) << 8) \
236 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
237
238#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
239 (((_b10) << 10) \
240 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
241
sewardjdc9259c2014-02-27 11:10:19 +0000242#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
243 (((_b11) << 11) \
244 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
245
sewardjbbcf1882014-01-12 12:49:10 +0000246// produces _uint[_bMax:_bMin]
247#define SLICE_UInt(_uint,_bMax,_bMin) \
248 (( ((UInt)(_uint)) >> (_bMin)) \
249 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments. ---*/
254/*------------------------------------------------------------*/
255
256static IRExpr* mkV128 ( UShort w )
257{
258 return IRExpr_Const(IRConst_V128(w));
259}
260
261static IRExpr* mkU64 ( ULong i )
262{
263 return IRExpr_Const(IRConst_U64(i));
264}
265
266static IRExpr* mkU32 ( UInt i )
267{
268 return IRExpr_Const(IRConst_U32(i));
269}
270
271static IRExpr* mkU8 ( UInt i )
272{
273 vassert(i < 256);
274 return IRExpr_Const(IRConst_U8( (UChar)i ));
275}
276
277static IRExpr* mkexpr ( IRTemp tmp )
278{
279 return IRExpr_RdTmp(tmp);
280}
281
282static IRExpr* unop ( IROp op, IRExpr* a )
283{
284 return IRExpr_Unop(op, a);
285}
286
287static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
288{
289 return IRExpr_Binop(op, a1, a2);
290}
291
292static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
293{
294 return IRExpr_Triop(op, a1, a2, a3);
295}
296
297static IRExpr* loadLE ( IRType ty, IRExpr* addr )
298{
299 return IRExpr_Load(Iend_LE, ty, addr);
300}
301
302/* Add a statement to the list held by "irbb". */
303static void stmt ( IRStmt* st )
304{
305 addStmtToIRSB( irsb, st );
306}
307
308static void assign ( IRTemp dst, IRExpr* e )
309{
310 stmt( IRStmt_WrTmp(dst, e) );
311}
312
313static void storeLE ( IRExpr* addr, IRExpr* data )
314{
315 stmt( IRStmt_Store(Iend_LE, addr, data) );
316}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ if (guardT == IRTemp_INVALID) {
321//ZZ /* unconditional */
322//ZZ storeLE(addr, data);
323//ZZ } else {
324//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ IRExpr* addr, IRExpr* alt,
331//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ if (guardT == IRTemp_INVALID) {
334//ZZ /* unconditional */
335//ZZ IRExpr* loaded = NULL;
336//ZZ switch (cvt) {
337//ZZ case ILGop_Ident32:
338//ZZ loaded = loadLE(Ity_I32, addr); break;
339//ZZ case ILGop_8Uto32:
340//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ case ILGop_8Sto32:
342//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ case ILGop_16Uto32:
344//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ case ILGop_16Sto32:
346//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ default:
348//ZZ vassert(0);
349//ZZ }
350//ZZ vassert(loaded != NULL);
351//ZZ assign(dst, loaded);
352//ZZ } else {
353//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ loaded data before putting the data in 'dst'. If the load
355//ZZ does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ }
359//ZZ }
360
361/* Generate a new temporary of the given type. */
362static IRTemp newTemp ( IRType ty )
363{
364 vassert(isPlausibleIRType(ty));
365 return newIRTemp( irsb->tyenv, ty );
366}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ vassert(rot >= 0 && rot < 32);
379//ZZ if (rot == 0)
380//ZZ return mkexpr(src);
381//ZZ return
382//ZZ binop(Iop_Or32,
383//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ the given condition is true. Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ if (b)
397//ZZ return binop(Iop_And32, e, mkU32(~3));
398//ZZ else
399//ZZ return e;
400//ZZ }
401
402/* Other IR construction helpers. */
403static IROp mkAND ( IRType ty ) {
404 switch (ty) {
405 case Ity_I32: return Iop_And32;
406 case Ity_I64: return Iop_And64;
407 default: vpanic("mkAND");
408 }
409}
410
411static IROp mkOR ( IRType ty ) {
412 switch (ty) {
413 case Ity_I32: return Iop_Or32;
414 case Ity_I64: return Iop_Or64;
415 default: vpanic("mkOR");
416 }
417}
418
419static IROp mkXOR ( IRType ty ) {
420 switch (ty) {
421 case Ity_I32: return Iop_Xor32;
422 case Ity_I64: return Iop_Xor64;
423 default: vpanic("mkXOR");
424 }
425}
426
427static IROp mkSHL ( IRType ty ) {
428 switch (ty) {
429 case Ity_I32: return Iop_Shl32;
430 case Ity_I64: return Iop_Shl64;
431 default: vpanic("mkSHL");
432 }
433}
434
435static IROp mkSHR ( IRType ty ) {
436 switch (ty) {
437 case Ity_I32: return Iop_Shr32;
438 case Ity_I64: return Iop_Shr64;
439 default: vpanic("mkSHR");
440 }
441}
442
443static IROp mkSAR ( IRType ty ) {
444 switch (ty) {
445 case Ity_I32: return Iop_Sar32;
446 case Ity_I64: return Iop_Sar64;
447 default: vpanic("mkSAR");
448 }
449}
450
451static IROp mkNOT ( IRType ty ) {
452 switch (ty) {
453 case Ity_I32: return Iop_Not32;
454 case Ity_I64: return Iop_Not64;
455 default: vpanic("mkNOT");
456 }
457}
458
459static IROp mkADD ( IRType ty ) {
460 switch (ty) {
461 case Ity_I32: return Iop_Add32;
462 case Ity_I64: return Iop_Add64;
463 default: vpanic("mkADD");
464 }
465}
466
467static IROp mkSUB ( IRType ty ) {
468 switch (ty) {
469 case Ity_I32: return Iop_Sub32;
470 case Ity_I64: return Iop_Sub64;
471 default: vpanic("mkSUB");
472 }
473}
474
475static IROp mkADDF ( IRType ty ) {
476 switch (ty) {
477 case Ity_F32: return Iop_AddF32;
478 case Ity_F64: return Iop_AddF64;
479 default: vpanic("mkADDF");
480 }
481}
482
483static IROp mkSUBF ( IRType ty ) {
484 switch (ty) {
485 case Ity_F32: return Iop_SubF32;
486 case Ity_F64: return Iop_SubF64;
487 default: vpanic("mkSUBF");
488 }
489}
490
491static IROp mkMULF ( IRType ty ) {
492 switch (ty) {
493 case Ity_F32: return Iop_MulF32;
494 case Ity_F64: return Iop_MulF64;
495 default: vpanic("mkMULF");
496 }
497}
498
499static IROp mkDIVF ( IRType ty ) {
500 switch (ty) {
501 case Ity_F32: return Iop_DivF32;
502 case Ity_F64: return Iop_DivF64;
503 default: vpanic("mkMULF");
504 }
505}
506
507static IROp mkNEGF ( IRType ty ) {
508 switch (ty) {
509 case Ity_F32: return Iop_NegF32;
510 case Ity_F64: return Iop_NegF64;
511 default: vpanic("mkNEGF");
512 }
513}
514
515static IROp mkABSF ( IRType ty ) {
516 switch (ty) {
517 case Ity_F32: return Iop_AbsF32;
518 case Ity_F64: return Iop_AbsF64;
519 default: vpanic("mkNEGF");
520 }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524 switch (ty) {
525 case Ity_F32: return Iop_SqrtF32;
526 case Ity_F64: return Iop_SqrtF64;
527 default: vpanic("mkNEGF");
528 }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532 switch (ty) {
533 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534 case Ity_I64: return mkU64(imm);
535 default: vpanic("mkU");
536 }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540 of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543 UInt w = 0;
544 if (ty == Ity_I64) {
545 w = 64;
546 } else {
547 vassert(ty == Ity_I32);
548 w = 32;
549 }
550 vassert(w != 0);
551 vassert(imm < w);
552 if (imm == 0) {
553 return arg;
554 }
555 IRTemp res = newTemp(ty);
556 assign(res, binop(mkOR(ty),
557 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559 return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563 all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566 UInt w = 0;
567 if (ty == Ity_I64) {
568 w = 64;
569 } else {
570 vassert(ty == Ity_I32);
571 w = 32;
572 }
573 vassert(w != 0);
574 vassert(imm < w);
575 IRTemp res = newTemp(ty);
576 assign(res, binop(mkSAR(ty),
577 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578 mkU8(w - 1)));
579 return res;
580}
581
sewardj7d009132014-02-20 17:43:38 +0000582/* U-widen 8/16/32/64 bit int expr to 64. */
583static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
584{
585 switch (srcTy) {
586 case Ity_I64: return e;
587 case Ity_I32: return unop(Iop_32Uto64, e);
588 case Ity_I16: return unop(Iop_16Uto64, e);
589 case Ity_I8: return unop(Iop_8Uto64, e);
590 default: vpanic("widenUto64(arm64)");
591 }
592}
593
594/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
595 of these combinations make sense. */
596static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
597{
598 switch (dstTy) {
599 case Ity_I64: return e;
600 case Ity_I32: return unop(Iop_64to32, e);
601 case Ity_I16: return unop(Iop_64to16, e);
602 case Ity_I8: return unop(Iop_64to8, e);
603 default: vpanic("narrowFrom64(arm64)");
604 }
605}
606
sewardjbbcf1882014-01-12 12:49:10 +0000607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers. ---*/
610/*------------------------------------------------------------*/
611
612#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
613#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
614#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
615#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
616#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
617#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
618#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
619#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
620#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
621#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
622#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
623#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
624#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
625#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
626#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
627#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
628#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
629#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
630#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
631#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
632#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
633#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
634#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
635#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
636#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
637#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
638#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
639#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
640#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
641#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
642#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
643
sewardj60687882014-01-15 10:25:21 +0000644#define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
sewardjbbcf1882014-01-12 12:49:10 +0000645#define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
646
647#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
648#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
649#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
650#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
651
652#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
653#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
654
655#define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
656#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
657#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
658#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
659#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
660#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
661#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
662#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
663#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
664#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
665#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
666#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
667#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
668#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
669#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
670#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
671#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
672#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
673#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
674#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
675#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
676#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
677#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
678#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
679#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
680#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
681#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
682#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
683#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
684#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
685#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
686#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
687
688#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
689#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR)
690//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
691//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
692//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
693//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
694//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
695//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
696//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
697
sewardj05f5e012014-05-04 10:52:11 +0000698#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
699#define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
sewardjbbcf1882014-01-12 12:49:10 +0000700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706 /* Do we care about endianness here? We do if sub-parts of integer
707 registers are accessed. */
708 switch (iregNo) {
709 case 0: return OFFB_X0;
710 case 1: return OFFB_X1;
711 case 2: return OFFB_X2;
712 case 3: return OFFB_X3;
713 case 4: return OFFB_X4;
714 case 5: return OFFB_X5;
715 case 6: return OFFB_X6;
716 case 7: return OFFB_X7;
717 case 8: return OFFB_X8;
718 case 9: return OFFB_X9;
719 case 10: return OFFB_X10;
720 case 11: return OFFB_X11;
721 case 12: return OFFB_X12;
722 case 13: return OFFB_X13;
723 case 14: return OFFB_X14;
724 case 15: return OFFB_X15;
725 case 16: return OFFB_X16;
726 case 17: return OFFB_X17;
727 case 18: return OFFB_X18;
728 case 19: return OFFB_X19;
729 case 20: return OFFB_X20;
730 case 21: return OFFB_X21;
731 case 22: return OFFB_X22;
732 case 23: return OFFB_X23;
733 case 24: return OFFB_X24;
734 case 25: return OFFB_X25;
735 case 26: return OFFB_X26;
736 case 27: return OFFB_X27;
737 case 28: return OFFB_X28;
738 case 29: return OFFB_X29;
739 case 30: return OFFB_X30;
740 /* but not 31 */
741 default: vassert(0);
742 }
743}
744
745static Int offsetIReg64orSP ( UInt iregNo )
746{
sewardj60687882014-01-15 10:25:21 +0000747 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
sewardjbbcf1882014-01-12 12:49:10 +0000748}
749
750static const HChar* nameIReg64orZR ( UInt iregNo )
751{
752 vassert(iregNo < 32);
753 static const HChar* names[32]
754 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
755 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
756 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
757 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
758 return names[iregNo];
759}
760
761static const HChar* nameIReg64orSP ( UInt iregNo )
762{
763 if (iregNo == 31) {
764 return "sp";
765 }
766 vassert(iregNo < 31);
767 return nameIReg64orZR(iregNo);
768}
769
770static IRExpr* getIReg64orSP ( UInt iregNo )
771{
772 vassert(iregNo < 32);
773 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
774}
775
776static IRExpr* getIReg64orZR ( UInt iregNo )
777{
778 if (iregNo == 31) {
779 return mkU64(0);
780 }
781 vassert(iregNo < 31);
782 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
783}
784
785static void putIReg64orSP ( UInt iregNo, IRExpr* e )
786{
787 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
788 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
789}
790
791static void putIReg64orZR ( UInt iregNo, IRExpr* e )
792{
793 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
794 if (iregNo == 31) {
795 return;
796 }
797 vassert(iregNo < 31);
798 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
799}
800
801static const HChar* nameIReg32orZR ( UInt iregNo )
802{
803 vassert(iregNo < 32);
804 static const HChar* names[32]
805 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
806 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
807 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
808 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
809 return names[iregNo];
810}
811
812static const HChar* nameIReg32orSP ( UInt iregNo )
813{
814 if (iregNo == 31) {
815 return "wsp";
816 }
817 vassert(iregNo < 31);
818 return nameIReg32orZR(iregNo);
819}
820
821static IRExpr* getIReg32orSP ( UInt iregNo )
822{
823 vassert(iregNo < 32);
824 return unop(Iop_64to32,
825 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
826}
827
828static IRExpr* getIReg32orZR ( UInt iregNo )
829{
830 if (iregNo == 31) {
831 return mkU32(0);
832 }
833 vassert(iregNo < 31);
834 return unop(Iop_64to32,
835 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
836}
837
838static void putIReg32orSP ( UInt iregNo, IRExpr* e )
839{
840 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
841 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
842}
843
844static void putIReg32orZR ( UInt iregNo, IRExpr* e )
845{
846 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
847 if (iregNo == 31) {
848 return;
849 }
850 vassert(iregNo < 31);
851 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
852}
853
854static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
855{
856 vassert(is64 == True || is64 == False);
857 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
858}
859
860static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
861{
862 vassert(is64 == True || is64 == False);
863 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
864}
865
866static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
867{
868 vassert(is64 == True || is64 == False);
869 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
870}
871
872static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
873{
874 vassert(is64 == True || is64 == False);
875 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
876}
877
878static void putPC ( IRExpr* e )
879{
880 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
881 stmt( IRStmt_Put(OFFB_PC, e) );
882}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889 /* We don't care about endianness at this point. It only becomes
890 relevant when dealing with sections of these registers.*/
891 switch (qregNo) {
892 case 0: return OFFB_Q0;
893 case 1: return OFFB_Q1;
894 case 2: return OFFB_Q2;
895 case 3: return OFFB_Q3;
896 case 4: return OFFB_Q4;
897 case 5: return OFFB_Q5;
898 case 6: return OFFB_Q6;
899 case 7: return OFFB_Q7;
900 case 8: return OFFB_Q8;
901 case 9: return OFFB_Q9;
902 case 10: return OFFB_Q10;
903 case 11: return OFFB_Q11;
904 case 12: return OFFB_Q12;
905 case 13: return OFFB_Q13;
906 case 14: return OFFB_Q14;
907 case 15: return OFFB_Q15;
908 case 16: return OFFB_Q16;
909 case 17: return OFFB_Q17;
910 case 18: return OFFB_Q18;
911 case 19: return OFFB_Q19;
912 case 20: return OFFB_Q20;
913 case 21: return OFFB_Q21;
914 case 22: return OFFB_Q22;
915 case 23: return OFFB_Q23;
916 case 24: return OFFB_Q24;
917 case 25: return OFFB_Q25;
918 case 26: return OFFB_Q26;
919 case 27: return OFFB_Q27;
920 case 28: return OFFB_Q28;
921 case 29: return OFFB_Q29;
922 case 30: return OFFB_Q30;
923 case 31: return OFFB_Q31;
924 default: vassert(0);
925 }
926}
927
sewardjbbcf1882014-01-12 12:49:10 +0000928/* Write to a complete Qreg. */
929static void putQReg128 ( UInt qregNo, IRExpr* e )
930{
931 vassert(qregNo < 32);
932 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
933 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
934}
935
936/* Read a complete Qreg. */
937static IRExpr* getQReg128 ( UInt qregNo )
938{
939 vassert(qregNo < 32);
940 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
941}
942
943/* Produce the IR type for some sub-part of a vector. For 32- and 64-
944 bit sub-parts we can choose either integer or float types, and
945 choose float on the basis that that is the common use case and so
946 will give least interference with Put-to-Get forwarding later
947 on. */
948static IRType preferredVectorSubTypeFromSize ( UInt szB )
949{
950 switch (szB) {
951 case 1: return Ity_I8;
952 case 2: return Ity_I16;
953 case 4: return Ity_I32; //Ity_F32;
954 case 8: return Ity_F64;
955 case 16: return Ity_V128;
956 default: vassert(0);
957 }
958}
959
sewardj606c4ba2014-01-26 19:11:14 +0000960/* Find the offset of the laneNo'th lane of type laneTy in the given
961 Qreg. Since the host is little-endian, the least significant lane
962 has the lowest offset. */
963static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardjbbcf1882014-01-12 12:49:10 +0000964{
965 vassert(!host_is_bigendian);
966 Int base = offsetQReg128(qregNo);
sewardj606c4ba2014-01-26 19:11:14 +0000967 /* Since the host is little-endian, the least significant lane
968 will be at the lowest address. */
969 /* Restrict this to known types, so as to avoid silently accepting
970 stupid types. */
971 UInt laneSzB = 0;
972 switch (laneTy) {
sewardj5860ec72014-03-01 11:19:45 +0000973 case Ity_I8: laneSzB = 1; break;
974 case Ity_I16: laneSzB = 2; break;
sewardj606c4ba2014-01-26 19:11:14 +0000975 case Ity_F32: case Ity_I32: laneSzB = 4; break;
976 case Ity_F64: case Ity_I64: laneSzB = 8; break;
977 case Ity_V128: laneSzB = 16; break;
978 default: break;
sewardjbbcf1882014-01-12 12:49:10 +0000979 }
sewardj606c4ba2014-01-26 19:11:14 +0000980 vassert(laneSzB > 0);
981 UInt minOff = laneNo * laneSzB;
982 UInt maxOff = minOff + laneSzB - 1;
983 vassert(maxOff < 16);
984 return base + minOff;
sewardjbbcf1882014-01-12 12:49:10 +0000985}
986
sewardj606c4ba2014-01-26 19:11:14 +0000987/* Put to the least significant lane of a Qreg. */
988static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +0000989{
990 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +0000991 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +0000992 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +0000993 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
994 case Ity_F32: case Ity_F64: case Ity_V128:
995 break;
996 default:
997 vassert(0); // Other cases are probably invalid
sewardjbbcf1882014-01-12 12:49:10 +0000998 }
999 stmt(IRStmt_Put(off, e));
1000}
1001
sewardj606c4ba2014-01-26 19:11:14 +00001002/* Get from the least significant lane of a Qreg. */
1003static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardjbbcf1882014-01-12 12:49:10 +00001004{
sewardj606c4ba2014-01-26 19:11:14 +00001005 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001006 switch (ty) {
sewardjb3553472014-05-15 16:49:21 +00001007 case Ity_I8:
1008 case Ity_I16:
sewardj606c4ba2014-01-26 19:11:14 +00001009 case Ity_I32: case Ity_I64:
1010 case Ity_F32: case Ity_F64: case Ity_V128:
1011 break;
1012 default:
1013 vassert(0); // Other cases are ATC
sewardjbbcf1882014-01-12 12:49:10 +00001014 }
1015 return IRExpr_Get(off, ty);
1016}
1017
sewardj606c4ba2014-01-26 19:11:14 +00001018static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
sewardjbbcf1882014-01-12 12:49:10 +00001019{
1020 static const HChar* namesQ[32]
1021 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1022 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1023 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1024 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1025 static const HChar* namesD[32]
1026 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1027 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1028 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1029 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1030 static const HChar* namesS[32]
1031 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1032 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1033 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1034 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1035 static const HChar* namesH[32]
1036 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1037 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1038 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1039 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1040 static const HChar* namesB[32]
1041 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1042 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1043 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1044 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1045 vassert(qregNo < 32);
sewardj606c4ba2014-01-26 19:11:14 +00001046 switch (sizeofIRType(laneTy)) {
sewardjbbcf1882014-01-12 12:49:10 +00001047 case 1: return namesB[qregNo];
1048 case 2: return namesH[qregNo];
1049 case 4: return namesS[qregNo];
1050 case 8: return namesD[qregNo];
1051 case 16: return namesQ[qregNo];
1052 default: vassert(0);
1053 }
1054 /*NOTREACHED*/
1055}
1056
/* Return the assembly-syntax name of Q[qregNo] viewed at its full
   128-bit width ("q0" .. "q31"). */
static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
1061
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host: on a
   little-endian host (asserted by offsetQRegLane) it is I64-lane 1. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}
1068
/* Read the most significant 64 bits of Q[qregNo], as an Ity_I64. */
static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}
1073
sewardj606c4ba2014-01-26 19:11:14 +00001074static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001075{
1076 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001077 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001078 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001079 case Ity_I64: case Ity_F64:
1080 break;
1081 default:
1082 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001083 }
1084 stmt(IRStmt_Put(off, e));
1085}
1086
sewardj606c4ba2014-01-26 19:11:14 +00001087/* Put to a specified lane of a Qreg. */
1088static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1089{
1090 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1091 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1092 switch (laneTy) {
1093 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001094 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001095 case Ity_I16:
1096 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001097 break;
1098 default:
1099 vassert(0); // Other cases are ATC
1100 }
1101 stmt(IRStmt_Put(off, e));
1102}
1103
sewardj32d86752014-03-02 12:47:18 +00001104/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001105static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1106{
1107 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1108 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001109 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1110 case Ity_F64:
sewardj606c4ba2014-01-26 19:11:14 +00001111 break;
1112 default:
1113 vassert(0); // Other cases are ATC
1114 }
1115 return IRExpr_Get(off, laneTy);
1116}
1117
1118
sewardjbbcf1882014-01-12 12:49:10 +00001119//ZZ /* ---------------- Misc registers ---------------- */
1120//ZZ
1121//ZZ static void putMiscReg32 ( UInt gsoffset,
1122//ZZ IRExpr* e, /* :: Ity_I32 */
1123//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1124//ZZ {
1125//ZZ switch (gsoffset) {
1126//ZZ case OFFB_FPSCR: break;
1127//ZZ case OFFB_QFLAG32: break;
1128//ZZ case OFFB_GEFLAG0: break;
1129//ZZ case OFFB_GEFLAG1: break;
1130//ZZ case OFFB_GEFLAG2: break;
1131//ZZ case OFFB_GEFLAG3: break;
1132//ZZ default: vassert(0); /* awaiting more cases */
1133//ZZ }
1134//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1135//ZZ
1136//ZZ if (guardT == IRTemp_INVALID) {
1137//ZZ /* unconditional write */
1138//ZZ stmt(IRStmt_Put(gsoffset, e));
1139//ZZ } else {
1140//ZZ stmt(IRStmt_Put(
1141//ZZ gsoffset,
1142//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1143//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1144//ZZ ));
1145//ZZ }
1146//ZZ }
1147//ZZ
1148//ZZ static IRTemp get_ITSTATE ( void )
1149//ZZ {
1150//ZZ ASSERT_IS_THUMB;
1151//ZZ IRTemp t = newTemp(Ity_I32);
1152//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1153//ZZ return t;
1154//ZZ }
1155//ZZ
1156//ZZ static void put_ITSTATE ( IRTemp t )
1157//ZZ {
1158//ZZ ASSERT_IS_THUMB;
1159//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1160//ZZ }
1161//ZZ
1162//ZZ static IRTemp get_QFLAG32 ( void )
1163//ZZ {
1164//ZZ IRTemp t = newTemp(Ity_I32);
1165//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1166//ZZ return t;
1167//ZZ }
1168//ZZ
1169//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1170//ZZ {
1171//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1172//ZZ }
1173//ZZ
1174//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1175//ZZ Status Register) to indicate that overflow or saturation occurred.
1176//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1177//ZZ value to indicate saturation. */
1178//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1179//ZZ {
1180//ZZ IRTemp old = get_QFLAG32();
1181//ZZ IRTemp nyu = newTemp(Ity_I32);
1182//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1183//ZZ put_QFLAG32(nyu, condT);
1184//ZZ }
1185
1186
1187/* ---------------- FPCR stuff ---------------- */
1188
/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result (:: Ity_I32) to
   the returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them: bit 0 moves up to position 1, bit 1 moves down to
      position 0, and everything else is masked off. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
1226
1227
1228/*------------------------------------------------------------*/
1229/*--- Helpers for flag handling and conditional insns ---*/
1230/*------------------------------------------------------------*/
1231
1232static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1233{
1234 switch (cond) {
1235 case ARM64CondEQ: return "eq";
1236 case ARM64CondNE: return "ne";
1237 case ARM64CondCS: return "cs"; // or 'hs'
1238 case ARM64CondCC: return "cc"; // or 'lo'
1239 case ARM64CondMI: return "mi";
1240 case ARM64CondPL: return "pl";
1241 case ARM64CondVS: return "vs";
1242 case ARM64CondVC: return "vc";
1243 case ARM64CondHI: return "hi";
1244 case ARM64CondLS: return "ls";
1245 case ARM64CondGE: return "ge";
1246 case ARM64CondLT: return "lt";
1247 case ARM64CondGT: return "gt";
1248 case ARM64CondLE: return "le";
1249 case ARM64CondAL: return "al";
1250 case ARM64CondNV: return "nv";
1251 default: vpanic("name_ARM64Condcode");
1252 }
1253}
1254
/* A handy shorthand for nameARM64Condcode. */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1259
1260
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.  (The mask
      bits select args 0 and 3 for exclusion.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1302
1303
1304/* Build IR to calculate some particular condition from stored
1305 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1306 Ity_I64, suitable for narrowing. Although the return type is
1307 Ity_I64, the returned value is either 0 or 1.
1308*/
1309static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1310{
1311 /* First arg is "(cond << 4) | condition". This requires that the
1312 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1313 (COND, OP) pair in the lowest 8 bits of the first argument. */
1314 vassert(cond >= 0 && cond <= 15);
1315 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1316}
1317
1318
1319//ZZ /* Build IR to calculate just the carry flag from stored
1320//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1321//ZZ Ity_I32. */
1322//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
1323//ZZ {
1324//ZZ IRExpr** args
1325//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1326//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1327//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1328//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1329//ZZ IRExpr* call
1330//ZZ = mkIRExprCCall(
1331//ZZ Ity_I32,
1332//ZZ 0/*regparm*/,
1333//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
1334//ZZ args
1335//ZZ );
1336//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1337//ZZ interested in DEP1 and DEP2. */
1338//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1339//ZZ return call;
1340//ZZ }
1341//ZZ
1342//ZZ
1343//ZZ /* Build IR to calculate just the overflow flag from stored
1344//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1345//ZZ Ity_I32. */
1346//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1347//ZZ {
1348//ZZ IRExpr** args
1349//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1350//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1351//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1352//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1353//ZZ IRExpr* call
1354//ZZ = mkIRExprCCall(
1355//ZZ Ity_I32,
1356//ZZ 0/*regparm*/,
1357//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1358//ZZ args
1359//ZZ );
1360//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1361//ZZ interested in DEP1 and DEP2. */
1362//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1363//ZZ return call;
1364//ZZ }
1365
1366
1367/* Build IR to calculate N Z C V in bits 31:28 of the
1368 returned word. */
1369static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1370{
1371 IRExpr** args
1372 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1373 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1374 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1375 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1376 IRExpr* call
1377 = mkIRExprCCall(
1378 Ity_I64,
1379 0/*regparm*/,
1380 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1381 args
1382 );
1383 /* Exclude OP and NDEP from definedness checking. We're only
1384 interested in DEP1 and DEP2. */
1385 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1386 return call;
1387}
1388
1389
1390/* Build IR to set the flags thunk, in the most general case. */
1391static
1392void setFlags_D1_D2_ND ( UInt cc_op,
1393 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1394{
1395 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1396 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1397 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1398 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1399 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1400 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1401 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1402 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1403}
1404
1405/* Build IR to set the flags thunk after ADD or SUB. */
1406static
1407void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1408{
1409 IRTemp argL64 = IRTemp_INVALID;
1410 IRTemp argR64 = IRTemp_INVALID;
1411 IRTemp z64 = newTemp(Ity_I64);
1412 if (is64) {
1413 argL64 = argL;
1414 argR64 = argR;
1415 } else {
1416 argL64 = newTemp(Ity_I64);
1417 argR64 = newTemp(Ity_I64);
1418 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1419 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1420 }
1421 assign(z64, mkU64(0));
1422 UInt cc_op = ARM64G_CC_OP_NUMBER;
1423 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1424 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1425 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1426 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1427 else { vassert(0); }
1428 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1429}
1430
/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value.  'cond' is the run-time condition
   (NOTE(review): presumably :: Ity_I1, as required by IRExpr_ITE --
   confirm at call sites); 'nzcv' supplies the fallback flags in its
   low 4 bits. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else                      { vassert(0); }
   /* 32-bit operands are zero-widened, since the thunk holds I64s. */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case: a COPY
      of the literal NZCV value, positioned at bits 31:28 as the COPY
      op expects. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}
1491
1492/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1493static
1494void setFlags_LOGIC ( Bool is64, IRTemp res )
1495{
1496 IRTemp res64 = IRTemp_INVALID;
1497 IRTemp z64 = newTemp(Ity_I64);
1498 UInt cc_op = ARM64G_CC_OP_NUMBER;
1499 if (is64) {
1500 res64 = res;
1501 cc_op = ARM64G_CC_OP_LOGIC64;
1502 } else {
1503 res64 = newTemp(Ity_I64);
1504 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1505 cc_op = ARM64G_CC_OP_LOGIC32;
1506 }
1507 assign(z64, mkU64(0));
1508 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1509}
1510
1511/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1512 located in bits 31:28 of the supplied value. */
1513static
1514void setFlags_COPY ( IRTemp nzcv_28x0 )
1515{
1516 IRTemp z64 = newTemp(Ity_I64);
1517 assign(z64, mkU64(0));
1518 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1519}
1520
1521
1522//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1523//ZZ sets it at all) */
1524//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1525//ZZ IRTemp t_dep2,
1526//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1527//ZZ {
1528//ZZ IRTemp z32 = newTemp(Ity_I32);
1529//ZZ assign( z32, mkU32(0) );
1530//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1531//ZZ }
1532//ZZ
1533//ZZ
1534//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1535//ZZ sets it at all) */
1536//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1537//ZZ IRTemp t_ndep,
1538//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1539//ZZ {
1540//ZZ IRTemp z32 = newTemp(Ity_I32);
1541//ZZ assign( z32, mkU32(0) );
1542//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1543//ZZ }
1544//ZZ
1545//ZZ
1546//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1547//ZZ sets them at all) */
1548//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1549//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1550//ZZ {
1551//ZZ IRTemp z32 = newTemp(Ity_I32);
1552//ZZ assign( z32, mkU32(0) );
1553//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1554//ZZ }
1555
1556
1557/*------------------------------------------------------------*/
1558/*--- Misc math helpers ---*/
1559/*------------------------------------------------------------*/
1560
sewardj32d86752014-03-02 12:47:18 +00001561/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1562static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00001563{
sewardj32d86752014-03-02 12:47:18 +00001564 IRTemp maskT = newTemp(Ity_I64);
1565 IRTemp res = newTemp(Ity_I64);
1566 vassert(sh >= 1 && sh <= 63);
1567 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00001568 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00001569 binop(Iop_Or64,
1570 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00001571 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1572 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00001573 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00001574 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1575 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00001576 )
1577 );
sewardjdc9259c2014-02-27 11:10:19 +00001578 return res;
1579}
1580
sewardj32d86752014-03-02 12:47:18 +00001581/* Generates byte swaps within 32-bit lanes. */
1582static IRTemp math_UINTSWAP64 ( IRTemp src )
1583{
1584 IRTemp res;
1585 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1586 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1587 return res;
1588}
1589
1590/* Generates byte swaps within 16-bit lanes. */
1591static IRTemp math_USHORTSWAP64 ( IRTemp src )
1592{
1593 IRTemp res;
1594 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1595 return res;
1596}
1597
1598/* Generates a 64-bit byte swap. */
1599static IRTemp math_BYTESWAP64 ( IRTemp src )
1600{
1601 IRTemp res;
1602 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1603 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1604 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
1605 return res;
1606}
sewardjdc9259c2014-02-27 11:10:19 +00001607
1608/* Generates a 64-bit bit swap. */
1609static IRTemp math_BITSWAP64 ( IRTemp src )
1610{
sewardj32d86752014-03-02 12:47:18 +00001611 IRTemp res;
1612 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
1613 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
1614 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
1615 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00001616}
1617
sewardj606c4ba2014-01-26 19:11:14 +00001618/* Duplicates the bits at the bottom of the given word to fill the
1619 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
1620 except for the bottom bits. */
1621static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1622{
1623 if (srcTy == Ity_I8) {
1624 IRTemp t16 = newTemp(Ity_I64);
1625 assign(t16, binop(Iop_Or64, mkexpr(src),
1626 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1627 IRTemp t32 = newTemp(Ity_I64);
1628 assign(t32, binop(Iop_Or64, mkexpr(t16),
1629 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1630 IRTemp t64 = newTemp(Ity_I64);
1631 assign(t64, binop(Iop_Or64, mkexpr(t32),
1632 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1633 return t64;
1634 }
1635 if (srcTy == Ity_I16) {
1636 IRTemp t32 = newTemp(Ity_I64);
1637 assign(t32, binop(Iop_Or64, mkexpr(src),
1638 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1639 IRTemp t64 = newTemp(Ity_I64);
1640 assign(t64, binop(Iop_Or64, mkexpr(t32),
1641 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1642 return t64;
1643 }
1644 if (srcTy == Ity_I32) {
1645 IRTemp t64 = newTemp(Ity_I64);
1646 assign(t64, binop(Iop_Or64, mkexpr(src),
1647 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1648 return t64;
1649 }
1650 if (srcTy == Ity_I64) {
1651 return src;
1652 }
1653 vassert(0);
1654}
1655
1656
sewardjbbcf1882014-01-12 12:49:10 +00001657/*------------------------------------------------------------*/
1658/*--- FP comparison helpers ---*/
1659/*------------------------------------------------------------*/
1660
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the I32 comparison result so all the arithmetic below can
      be done at 64 bits. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (bit 6 of irRes) : (bit 0 of irRes), i.e. 0 .. 3.  Note the
      shr by 5 followed by the mask of 3 lands bit 6 in position 1. */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1737
1738
1739/*------------------------------------------------------------*/
1740/*--- Data processing (immediate) ---*/
1741/*------------------------------------------------------------*/
1742
1743/* Helper functions for supporting "DecodeBitMasks" */
1744
1745static ULong dbm_ROR ( Int width, ULong x, Int rot )
1746{
1747 vassert(width > 0 && width <= 64);
1748 vassert(rot >= 0 && rot < width);
1749 if (rot == 0) return x;
1750 ULong res = x >> rot;
1751 res |= (x << (width - rot));
1752 if (width < 64)
1753 res &= ((1ULL << width) - 1);
1754 return res;
1755}
1756
1757static ULong dbm_RepTo64( Int esize, ULong x )
1758{
1759 switch (esize) {
1760 case 64:
1761 return x;
1762 case 32:
1763 x &= 0xFFFFFFFF; x |= (x << 32);
1764 return x;
1765 case 16:
1766 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1767 return x;
1768 case 8:
1769 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1770 return x;
1771 case 4:
1772 x &= 0xF; x |= (x << 4); x |= (x << 8);
1773 x |= (x << 16); x |= (x << 32);
1774 return x;
1775 case 2:
1776 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1777 x |= (x << 16); x |= (x << 32);
1778 return x;
1779 default:
1780 break;
1781 }
1782 vpanic("dbm_RepTo64");
1783 /*NOTREACHED*/
1784 return 0;
1785}
1786
1787static Int dbm_highestSetBit ( ULong x )
1788{
1789 Int i;
1790 for (i = 63; i >= 0; i--) {
1791 if (x & (1ULL << i))
1792 return i;
1793 }
1794 vassert(x == 0);
1795 return -1;
1796}
1797
/* Transcription of the AArch64 "DecodeBitMasks" pseudocode function,
   used by the logical-immediate and bitfield instruction decoders.
   Decodes the (immN, imms, immr) fields into the wmask and tmask
   bit patterns; either out-pointer may be NULL if that result is not
   wanted.  'immediate' selects the extra reserved-encoding check used
   for logical immediates; M is the operation size (32 or 64).
   Returns False for a reserved/invalid encoding, True otherwise. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = position of the highest set bit of immN:NOT(imms); this
      determines the element size 2^len. */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates, an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask: the rotated run of ones, replicated across the word;
      tmask: the unrotated run of d+1 ones, replicated. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1857
1858
/* Decode and translate one instruction from the A64 "data processing
   (immediate)" group (insn[28:26] == 100).  On success, emits IR into
   the current block and returns True; on an unrecognised or invalid
   encoding, returns False so the caller can report the failure.
   |dres| is unused here but kept for uniformity with the sibling
   dis_ARM64_* decoders. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is LSL'd by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting form writes Xd (ZR for reg 31), not SP. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      /* 21-bit signed immediate, assembled from the hi:lo split. */
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         /* ADRP: page-aligned PC plus (imm << 12). */
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      /* Expand the (N,immS,immR) bitmask-immediate encoding. */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR may write SP; no flags. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS writes ZR-or-reg and sets NZCV. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf   = INSN(31,31);
      UInt opc  = INSN(30,29);
      UInt N    = INSN(22,22);
      UInt immR = INSN(21,16);
      UInt immS = INSN(15,10);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      Bool inZero = False;   /* True: insert into zeroes (UBFM/SBFM) */
      Bool extend = False;   /* True: sign-extend the top (SBFM)     */
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                      || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      /* sf must equal N (bit 22). */
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         /* Shift-by-zero: result is just the low source; avoids an
            undefined shift of (szBits - 0) below. */
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2200
2201
2202/*------------------------------------------------------------*/
2203/*--- Data processing (register) instructions ---*/
2204/*------------------------------------------------------------*/
2205
2206static const HChar* nameSH ( UInt sh ) {
2207 switch (sh) {
2208 case 0: return "lsl";
2209 case 1: return "lsr";
2210 case 2: return "asr";
2211 case 3: return "ror";
2212 default: vassert(0);
2213 }
2214}
2215
/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   /* t0 = raw register (reg 31 reads as zero here). */
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   /* t1 = shifted/rotated value. */
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      /* Used for the BIC/ORN/EON/BICS family. */
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}
2259
2260
/* Decode and translate one instruction from the A64 "data processing
   (register)" group.  On success, emits IR into the current block and
   returns True; otherwise returns False.  |dres| is unused here but
   kept for uniformity with the sibling dis_ARM64_* decoders. */
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29, 29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* rD==31 is the zero register: discard the result but still
            compute flags below if requested (CMP/CMN forms). */
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall though */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         /* bN==1 selects the NOT-ed operand (BIC/ORN/EON/BICS). */
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            /* ANDS/BICS set NZCV. */
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9 4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      /* Full 64x64->128 multiply; keep only the high half. */
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
      sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            /* negate via 0 - x */
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28        20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

         000   Xm & 0xFF           UXTB
         001   Xm & 0xFFFF         UXTH
         010   Xm & (2^32)-1       UXTW
         011   Xm                  UXTX

         100   Xm sx from bit 7    SXTB
         101   Xm sx from bit 15   SXTH
         110   Xm sx from bit 31   SXTW
         111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            /* sign-extend by shifting up then arithmetically back down */
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            /* 32-bit flag computation needs narrowed operands. */
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29        20   15   11 9  3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29        20 15   11 9  3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20     15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         /* all variants are implemented on a 64-bit value */
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit: pre-shift into the top half so the 64-bit swap
               lands the result in the low 32 bits. */
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      if (!isCLS) { // CLS not yet supported
         if (is64) {
            /* Iop_Clz64 is undefined for zero input, so special-case it. */
            assign(src, getIReg64orZR(nn));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(64),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64,
                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(32),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("cl%c %s, %s\n",
             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
         return True;
      }
   }

   /* -------------------- LSLV/LSRV/ASRV -------------------- */
   /*    30 28        20 15   11 9  4
      sf 00 1101 0110 m  0010 00 n  d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n  d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n  d   ASRV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I8);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      /* shift amount is Rm modulo the register width */
      assign(srcR,
             unop(Iop_64to8,
                  binop(Iop_And64,
                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
      switch (op) {
         case BITS2(0,0): iop = mkSHL(ty); break;
         case BITS2(0,1): iop = mkSHR(ty); break;
         case BITS2(1,0): iop = mkSAR(ty); break;
         default: vassert(0);
      }
      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 3);
      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}
2877
2878
2879/*------------------------------------------------------------*/
2880/*--- Load and Store instructions ---*/
2881/*------------------------------------------------------------*/
2882
2883/* Generate the EA for a "reg + reg" style amode. This is done from
2884 parts of the insn, but for sanity checking sake it takes the whole
2885 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
2886 and S=insn[12]:
2887
2888 The possible forms, along with their opt:S values, are:
2889 011:0 Xn|SP + Xm
2890 111:0 Xn|SP + Xm
2891 011:1 Xn|SP + Xm * transfer_szB
2892 111:1 Xn|SP + Xm * transfer_szB
2893 010:0 Xn|SP + 32Uto64(Wm)
2894 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
2895 110:0 Xn|SP + 32Sto64(Wm)
2896 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
2897
2898 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
2899 the transfer size is insn[23,31,30]. For integer loads/stores,
2900 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2901
2902 If the decoding fails, it returns IRTemp_INVALID.
2903
 2904 isInt is True iff this decoding is for transfers to/from integer
2905 registers. If False it is for transfers to/from vector registers.
2906*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   /* Field extraction; see the encoding table in the comment above. */
   UInt optS = SLICE_UInt(insn, 15, 12);   // opt:S, selects the extend/shift form
   UInt mm = SLICE_UInt(insn, 20, 16);     // Rm, the index register
   UInt nn = SLICE_UInt(insn, 9, 5);       // Rn, the base register
   UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   /* Integer transfers: accept only the four register-offset opcode
      groups listed; anything else is rejected. */
   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
         /* NOTE(review): both arms of this 'if' fail, so the 128-bit
            vector case is rejected for now (not yet verified); the
            fall-through into the goto fail below is therefore
            unreachable but harmless. */
         if (isInt) goto fail; else goto fail;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   /* Build the index expression for each supported opt:S form (the
      amode table in the comment above), rendering the amode text into
      BUF as we go.  "ATC" cases await a test case before being
      enabled. */
   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         // Xn|SP + Xm
         /* NOTE(review): the base is printed with nameIReg64orZR but
            added with getIReg64orSP below; for nn==31 the text shows
            "xzr" where the EA uses SP.  Cosmetic (DIP output) only --
            same applies to the other cases below. */
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         // Xn|SP + Xm * transfer_szB
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         // Xn|SP + 32Uto64(Wm)
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         // Xn|SP + 32Uto64(Wm) * transfer_szB
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         // Xn|SP + 32Sto64(Wm)
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         // Xn|SP + 32Sto64(Wm) * transfer_szB
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   /* EA = Xn|SP + index expression, as a new 64-bit temp. */
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}
2997
2998
2999/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3000 bits of DATAE :: Ity_I64. */
3001static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3002{
3003 IRExpr* addrE = mkexpr(addr);
3004 switch (szB) {
3005 case 8:
3006 storeLE(addrE, dataE);
3007 break;
3008 case 4:
3009 storeLE(addrE, unop(Iop_64to32, dataE));
3010 break;
3011 case 2:
3012 storeLE(addrE, unop(Iop_64to16, dataE));
3013 break;
3014 case 1:
3015 storeLE(addrE, unop(Iop_64to8, dataE));
3016 break;
3017 default:
3018 vassert(0);
3019 }
3020}
3021
3022
3023/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3024 placing the result in an Ity_I64 temporary. */
3025static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3026{
3027 IRTemp res = newTemp(Ity_I64);
3028 IRExpr* addrE = mkexpr(addr);
3029 switch (szB) {
3030 case 8:
3031 assign(res, loadLE(Ity_I64,addrE));
3032 break;
3033 case 4:
3034 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3035 break;
3036 case 2:
3037 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3038 break;
3039 case 1:
3040 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3041 break;
3042 default:
3043 vassert(0);
3044 }
3045 return res;
3046}
3047
3048
3049static
3050Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3051{
3052# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3053
3054 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3055 /* uimm12 is scaled by the transfer size
3056
3057 31 29 26 21 9 4
3058 | | | | | |
3059 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3060 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3061
3062 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3063 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3064
3065 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3066 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3067
3068 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3069 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3070 */
3071 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3072 UInt szLg2 = INSN(31,30);
3073 UInt szB = 1 << szLg2;
3074 Bool isLD = INSN(22,22) == 1;
3075 UInt offs = INSN(21,10) * szB;
3076 UInt nn = INSN(9,5);
3077 UInt tt = INSN(4,0);
3078 IRTemp ta = newTemp(Ity_I64);
3079 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3080 if (nn == 31) { /* FIXME generate stack alignment check */ }
3081 vassert(szLg2 < 4);
3082 if (isLD) {
3083 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3084 } else {
3085 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3086 }
3087 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3088 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3089 DIP("%s %s, [%s, #%u]\n",
3090 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3091 nameIReg64orSP(nn), offs);
3092 return True;
3093 }
3094
3095 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3096 /*
3097 31 29 26 20 11 9 4
3098 | | | | | | |
3099 (at-Rn-then-Rn=EA) | | |
3100 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3101 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3102
3103 (at-EA-then-Rn=EA)
3104 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3105 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3106
3107 (at-EA)
3108 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3109 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3110
3111 simm9 is unscaled.
3112
3113 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3114 load case this is because would create two competing values for
3115 Rt. In the store case the reason is unclear, but the spec
3116 disallows it anyway.
3117
3118 Stores are narrowing, loads are unsigned widening. sz encodes
3119 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3120 */
3121 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3122 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3123 UInt szLg2 = INSN(31,30);
3124 UInt szB = 1 << szLg2;
3125 Bool isLoad = INSN(22,22) == 1;
3126 UInt imm9 = INSN(20,12);
3127 UInt nn = INSN(9,5);
3128 UInt tt = INSN(4,0);
3129 Bool wBack = INSN(10,10) == 1;
3130 UInt how = INSN(11,10);
3131 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3132 /* undecodable; fall through */
3133 } else {
3134 if (nn == 31) { /* FIXME generate stack alignment check */ }
3135
3136 // Compute the transfer address TA and the writeback address WA.
3137 IRTemp tRN = newTemp(Ity_I64);
3138 assign(tRN, getIReg64orSP(nn));
3139 IRTemp tEA = newTemp(Ity_I64);
3140 Long simm9 = (Long)sx_to_64(imm9, 9);
3141 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3142
3143 IRTemp tTA = newTemp(Ity_I64);
3144 IRTemp tWA = newTemp(Ity_I64);
3145 switch (how) {
3146 case BITS2(0,1):
3147 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3148 case BITS2(1,1):
3149 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3150 case BITS2(0,0):
3151 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3152 default:
3153 vassert(0); /* NOTREACHED */
3154 }
3155
sewardje0bff8b2014-03-09 09:40:23 +00003156 /* Normally rN would be updated after the transfer. However, in
 3157 the special case typified by
3158 str x30, [sp,#-16]!
3159 it is necessary to update SP before the transfer, (1)
3160 because Memcheck will otherwise complain about a write
3161 below the stack pointer, and (2) because the segfault
3162 stack extension mechanism will otherwise extend the stack
3163 only down to SP before the instruction, which might not be
3164 far enough, if the -16 bit takes the actual access
3165 address to the next page.
3166 */
3167 Bool earlyWBack
3168 = wBack && simm9 < 0 && szB == 8
3169 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3170
3171 if (wBack && earlyWBack)
3172 putIReg64orSP(nn, mkexpr(tEA));
3173
sewardjbbcf1882014-01-12 12:49:10 +00003174 if (isLoad) {
3175 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3176 } else {
3177 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3178 }
3179
sewardje0bff8b2014-03-09 09:40:23 +00003180 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003181 putIReg64orSP(nn, mkexpr(tEA));
3182
3183 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3184 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3185 const HChar* fmt_str = NULL;
3186 switch (how) {
3187 case BITS2(0,1):
3188 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3189 break;
3190 case BITS2(1,1):
3191 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3192 break;
3193 case BITS2(0,0):
3194 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3195 break;
3196 default:
3197 vassert(0);
3198 }
3199 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3200 nameIRegOrZR(szB == 8, tt),
3201 nameIReg64orSP(nn), simm9);
3202 return True;
3203 }
3204 }
3205
3206 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3207 /* L==1 => mm==LD
3208 L==0 => mm==ST
3209 x==0 => 32 bit transfers, and zero extended loads
3210 x==1 => 64 bit transfers
3211 simm7 is scaled by the (single-register) transfer size
3212
3213 (at-Rn-then-Rn=EA)
3214 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3215
3216 (at-EA-then-Rn=EA)
3217 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3218
3219 (at-EA)
3220 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3221 */
3222
3223 UInt insn_30_23 = INSN(30,23);
3224 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3225 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3226 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3227 UInt bL = INSN(22,22);
3228 UInt bX = INSN(31,31);
3229 UInt bWBack = INSN(23,23);
3230 UInt rT1 = INSN(4,0);
3231 UInt rN = INSN(9,5);
3232 UInt rT2 = INSN(14,10);
3233 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3234 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3235 || (bL && rT1 == rT2)) {
3236 /* undecodable; fall through */
3237 } else {
3238 if (rN == 31) { /* FIXME generate stack alignment check */ }
3239
3240 // Compute the transfer address TA and the writeback address WA.
3241 IRTemp tRN = newTemp(Ity_I64);
3242 assign(tRN, getIReg64orSP(rN));
3243 IRTemp tEA = newTemp(Ity_I64);
3244 simm7 = (bX ? 8 : 4) * simm7;
3245 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3246
3247 IRTemp tTA = newTemp(Ity_I64);
3248 IRTemp tWA = newTemp(Ity_I64);
3249 switch (INSN(24,23)) {
3250 case BITS2(0,1):
3251 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3252 case BITS2(1,1):
3253 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3254 case BITS2(1,0):
3255 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3256 default:
3257 vassert(0); /* NOTREACHED */
3258 }
3259
3260 /* Normally rN would be updated after the transfer. However, in
 3261 the special case typified by
3262 stp x29, x30, [sp,#-112]!
3263 it is necessary to update SP before the transfer, (1)
3264 because Memcheck will otherwise complain about a write
3265 below the stack pointer, and (2) because the segfault
3266 stack extension mechanism will otherwise extend the stack
3267 only down to SP before the instruction, which might not be
3268 far enough, if the -112 bit takes the actual access
3269 address to the next page.
3270 */
3271 Bool earlyWBack
3272 = bWBack && simm7 < 0
3273 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3274
3275 if (bWBack && earlyWBack)
3276 putIReg64orSP(rN, mkexpr(tEA));
3277
3278 /**/ if (bL == 1 && bX == 1) {
3279 // 64 bit load
3280 putIReg64orZR(rT1, loadLE(Ity_I64,
3281 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3282 putIReg64orZR(rT2, loadLE(Ity_I64,
3283 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3284 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003285 // 32 bit load
3286 putIReg32orZR(rT1, loadLE(Ity_I32,
3287 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3288 putIReg32orZR(rT2, loadLE(Ity_I32,
3289 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3290 } else if (bL == 0 && bX == 1) {
3291 // 64 bit store
3292 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3293 getIReg64orZR(rT1));
3294 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3295 getIReg64orZR(rT2));
3296 } else {
3297 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003298 // 32 bit store
3299 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3300 getIReg32orZR(rT1));
3301 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3302 getIReg32orZR(rT2));
3303 }
3304
3305 if (bWBack && !earlyWBack)
3306 putIReg64orSP(rN, mkexpr(tEA));
3307
3308 const HChar* fmt_str = NULL;
3309 switch (INSN(24,23)) {
3310 case BITS2(0,1):
3311 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3312 break;
3313 case BITS2(1,1):
3314 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3315 break;
3316 case BITS2(1,0):
3317 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3318 break;
3319 default:
3320 vassert(0);
3321 }
3322 DIP(fmt_str, bL == 0 ? "st" : "ld",
3323 nameIRegOrZR(bX == 1, rT1),
3324 nameIRegOrZR(bX == 1, rT2),
3325 nameIReg64orSP(rN), simm7);
3326 return True;
3327 }
3328 }
3329
3330 /* ---------------- LDR (literal, int reg) ---------------- */
3331 /* 31 29 23 4
3332 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3333 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3334 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3335 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3336 Just handles the first two cases for now.
3337 */
3338 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3339 UInt imm19 = INSN(23,5);
3340 UInt rT = INSN(4,0);
3341 UInt bX = INSN(30,30);
3342 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3343 if (bX) {
3344 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3345 } else {
3346 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3347 }
3348 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3349 return True;
3350 }
3351
3352 /* -------------- {LD,ST}R (integer register) --------------- */
3353 /* 31 29 20 15 12 11 9 4
3354 | | | | | | | |
3355 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3356 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3357 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3358 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3359
3360 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3361 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3362 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3363 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3364 */
3365 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3366 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3367 HChar dis_buf[64];
3368 UInt szLg2 = INSN(31,30);
3369 Bool isLD = INSN(22,22) == 1;
3370 UInt tt = INSN(4,0);
3371 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3372 if (ea != IRTemp_INVALID) {
3373 switch (szLg2) {
3374 case 3: /* 64 bit */
3375 if (isLD) {
3376 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3377 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3378 } else {
3379 storeLE(mkexpr(ea), getIReg64orZR(tt));
3380 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3381 }
3382 break;
3383 case 2: /* 32 bit */
3384 if (isLD) {
3385 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3386 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3387 } else {
3388 storeLE(mkexpr(ea), getIReg32orZR(tt));
3389 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3390 }
3391 break;
3392 case 1: /* 16 bit */
3393 if (isLD) {
3394 putIReg64orZR(tt, unop(Iop_16Uto64,
3395 loadLE(Ity_I16, mkexpr(ea))));
3396 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3397 } else {
3398 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3399 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3400 }
3401 break;
3402 case 0: /* 8 bit */
3403 if (isLD) {
3404 putIReg64orZR(tt, unop(Iop_8Uto64,
3405 loadLE(Ity_I8, mkexpr(ea))));
3406 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
3407 } else {
3408 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3409 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3410 }
3411 break;
3412 default:
3413 vassert(0);
3414 }
3415 return True;
3416 }
3417 }
3418
3419 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3420 /* 31 29 26 23 21 9 4
3421 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3422 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3423 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3424 where
3425 Rt is Wt when x==1, Xt when x==0
3426 */
3427 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3428 /* Further checks on bits 31:30 and 22 */
3429 Bool valid = False;
3430 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3431 case BITS3(1,0,0):
3432 case BITS3(0,1,0): case BITS3(0,1,1):
3433 case BITS3(0,0,0): case BITS3(0,0,1):
3434 valid = True;
3435 break;
3436 }
3437 if (valid) {
3438 UInt szLg2 = INSN(31,30);
3439 UInt bitX = INSN(22,22);
3440 UInt imm12 = INSN(21,10);
3441 UInt nn = INSN(9,5);
3442 UInt tt = INSN(4,0);
3443 UInt szB = 1 << szLg2;
3444 IRExpr* ea = binop(Iop_Add64,
3445 getIReg64orSP(nn), mkU64(imm12 * szB));
3446 switch (szB) {
3447 case 4:
3448 vassert(bitX == 0);
3449 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
3450 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
3451 nameIReg64orSP(nn), imm12 * szB);
3452 break;
3453 case 2:
3454 if (bitX == 1) {
3455 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
3456 } else {
3457 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
3458 }
3459 DIP("ldrsh %s, [%s, #%u]\n",
3460 nameIRegOrZR(bitX == 0, tt),
3461 nameIReg64orSP(nn), imm12 * szB);
3462 break;
3463 case 1:
3464 if (bitX == 1) {
3465 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
3466 } else {
3467 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
3468 }
3469 DIP("ldrsb %s, [%s, #%u]\n",
3470 nameIRegOrZR(bitX == 0, tt),
3471 nameIReg64orSP(nn), imm12 * szB);
3472 break;
3473 default:
3474 vassert(0);
3475 }
3476 return True;
3477 }
3478 /* else fall through */
3479 }
3480
3481 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3482 /* (at-Rn-then-Rn=EA)
3483 31 29 23 21 20 11 9 4
3484 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
3485 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
3486 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
3487
3488 (at-EA-then-Rn=EA)
3489 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
3490 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
3491 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
3492 where
3493 Rt is Wt when x==1, Xt when x==0
3494 transfer-at-Rn when [11]==0, at EA when [11]==1
3495 */
3496 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3497 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3498 /* Further checks on bits 31:30 and 22 */
3499 Bool valid = False;
3500 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3501 case BITS3(1,0,0): // LDRSW Xt
3502 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3503 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3504 valid = True;
3505 break;
3506 }
3507 if (valid) {
3508 UInt szLg2 = INSN(31,30);
3509 UInt imm9 = INSN(20,12);
3510 Bool atRN = INSN(11,11) == 0;
3511 UInt nn = INSN(9,5);
3512 UInt tt = INSN(4,0);
3513 IRTemp tRN = newTemp(Ity_I64);
3514 IRTemp tEA = newTemp(Ity_I64);
3515 IRTemp tTA = IRTemp_INVALID;
3516 ULong simm9 = sx_to_64(imm9, 9);
3517 Bool is64 = INSN(22,22) == 0;
3518 assign(tRN, getIReg64orSP(nn));
3519 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3520 tTA = atRN ? tRN : tEA;
3521 HChar ch = '?';
3522 /* There are 5 cases:
3523 byte load, SX to 64
3524 byte load, SX to 32, ZX to 64
3525 halfword load, SX to 64
3526 halfword load, SX to 32, ZX to 64
3527 word load, SX to 64
3528 The ifs below handle them in the listed order.
3529 */
3530 if (szLg2 == 0) {
3531 ch = 'b';
3532 if (is64) {
3533 putIReg64orZR(tt, unop(Iop_8Sto64,
3534 loadLE(Ity_I8, mkexpr(tTA))));
3535 } else {
3536 putIReg32orZR(tt, unop(Iop_8Sto32,
3537 loadLE(Ity_I8, mkexpr(tTA))));
3538 }
3539 }
3540 else if (szLg2 == 1) {
3541 ch = 'h';
3542 if (is64) {
3543 putIReg64orZR(tt, unop(Iop_16Sto64,
3544 loadLE(Ity_I16, mkexpr(tTA))));
3545 } else {
3546 putIReg32orZR(tt, unop(Iop_16Sto32,
3547 loadLE(Ity_I16, mkexpr(tTA))));
3548 }
3549 }
3550 else if (szLg2 == 2 && is64) {
3551 ch = 'w';
3552 putIReg64orZR(tt, unop(Iop_32Sto64,
3553 loadLE(Ity_I32, mkexpr(tTA))));
3554 }
3555 else {
3556 vassert(0);
3557 }
3558 putIReg64orSP(nn, mkexpr(tEA));
3559 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3560 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3561 return True;
3562 }
3563 /* else fall through */
3564 }
3565
3566 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3567 /* 31 29 23 21 20 11 9 4
3568 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
3569 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
3570 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
3571 where
3572 Rt is Wt when x==1, Xt when x==0
3573 */
3574 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3575 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3576 /* Further checks on bits 31:30 and 22 */
3577 Bool valid = False;
3578 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3579 case BITS3(1,0,0): // LDURSW Xt
3580 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3581 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3582 valid = True;
3583 break;
3584 }
3585 if (valid) {
3586 UInt szLg2 = INSN(31,30);
3587 UInt imm9 = INSN(20,12);
3588 UInt nn = INSN(9,5);
3589 UInt tt = INSN(4,0);
3590 IRTemp tRN = newTemp(Ity_I64);
3591 IRTemp tEA = newTemp(Ity_I64);
3592 ULong simm9 = sx_to_64(imm9, 9);
3593 Bool is64 = INSN(22,22) == 0;
3594 assign(tRN, getIReg64orSP(nn));
3595 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3596 HChar ch = '?';
3597 /* There are 5 cases:
3598 byte load, SX to 64
3599 byte load, SX to 32, ZX to 64
3600 halfword load, SX to 64
3601 halfword load, SX to 32, ZX to 64
3602 word load, SX to 64
3603 The ifs below handle them in the listed order.
3604 */
3605 if (szLg2 == 0) {
3606 ch = 'b';
3607 if (is64) {
3608 putIReg64orZR(tt, unop(Iop_8Sto64,
3609 loadLE(Ity_I8, mkexpr(tEA))));
3610 } else {
3611 putIReg32orZR(tt, unop(Iop_8Sto32,
3612 loadLE(Ity_I8, mkexpr(tEA))));
3613 }
3614 }
3615 else if (szLg2 == 1) {
3616 ch = 'h';
3617 if (is64) {
3618 putIReg64orZR(tt, unop(Iop_16Sto64,
3619 loadLE(Ity_I16, mkexpr(tEA))));
3620 } else {
3621 putIReg32orZR(tt, unop(Iop_16Sto32,
3622 loadLE(Ity_I16, mkexpr(tEA))));
3623 }
3624 }
3625 else if (szLg2 == 2 && is64) {
3626 ch = 'w';
3627 putIReg64orZR(tt, unop(Iop_32Sto64,
3628 loadLE(Ity_I32, mkexpr(tEA))));
3629 }
3630 else {
3631 vassert(0);
3632 }
3633 DIP("ldurs%c %s, [%s, #%lld]",
3634 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3635 return True;
3636 }
3637 /* else fall through */
3638 }
3639
3640 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
3641 /* L==1 => mm==LD
3642 L==0 => mm==ST
3643 sz==00 => 32 bit (S) transfers
3644 sz==01 => 64 bit (D) transfers
3645 sz==10 => 128 bit (Q) transfers
3646 sz==11 isn't allowed
3647 simm7 is scaled by the (single-register) transfer size
3648
3649 31 29 22 21 14 9 4
3650 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
3651 (at-Rn-then-Rn=EA)
3652
3653 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
3654 (at-EA-then-Rn=EA)
3655
3656 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
3657 (at-EA)
3658 */
3659
3660 UInt insn_29_23 = INSN(29,23);
3661 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
3662 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
3663 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
3664 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
3665 Bool isLD = INSN(22,22) == 1;
3666 Bool wBack = INSN(23,23) == 1;
3667 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3668 UInt tt2 = INSN(14,10);
3669 UInt nn = INSN(9,5);
3670 UInt tt1 = INSN(4,0);
3671 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
3672 /* undecodable; fall through */
3673 } else {
3674 if (nn == 31) { /* FIXME generate stack alignment check */ }
3675
3676 // Compute the transfer address TA and the writeback address WA.
3677 UInt szB = 4 << szSlg2; /* szB is the per-register size */
3678 IRTemp tRN = newTemp(Ity_I64);
3679 assign(tRN, getIReg64orSP(nn));
3680 IRTemp tEA = newTemp(Ity_I64);
3681 simm7 = szB * simm7;
3682 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3683
3684 IRTemp tTA = newTemp(Ity_I64);
3685 IRTemp tWA = newTemp(Ity_I64);
3686 switch (INSN(24,23)) {
3687 case BITS2(0,1):
3688 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3689 case BITS2(1,1):
3690 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3691 case BITS2(1,0):
3692 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3693 default:
3694 vassert(0); /* NOTREACHED */
3695 }
3696
3697 IRType ty = Ity_INVALID;
3698 switch (szB) {
3699 case 4: ty = Ity_F32; break;
3700 case 8: ty = Ity_F64; break;
3701 case 16: ty = Ity_V128; break;
3702 default: vassert(0);
3703 }
3704
sewardje0bff8b2014-03-09 09:40:23 +00003705 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00003706 the special cases typified by
sewardje0bff8b2014-03-09 09:40:23 +00003707 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00003708 stp d0, d1, [sp,#-512]!
3709 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00003710 it is necessary to update SP before the transfer, (1)
3711 because Memcheck will otherwise complain about a write
3712 below the stack pointer, and (2) because the segfault
3713 stack extension mechanism will otherwise extend the stack
3714 only down to SP before the instruction, which might not be
3715 far enough, if the -512 bit takes the actual access
3716 address to the next page.
3717 */
3718 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00003719 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00003720 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
3721
3722 if (wBack && earlyWBack)
3723 putIReg64orSP(nn, mkexpr(tEA));
3724
sewardjbbcf1882014-01-12 12:49:10 +00003725 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00003726 if (szB < 16) {
3727 putQReg128(tt1, mkV128(0x0000));
3728 }
sewardj606c4ba2014-01-26 19:11:14 +00003729 putQRegLO(tt1,
3730 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00003731 if (szB < 16) {
3732 putQReg128(tt2, mkV128(0x0000));
3733 }
sewardj606c4ba2014-01-26 19:11:14 +00003734 putQRegLO(tt2,
3735 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00003736 } else {
3737 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00003738 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003739 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00003740 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003741 }
3742
sewardje0bff8b2014-03-09 09:40:23 +00003743 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003744 putIReg64orSP(nn, mkexpr(tEA));
3745
3746 const HChar* fmt_str = NULL;
3747 switch (INSN(24,23)) {
3748 case BITS2(0,1):
3749 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3750 break;
3751 case BITS2(1,1):
3752 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3753 break;
3754 case BITS2(1,0):
3755 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3756 break;
3757 default:
3758 vassert(0);
3759 }
3760 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00003761 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00003762 nameIReg64orSP(nn), simm7);
3763 return True;
3764 }
3765 }
3766
3767 /* -------------- {LD,ST}R (vector register) --------------- */
3768 /* 31 29 23 20 15 12 11 9 4
3769 | | | | | | | | |
3770 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
3771 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
3772 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
3773 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
3774 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
3775
3776 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
3777 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
3778 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
3779 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
3780 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
3781 */
3782 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3783 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3784 HChar dis_buf[64];
3785 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3786 Bool isLD = INSN(22,22) == 1;
3787 UInt tt = INSN(4,0);
3788 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
3789 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
3790 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
3791 switch (szLg2) {
3792 case 0: /* 8 bit */
3793 if (isLD) {
3794 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003795 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
3796 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003797 } else {
3798 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003799 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
3800 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003801 }
3802 break;
3803 case 1:
3804 if (isLD) {
3805 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003806 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
3807 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003808 } else {
3809 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003810 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
3811 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003812 }
3813 break;
3814 case 2: /* 32 bit */
3815 if (isLD) {
3816 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003817 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
3818 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003819 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003820 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
3821 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003822 }
3823 break;
3824 case 3: /* 64 bit */
3825 if (isLD) {
3826 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003827 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
3828 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003829 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003830 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
3831 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003832 }
3833 break;
3834 case 4: return False; //ATC
3835 default: vassert(0);
3836 }
3837 return True;
3838 }
3839 after_LDR_STR_vector_register:
3840
3841 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
3842 /* 31 29 22 20 15 12 11 9 4
3843 | | | | | | | | |
3844 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
3845
3846 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
3847 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
3848
3849 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
3850 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
3851 */
3852 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3853 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3854 HChar dis_buf[64];
3855 UInt szLg2 = INSN(31,30);
3856 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
3857 UInt tt = INSN(4,0);
3858 if (szLg2 == 3) goto after_LDRS_integer_register;
3859 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3860 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
3861 /* Enumerate the 5 variants explicitly. */
3862 if (szLg2 == 2/*32 bit*/ && sxTo64) {
3863 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
3864 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
3865 return True;
3866 }
3867 else
3868 if (szLg2 == 1/*16 bit*/) {
3869 if (sxTo64) {
3870 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
3871 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
3872 } else {
3873 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
3874 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3875 }
3876 return True;
3877 }
3878 else
3879 if (szLg2 == 0/*8 bit*/) {
3880 if (sxTo64) {
3881 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
3882 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
3883 } else {
3884 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
3885 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3886 }
3887 return True;
3888 }
3889 /* else it's an invalid combination */
3890 }
3891 after_LDRS_integer_register:
3892
3893 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
3894 /* This is the Unsigned offset variant only. The Post-Index and
3895 Pre-Index variants are below.
3896
3897 31 29 23 21 9 4
3898 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
3899 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
3900 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
3901 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
3902 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
3903
3904 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
3905 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
3906 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
3907 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
3908 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
3909 */
3910 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
3911 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
3912 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3913 Bool isLD = INSN(22,22) == 1;
3914 UInt pimm12 = INSN(21,10) << szLg2;
3915 UInt nn = INSN(9,5);
3916 UInt tt = INSN(4,0);
3917 IRTemp tEA = newTemp(Ity_I64);
3918 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3919 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
3920 if (isLD) {
3921 if (szLg2 < 4) {
3922 putQReg128(tt, mkV128(0x0000));
3923 }
sewardj606c4ba2014-01-26 19:11:14 +00003924 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00003925 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003926 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003927 }
3928 DIP("%s %s, [%s, #%u]\n",
3929 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003930 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00003931 return True;
3932 }
3933
3934 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
3935 /* These are the Post-Index and Pre-Index variants.
3936
3937 31 29 23 20 11 9 4
3938 (at-Rn-then-Rn=EA)
3939 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
3940 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
3941 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
3942 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
3943 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
3944
3945 (at-EA-then-Rn=EA)
3946 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
3947 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
3948 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
3949 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
3950 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
3951
3952 Stores are the same except with bit 22 set to 0.
3953 */
3954 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3955 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3956 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3957 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3958 Bool isLD = INSN(22,22) == 1;
3959 UInt imm9 = INSN(20,12);
3960 Bool atRN = INSN(11,11) == 0;
3961 UInt nn = INSN(9,5);
3962 UInt tt = INSN(4,0);
3963 IRTemp tRN = newTemp(Ity_I64);
3964 IRTemp tEA = newTemp(Ity_I64);
3965 IRTemp tTA = IRTemp_INVALID;
3966 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3967 ULong simm9 = sx_to_64(imm9, 9);
3968 assign(tRN, getIReg64orSP(nn));
3969 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3970 tTA = atRN ? tRN : tEA;
3971 if (isLD) {
3972 if (szLg2 < 4) {
3973 putQReg128(tt, mkV128(0x0000));
3974 }
sewardj606c4ba2014-01-26 19:11:14 +00003975 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00003976 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003977 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003978 }
3979 putIReg64orSP(nn, mkexpr(tEA));
3980 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
3981 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003982 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00003983 return True;
3984 }
3985
3986 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
3987 /* 31 29 23 20 11 9 4
3988 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
3989 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
3990 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
3991 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
3992 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
3993
3994 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
3995 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
3996 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
3997 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
3998 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
3999 */
4000 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4001 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4002 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4003 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4004 Bool isLD = INSN(22,22) == 1;
4005 UInt imm9 = INSN(20,12);
4006 UInt nn = INSN(9,5);
4007 UInt tt = INSN(4,0);
4008 ULong simm9 = sx_to_64(imm9, 9);
4009 IRTemp tEA = newTemp(Ity_I64);
4010 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4011 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4012 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004013 if (szLg2 < 4) {
4014 putQReg128(tt, mkV128(0x0000));
4015 }
4016 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004017 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004018 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004019 }
4020 DIP("%s %s, [%s, #%lld]\n",
4021 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004022 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004023 return True;
4024 }
4025
4026 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4027 /* 31 29 23 4
4028 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4029 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4030 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4031 */
4032 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4033 UInt szB = 4 << INSN(31,30);
4034 UInt imm19 = INSN(23,5);
4035 UInt tt = INSN(4,0);
4036 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4037 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004038 putQReg128(tt, mkV128(0x0000));
4039 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4040 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004041 return True;
4042 }
4043
sewardj606c4ba2014-01-26 19:11:14 +00004044 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004045 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004046 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4047 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4048 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4049 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4050 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4051 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004052 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4053 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004054 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004055 */
sewardj606c4ba2014-01-26 19:11:14 +00004056 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4057 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004058 ) {
4059 Bool isLD = INSN(22,22) == 1;
4060 UInt rN = INSN(9,5);
4061 UInt vT = INSN(4,0);
4062 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004063 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4064 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004065 assign(tEA, getIReg64orSP(rN));
4066 if (rN == 31) { /* FIXME generate stack alignment check */ }
4067 if (isLD) {
4068 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4069 } else {
4070 storeLE(mkexpr(tEA), getQReg128(vT));
4071 }
4072 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004073 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004074 return True;
4075 }
4076
sewardj606c4ba2014-01-26 19:11:14 +00004077 /* 31 23
4078 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4079 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4080 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4081 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4082 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4083 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4084 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4085 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4086 FIXME does this assume that the host is little endian?
4087 */
4088 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4089 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4090 ) {
4091 Bool isLD = INSN(22,22) == 1;
4092 UInt rN = INSN(9,5);
4093 UInt vT = INSN(4,0);
4094 IRTemp tEA = newTemp(Ity_I64);
4095 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4096 const HChar* name = names[INSN(11,10)];
4097 assign(tEA, getIReg64orSP(rN));
4098 if (rN == 31) { /* FIXME generate stack alignment check */ }
4099 if (isLD) {
4100 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4101 putQRegLane(vT, 1, mkU64(0));
4102 } else {
4103 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4104 }
4105 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4106 vT, name, nameIReg64orSP(rN));
4107 return True;
4108 }
4109
4110 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4111 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004112 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4113 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4114 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4115 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4116 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4117 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4118 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004119 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004120 Note that #16 is implied and cannot be any other value.
4121 FIXME does this assume that the host is little endian?
4122 */
sewardj7d009132014-02-20 17:43:38 +00004123 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4124 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004125 ) {
4126 Bool isLD = INSN(22,22) == 1;
4127 UInt rN = INSN(9,5);
4128 UInt vT = INSN(4,0);
4129 IRTemp tEA = newTemp(Ity_I64);
4130 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4131 const HChar* name = names[INSN(11,10)];
4132 assign(tEA, getIReg64orSP(rN));
4133 if (rN == 31) { /* FIXME generate stack alignment check */ }
4134 if (isLD) {
4135 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4136 } else {
4137 storeLE(mkexpr(tEA), getQReg128(vT));
4138 }
4139 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4140 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4141 vT, name, nameIReg64orSP(rN));
4142 return True;
4143 }
4144
sewardj950ca7a2014-04-03 23:03:32 +00004145 /* 31 23
4146 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4147 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004148 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004149 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004150 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004151 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4152 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4153 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004154 Note that #8 is implied and cannot be any other value.
4155 FIXME does this assume that the host is little endian?
4156 */
sewardj950ca7a2014-04-03 23:03:32 +00004157 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4158 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004159 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004160 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004161 UInt rN = INSN(9,5);
4162 UInt vT = INSN(4,0);
4163 IRTemp tEA = newTemp(Ity_I64);
4164 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4165 const HChar* name = names[INSN(11,10)];
4166 assign(tEA, getIReg64orSP(rN));
4167 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004168 if (isLD) {
4169 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4170 putQRegLane(vT, 1, mkU64(0));
4171 } else {
4172 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4173 }
sewardj606c4ba2014-01-26 19:11:14 +00004174 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004175 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4176 vT, name, nameIReg64orSP(rN));
4177 return True;
4178 }
4179
4180 /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4181 /* Only a very few cases. */
4182 /* 31 23 11 9 4
4183 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4184 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4185 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4186 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4187 */
4188 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4189 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4190 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4191 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4192 ) {
4193 Bool isLD = INSN(22,22) == 1;
4194 UInt rN = INSN(9,5);
4195 UInt vT = INSN(4,0);
4196 IRTemp tEA = newTemp(Ity_I64);
4197 UInt sz = INSN(11,10);
4198 const HChar* name = "??";
4199 assign(tEA, getIReg64orSP(rN));
4200 if (rN == 31) { /* FIXME generate stack alignment check */ }
4201 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4202 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4203 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4204 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4205 if (sz == BITS2(1,1)) {
4206 name = "2d";
4207 if (isLD) {
4208 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4209 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4210 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4211 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4212 } else {
4213 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4214 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4215 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4216 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4217 }
4218 }
4219 else if (sz == BITS2(1,0)) {
4220 /* Uh, this is ugly. TODO: better. */
4221 name = "4s";
4222 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4223 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4224 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4225 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4226 if (isLD) {
4227 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4228 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4229 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4230 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4231 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4232 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4233 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4234 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4235 } else {
4236 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4237 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4238 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4239 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4240 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4241 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4242 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4243 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4244 }
4245 }
4246 else {
4247 vassert(0); // Can't happen.
4248 }
4249 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4250 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4251 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4252 return True;
4253 }
4254
4255 /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4256 /* Only a very few cases. */
4257 /* 31 23
4258 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4259 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4260 */
4261 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4262 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4263 ) {
4264 Bool isLD = INSN(22,22) == 1;
4265 UInt rN = INSN(9,5);
4266 UInt vT = INSN(4,0);
4267 IRTemp tEA = newTemp(Ity_I64);
4268 const HChar* name = "16b";
4269 assign(tEA, getIReg64orSP(rN));
4270 if (rN == 31) { /* FIXME generate stack alignment check */ }
4271 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4272 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4273 if (isLD) {
4274 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4275 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4276 } else {
4277 storeLE(tEA_0, getQReg128((vT+0) % 32));
4278 storeLE(tEA_16, getQReg128((vT+1) % 32));
4279 }
4280 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4281 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004282 return True;
4283 }
4284
sewardj7d009132014-02-20 17:43:38 +00004285 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4286 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4287 /* 31 29 23 20 14 9 4
4288 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
4289 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
4290 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
4291 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004292 */
sewardj7d009132014-02-20 17:43:38 +00004293 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4294 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4295 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00004296 UInt szBlg2 = INSN(31,30);
4297 Bool isLD = INSN(22,22) == 1;
4298 Bool isAcqOrRel = INSN(15,15) == 1;
4299 UInt ss = INSN(20,16);
4300 UInt nn = INSN(9,5);
4301 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00004302
sewardjdc9259c2014-02-27 11:10:19 +00004303 vassert(szBlg2 < 4);
4304 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4305 IRType ty = integerIRTypeOfSize(szB);
4306 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00004307
sewardjdc9259c2014-02-27 11:10:19 +00004308 IRTemp ea = newTemp(Ity_I64);
4309 assign(ea, getIReg64orSP(nn));
4310 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00004311
sewardjdc9259c2014-02-27 11:10:19 +00004312 if (isLD && ss == BITS5(1,1,1,1,1)) {
4313 IRTemp res = newTemp(ty);
4314 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4315 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4316 if (isAcqOrRel) {
4317 stmt(IRStmt_MBE(Imbe_Fence));
4318 }
4319 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4320 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4321 return True;
4322 }
4323 if (!isLD) {
4324 if (isAcqOrRel) {
4325 stmt(IRStmt_MBE(Imbe_Fence));
4326 }
4327 IRTemp res = newTemp(Ity_I1);
4328 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4329 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4330 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4331 Need to set rS to 1 on failure, 0 on success. */
4332 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4333 mkU64(1)));
4334 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4335 nameIRegOrZR(False, ss),
4336 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4337 return True;
4338 }
4339 /* else fall through */
4340 }
4341
4342 /* ------------------ LDA{R,RH,RB} ------------------ */
4343 /* ------------------ STL{R,RH,RB} ------------------ */
4344 /* 31 29 23 20 14 9 4
4345 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
4346 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
4347 */
4348 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4349 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4350 UInt szBlg2 = INSN(31,30);
4351 Bool isLD = INSN(22,22) == 1;
4352 UInt nn = INSN(9,5);
4353 UInt tt = INSN(4,0);
4354
4355 vassert(szBlg2 < 4);
4356 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4357 IRType ty = integerIRTypeOfSize(szB);
4358 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4359
4360 IRTemp ea = newTemp(Ity_I64);
4361 assign(ea, getIReg64orSP(nn));
4362 /* FIXME generate check that ea is szB-aligned */
4363
4364 if (isLD) {
4365 IRTemp res = newTemp(ty);
4366 assign(res, loadLE(ty, mkexpr(ea)));
4367 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4368 stmt(IRStmt_MBE(Imbe_Fence));
4369 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4370 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4371 } else {
4372 stmt(IRStmt_MBE(Imbe_Fence));
4373 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4374 storeLE(mkexpr(ea), data);
4375 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4376 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4377 }
4378 return True;
sewardjbbcf1882014-01-12 12:49:10 +00004379 }
4380
4381 vex_printf("ARM64 front end: load_store\n");
4382 return False;
4383# undef INSN
4384}
4385
4386
4387/*------------------------------------------------------------*/
4388/*--- Control flow and misc instructions ---*/
4389/*------------------------------------------------------------*/
4390
4391static
sewardj65902992014-05-03 21:20:56 +00004392Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
4393 VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00004394{
4395# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4396
4397 /* ---------------------- B cond ----------------------- */
4398 /* 31 24 4 3
4399 0101010 0 imm19 0 cond */
4400 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4401 UInt cond = INSN(3,0);
4402 ULong uimm64 = INSN(23,5) << 2;
4403 Long simm64 = (Long)sx_to_64(uimm64, 21);
4404 vassert(dres->whatNext == Dis_Continue);
4405 vassert(dres->len == 4);
4406 vassert(dres->continueAt == 0);
4407 vassert(dres->jk_StopHere == Ijk_INVALID);
4408 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4409 Ijk_Boring,
4410 IRConst_U64(guest_PC_curr_instr + simm64),
4411 OFFB_PC) );
4412 putPC(mkU64(guest_PC_curr_instr + 4));
4413 dres->whatNext = Dis_StopHere;
4414 dres->jk_StopHere = Ijk_Boring;
4415 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4416 return True;
4417 }
4418
4419 /* -------------------- B{L} uncond -------------------- */
4420 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4421 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
4422 100101 imm26 B (PC + sxTo64(imm26 << 2))
4423 */
4424 UInt bLink = INSN(31,31);
4425 ULong uimm64 = INSN(25,0) << 2;
4426 Long simm64 = (Long)sx_to_64(uimm64, 28);
4427 if (bLink) {
4428 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4429 }
4430 putPC(mkU64(guest_PC_curr_instr + simm64));
4431 dres->whatNext = Dis_StopHere;
4432 dres->jk_StopHere = Ijk_Call;
4433 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4434 guest_PC_curr_instr + simm64);
4435 return True;
4436 }
4437
4438 /* --------------------- B{L} reg --------------------- */
4439 /* 31 24 22 20 15 9 4
4440 1101011 00 10 11111 000000 nn 00000 RET Rn
4441 1101011 00 01 11111 000000 nn 00000 CALL Rn
4442 1101011 00 00 11111 000000 nn 00000 JMP Rn
4443 */
4444 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4445 && INSN(20,16) == BITS5(1,1,1,1,1)
4446 && INSN(15,10) == BITS6(0,0,0,0,0,0)
4447 && INSN(4,0) == BITS5(0,0,0,0,0)) {
4448 UInt branch_type = INSN(22,21);
4449 UInt nn = INSN(9,5);
4450 if (branch_type == BITS2(1,0) /* RET */) {
4451 putPC(getIReg64orZR(nn));
4452 dres->whatNext = Dis_StopHere;
4453 dres->jk_StopHere = Ijk_Ret;
4454 DIP("ret %s\n", nameIReg64orZR(nn));
4455 return True;
4456 }
4457 if (branch_type == BITS2(0,1) /* CALL */) {
sewardj702054e2014-05-07 11:09:28 +00004458 IRTemp dst = newTemp(Ity_I64);
4459 assign(dst, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00004460 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
sewardj702054e2014-05-07 11:09:28 +00004461 putPC(mkexpr(dst));
sewardjbbcf1882014-01-12 12:49:10 +00004462 dres->whatNext = Dis_StopHere;
4463 dres->jk_StopHere = Ijk_Call;
4464 DIP("blr %s\n", nameIReg64orZR(nn));
4465 return True;
4466 }
4467 if (branch_type == BITS2(0,0) /* JMP */) {
4468 putPC(getIReg64orZR(nn));
4469 dres->whatNext = Dis_StopHere;
4470 dres->jk_StopHere = Ijk_Boring;
4471 DIP("jmp %s\n", nameIReg64orZR(nn));
4472 return True;
4473 }
4474 }
4475
4476 /* -------------------- CB{N}Z -------------------- */
4477 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4478 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4479 */
4480 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4481 Bool is64 = INSN(31,31) == 1;
4482 Bool bIfZ = INSN(24,24) == 0;
4483 ULong uimm64 = INSN(23,5) << 2;
4484 UInt rT = INSN(4,0);
4485 Long simm64 = (Long)sx_to_64(uimm64, 21);
4486 IRExpr* cond = NULL;
4487 if (is64) {
4488 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4489 getIReg64orZR(rT), mkU64(0));
4490 } else {
4491 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4492 getIReg32orZR(rT), mkU32(0));
4493 }
4494 stmt( IRStmt_Exit(cond,
4495 Ijk_Boring,
4496 IRConst_U64(guest_PC_curr_instr + simm64),
4497 OFFB_PC) );
4498 putPC(mkU64(guest_PC_curr_instr + 4));
4499 dres->whatNext = Dis_StopHere;
4500 dres->jk_StopHere = Ijk_Boring;
4501 DIP("cb%sz %s, 0x%llx\n",
4502 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4503 guest_PC_curr_instr + simm64);
4504 return True;
4505 }
4506
4507 /* -------------------- TB{N}Z -------------------- */
4508 /* 31 30 24 23 18 5 4
4509 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4510 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4511 */
4512 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4513 UInt b5 = INSN(31,31);
4514 Bool bIfZ = INSN(24,24) == 0;
4515 UInt b40 = INSN(23,19);
4516 UInt imm14 = INSN(18,5);
4517 UInt tt = INSN(4,0);
4518 UInt bitNo = (b5 << 5) | b40;
4519 ULong uimm64 = imm14 << 2;
4520 Long simm64 = sx_to_64(uimm64, 16);
4521 IRExpr* cond
4522 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4523 binop(Iop_And64,
4524 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4525 mkU64(1)),
4526 mkU64(0));
4527 stmt( IRStmt_Exit(cond,
4528 Ijk_Boring,
4529 IRConst_U64(guest_PC_curr_instr + simm64),
4530 OFFB_PC) );
4531 putPC(mkU64(guest_PC_curr_instr + 4));
4532 dres->whatNext = Dis_StopHere;
4533 dres->jk_StopHere = Ijk_Boring;
4534 DIP("tb%sz %s, #%u, 0x%llx\n",
4535 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4536 guest_PC_curr_instr + simm64);
4537 return True;
4538 }
4539
4540 /* -------------------- SVC -------------------- */
4541 /* 11010100 000 imm16 000 01
4542 Don't bother with anything except the imm16==0 case.
4543 */
4544 if (INSN(31,0) == 0xD4000001) {
4545 putPC(mkU64(guest_PC_curr_instr + 4));
4546 dres->whatNext = Dis_StopHere;
4547 dres->jk_StopHere = Ijk_Sys_syscall;
4548 DIP("svc #0\n");
4549 return True;
4550 }
4551
4552 /* ------------------ M{SR,RS} ------------------ */
4553 /* Only handles the case where the system register is TPIDR_EL0.
4554 0xD51BD0 010 Rt MSR tpidr_el0, rT
4555 0xD53BD0 010 Rt MRS rT, tpidr_el0
4556 */
4557 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4558 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4559 Bool toSys = INSN(21,21) == 0;
4560 UInt tt = INSN(4,0);
4561 if (toSys) {
4562 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4563 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4564 } else {
4565 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4566 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4567 }
4568 return True;
4569 }
4570 /* Cases for FPCR
4571 0xD51B44 000 Rt MSR fpcr, rT
      0xD53B44 000 Rt  MRS rT, fpcr
4573 */
4574 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4575 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4576 Bool toSys = INSN(21,21) == 0;
4577 UInt tt = INSN(4,0);
4578 if (toSys) {
4579 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4580 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4581 } else {
4582 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4583 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4584 }
4585 return True;
4586 }
4587 /* Cases for FPSR
sewardj7d009132014-02-20 17:43:38 +00004588 0xD51B44 001 Rt MSR fpsr, rT
      0xD53B44 001 Rt  MRS rT, fpsr
sewardjbbcf1882014-01-12 12:49:10 +00004590 */
4591 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4592 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4593 Bool toSys = INSN(21,21) == 0;
4594 UInt tt = INSN(4,0);
4595 if (toSys) {
4596 stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4597 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4598 } else {
4599 putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4600 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4601 }
4602 return True;
4603 }
4604 /* Cases for NZCV
4605 D51B42 000 Rt MSR nzcv, rT
4606 D53B42 000 Rt MRS rT, nzcv
4607 */
4608 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4609 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4610 Bool toSys = INSN(21,21) == 0;
4611 UInt tt = INSN(4,0);
4612 if (toSys) {
4613 IRTemp t = newTemp(Ity_I64);
4614 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4615 setFlags_COPY(t);
4616 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4617 } else {
4618 IRTemp res = newTemp(Ity_I64);
4619 assign(res, mk_arm64g_calculate_flags_nzcv());
4620 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4621 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4622 }
4623 return True;
4624 }
sewardjd512d102014-02-21 14:49:44 +00004625 /* Cases for DCZID_EL0
4626 Don't support arbitrary reads and writes to this register. Just
4627 return the value 16, which indicates that the DC ZVA instruction
4628 is not permitted, so we don't have to emulate it.
4629 D5 3B 00 111 Rt MRS rT, dczid_el0
4630 */
4631 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4632 UInt tt = INSN(4,0);
4633 putIReg64orZR(tt, mkU64(1<<4));
4634 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4635 return True;
4636 }
sewardj65902992014-05-03 21:20:56 +00004637 /* Cases for CTR_EL0
4638 We just handle reads, and make up a value from the D and I line
4639 sizes in the VexArchInfo we are given, and patch in the following
4640 fields that the Foundation model gives ("natively"):
4641 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
      D5 3B 00 001 Rt  MRS rT, ctr_el0
4643 */
4644 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
4645 UInt tt = INSN(4,0);
4646 /* Need to generate a value from dMinLine_lg2_szB and
4647 dMinLine_lg2_szB. The value in the register is in 32-bit
4648 units, so need to subtract 2 from the values in the
4649 VexArchInfo. We can assume that the values here are valid --
4650 disInstr_ARM64 checks them -- so there's no need to deal with
4651 out-of-range cases. */
4652 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4653 && archinfo->arm64_dMinLine_lg2_szB <= 17
4654 && archinfo->arm64_iMinLine_lg2_szB >= 2
4655 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4656 UInt val
4657 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
4658 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
4659 putIReg64orZR(tt, mkU64(val));
4660 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
4661 return True;
4662 }
sewardjbbcf1882014-01-12 12:49:10 +00004663
sewardj65902992014-05-03 21:20:56 +00004664 /* ------------------ IC_IVAU ------------------ */
4665 /* D5 0B 75 001 Rt ic ivau, rT
4666 */
4667 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
4668 /* We will always be provided with a valid iMinLine value. */
4669 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
4670 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4671 /* Round the requested address, in rT, down to the start of the
4672 containing block. */
4673 UInt tt = INSN(4,0);
4674 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
4675 IRTemp addr = newTemp(Ity_I64);
4676 assign( addr, binop( Iop_And64,
4677 getIReg64orZR(tt),
4678 mkU64(~(lineszB - 1))) );
4679 /* Set the invalidation range, request exit-and-invalidate, with
4680 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00004681 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4682 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00004683 /* be paranoid ... */
4684 stmt( IRStmt_MBE(Imbe_Fence) );
4685 putPC(mkU64( guest_PC_curr_instr + 4 ));
4686 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00004687 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00004688 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
4689 return True;
4690 }
4691
4692 /* ------------------ DC_CVAU ------------------ */
4693 /* D5 0B 7B 001 Rt dc cvau, rT
4694 */
4695 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
4696 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00004697 dMinLine size, and request an Ijk_FlushDCache instead of
4698 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00004699 /* We will always be provided with a valid dMinLine value. */
4700 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4701 && archinfo->arm64_dMinLine_lg2_szB <= 17);
4702 /* Round the requested address, in rT, down to the start of the
4703 containing block. */
4704 UInt tt = INSN(4,0);
4705 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
4706 IRTemp addr = newTemp(Ity_I64);
4707 assign( addr, binop( Iop_And64,
4708 getIReg64orZR(tt),
4709 mkU64(~(lineszB - 1))) );
4710 /* Set the flush range, request exit-and-flush, with
4711 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00004712 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4713 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00004714 /* be paranoid ... */
4715 stmt( IRStmt_MBE(Imbe_Fence) );
4716 putPC(mkU64( guest_PC_curr_instr + 4 ));
4717 dres->whatNext = Dis_StopHere;
4718 dres->jk_StopHere = Ijk_FlushDCache;
4719 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
4720 return True;
4721 }
4722
4723 /* ------------------ ISB, DMB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00004724 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00004725 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004726 DIP("isb\n");
4727 return True;
4728 }
4729 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00004730 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004731 DIP("dmb ish\n");
4732 return True;
4733 }
sewardj65902992014-05-03 21:20:56 +00004734 if (INSN(31,0) == 0xD5033B9F) {
4735 stmt(IRStmt_MBE(Imbe_Fence));
4736 DIP("dsb ish\n");
4737 return True;
4738 }
sewardjbbcf1882014-01-12 12:49:10 +00004739
sewardjdc9259c2014-02-27 11:10:19 +00004740 /* -------------------- NOP -------------------- */
4741 if (INSN(31,0) == 0xD503201F) {
4742 DIP("nop\n");
4743 return True;
4744 }
4745
sewardjbbcf1882014-01-12 12:49:10 +00004746 //fail:
4747 vex_printf("ARM64 front end: branch_etc\n");
4748 return False;
4749# undef INSN
4750}
4751
4752
4753/*------------------------------------------------------------*/
4754/*--- SIMD and FP instructions ---*/
4755/*------------------------------------------------------------*/
4756
sewardjecde6972014-02-05 11:01:19 +00004757/* begin FIXME -- rm temp scaffolding */
4758static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4759static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004760
sewardjecde6972014-02-05 11:01:19 +00004761static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4762static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004763static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4764static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4765
sewardjecde6972014-02-05 11:01:19 +00004766static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4767static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004768static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4769static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4770
sewardjfab09142014-02-10 10:28:13 +00004771static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4772static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004773static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4774static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
sewardjecde6972014-02-05 11:01:19 +00004775/* end FIXME -- rm temp scaffolding */
4776
sewardjbbcf1882014-01-12 12:49:10 +00004777/* Generate N copies of |bit| in the bottom of a ULong. */
4778static ULong Replicate ( ULong bit, Int N )
4779{
sewardj606c4ba2014-01-26 19:11:14 +00004780 vassert(bit <= 1 && N >= 1 && N < 64);
4781 if (bit == 0) {
4782 return 0;
4783 } else {
4784 /* Careful. This won't work for N == 64. */
4785 return (1ULL << N) - 1;
4786 }
sewardjbbcf1882014-01-12 12:49:10 +00004787}
4788
sewardjfab09142014-02-10 10:28:13 +00004789static ULong Replicate32x2 ( ULong bits32 )
4790{
4791 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4792 return (bits32 << 32) | bits32;
4793}
4794
4795static ULong Replicate16x4 ( ULong bits16 )
4796{
4797 vassert(0 == (bits16 & ~0xFFFFULL));
4798 return Replicate32x2((bits16 << 16) | bits16);
4799}
4800
4801static ULong Replicate8x8 ( ULong bits8 )
4802{
4803 vassert(0 == (bits8 & ~0xFFULL));
4804 return Replicate16x4((bits8 << 8) | bits8);
4805}
4806
4807/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
4808 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
4809 is 64. In the former case, the upper 32 bits of the returned value
4810 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00004811static ULong VFPExpandImm ( ULong imm8, Int N )
4812{
sewardj606c4ba2014-01-26 19:11:14 +00004813 vassert(imm8 <= 0xFF);
4814 vassert(N == 32 || N == 64);
4815 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
4816 Int F = N - E - 1;
4817 ULong imm8_6 = (imm8 >> 6) & 1;
4818 /* sign: 1 bit */
4819 /* exp: E bits */
4820 /* frac: F bits */
4821 ULong sign = (imm8 >> 7) & 1;
4822 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
4823 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
4824 vassert(sign < (1ULL << 1));
4825 vassert(exp < (1ULL << E));
4826 vassert(frac < (1ULL << F));
4827 vassert(1 + E + F == N);
4828 ULong res = (sign << (E+F)) | (exp << F) | frac;
4829 return res;
sewardjbbcf1882014-01-12 12:49:10 +00004830}
4831
sewardjfab09142014-02-10 10:28:13 +00004832/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
4833 This might fail, as indicated by the returned Bool. Page 2530 of
4834 the manual. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   /* Expand the AdvSIMD modified-immediate encoding (op, cmode, imm8)
      into a 64-bit value in *res.  Returns False for the reserved
      encodings (in which case *res is left as zero), True otherwise. */
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64 = 0;
   /* testimm8 marks the cases where imm8 == 0 is a reserved encoding;
      checked at the end. */
   Bool testimm8 = False;

   switch (cmode >> 1) {
      case 0:
         /* imm8 replicated into both 32-bit halves */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         /* imm8 << 8, replicated 32x2 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         /* imm8 << 16, replicated 32x2 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         /* imm8 << 24, replicated 32x2 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4:
         /* imm8 replicated into all four 16-bit lanes */
          testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         /* imm8 << 8, replicated 16x4 */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* "shifted ones" forms: low 8 or 16 bits filled with ones */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         /* cmode<0>=0, op=0: imm8 replicated into every byte lane */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* cmode<0>=0, op=1: each bit of imm8 expanded to a full byte
            (bit 7 -> byte 7 .. bit 0 -> byte 0) */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* cmode<0>=1, op=0: 32-bit FP immediate (VFPExpandImm-style),
            replicated into both halves */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* cmode<0>=1, op=1: 64-bit FP immediate */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //         :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   /* Reject the reserved all-zeroes-imm8 encodings. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
4912
4913
sewardj606c4ba2014-01-26 19:11:14 +00004914/* Help a bit for decoding laneage for vector operations that can be
4915 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
4916 and SZ bits, typically for vector floating point. */
4917static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
4918 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
4919 /*OUT*/const HChar** arrSpec,
4920 Bool bitQ, Bool bitSZ )
4921{
4922 vassert(bitQ == True || bitQ == False);
4923 vassert(bitSZ == True || bitSZ == False);
4924 if (bitQ && bitSZ) { // 2x64
4925 if (tyI) *tyI = Ity_I64;
4926 if (tyF) *tyF = Ity_F64;
4927 if (nLanes) *nLanes = 2;
4928 if (zeroUpper) *zeroUpper = False;
4929 if (arrSpec) *arrSpec = "2d";
4930 return True;
4931 }
4932 if (bitQ && !bitSZ) { // 4x32
4933 if (tyI) *tyI = Ity_I32;
4934 if (tyF) *tyF = Ity_F32;
4935 if (nLanes) *nLanes = 4;
4936 if (zeroUpper) *zeroUpper = False;
4937 if (arrSpec) *arrSpec = "4s";
4938 return True;
4939 }
4940 if (!bitQ && !bitSZ) { // 2x32
4941 if (tyI) *tyI = Ity_I32;
4942 if (tyF) *tyF = Ity_F32;
4943 if (nLanes) *nLanes = 2;
4944 if (zeroUpper) *zeroUpper = True;
4945 if (arrSpec) *arrSpec = "2s";
4946 return True;
4947 }
4948 // Else impliedly 1x64, which isn't allowed.
4949 return False;
4950}
4951
4952/* Helper for decoding laneage for simple vector operations,
4953 eg integer add. */
4954static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
4955 /*OUT*/const HChar** arrSpec,
4956 Bool bitQ, UInt szBlg2 )
4957{
4958 vassert(bitQ == True || bitQ == False);
4959 vassert(szBlg2 < 4);
4960 Bool zu = False;
4961 const HChar* as = NULL;
4962 switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
4963 case 0: zu = True; as = "8b"; break;
4964 case 1: zu = False; as = "16b"; break;
4965 case 2: zu = True; as = "4h"; break;
4966 case 3: zu = False; as = "8h"; break;
4967 case 4: zu = True; as = "2s"; break;
4968 case 5: zu = False; as = "4s"; break;
4969 case 6: return False; // impliedly 1x64
4970 case 7: zu = False; as = "2d"; break;
4971 default: vassert(0);
4972 }
4973 vassert(as);
4974 if (arrSpec) *arrSpec = as;
4975 if (zeroUpper) *zeroUpper = zu;
4976 return True;
4977}
4978
4979
sewardje520bb32014-02-17 11:00:53 +00004980/* Helper for decoding laneage for shift-style vector operations
4981 that involve an immediate shift amount. */
4982static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
4983 UInt immh, UInt immb )
4984{
4985 vassert(immh < (1<<4));
4986 vassert(immb < (1<<3));
4987 UInt immhb = (immh << 3) | immb;
4988 if (immh & 8) {
4989 if (shift) *shift = 128 - immhb;
4990 if (szBlg2) *szBlg2 = 3;
4991 return True;
4992 }
4993 if (immh & 4) {
4994 if (shift) *shift = 64 - immhb;
4995 if (szBlg2) *szBlg2 = 2;
4996 return True;
4997 }
4998 if (immh & 2) {
4999 if (shift) *shift = 32 - immhb;
5000 if (szBlg2) *szBlg2 = 1;
5001 return True;
5002 }
5003 if (immh & 1) {
5004 if (shift) *shift = 16 - immhb;
5005 if (szBlg2) *szBlg2 = 0;
5006 return True;
5007 }
5008 return False;
5009}
5010
5011
sewardjecde6972014-02-05 11:01:19 +00005012/* Generate IR to fold all lanes of the V128 value in 'src' as
5013 characterised by the operator 'op', and return the result in the
5014 bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane.  'op' must be one of
      the Min/Max lane ops handled below. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: clone each 64-bit half across the vector. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: clone each 32-bit quarter across the vector. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: clone each 16-bit eighth across the vector. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: one vector per source byte lane, with that lane
            cloned into all 16 positions. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Fold the 16 cloned vectors pairwise with 'op' ... */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* ... and keep only lane 0 of the folded result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as the 8x16 case, but for 8 lanes of 16 bits,
            so one fewer clone/fold stage is needed. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Fold the 8 cloned vectors pairwise with 'op'. */
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only lane 0 (the low 16 bits). */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again, for 4 lanes of 32 bits. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only lane 0 (the low 32 bits). */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5202
5203
sewardj92d0ae32014-04-03 13:48:54 +00005204/* Generate IR for TBL and TBX. This deals with the 128 bit case
5205 only. */
5206static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5207 IRTemp oor_values )
5208{
5209 vassert(len >= 0 && len <= 3);
5210
5211 /* Generate some useful constants as concisely as possible. */
5212 IRTemp half15 = newTemp(Ity_I64);
5213 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5214 IRTemp half16 = newTemp(Ity_I64);
5215 assign(half16, mkU64(0x1010101010101010ULL));
5216
5217 /* A zero vector */
5218 IRTemp allZero = newTemp(Ity_V128);
5219 assign(allZero, mkV128(0x0000));
5220 /* A vector containing 15 in each 8-bit lane */
5221 IRTemp all15 = newTemp(Ity_V128);
5222 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5223 /* A vector containing 16 in each 8-bit lane */
5224 IRTemp all16 = newTemp(Ity_V128);
5225 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5226 /* A vector containing 32 in each 8-bit lane */
5227 IRTemp all32 = newTemp(Ity_V128);
5228 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5229 /* A vector containing 48 in each 8-bit lane */
5230 IRTemp all48 = newTemp(Ity_V128);
5231 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5232 /* A vector containing 64 in each 8-bit lane */
5233 IRTemp all64 = newTemp(Ity_V128);
5234 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5235
5236 /* Group the 16/32/48/64 vectors so as to be indexable. */
5237 IRTemp allXX[4] = { all16, all32, all48, all64 };
5238
5239 /* Compute the result for each table vector, with zeroes in places
5240 where the index values are out of range, and OR them into the
5241 running vector. */
5242 IRTemp running_result = newTemp(Ity_V128);
5243 assign(running_result, mkV128(0));
5244
5245 UInt tabent;
5246 for (tabent = 0; tabent <= len; tabent++) {
5247 vassert(tabent >= 0 && tabent < 4);
5248 IRTemp bias = newTemp(Ity_V128);
5249 assign(bias,
5250 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5251 IRTemp biased_indices = newTemp(Ity_V128);
5252 assign(biased_indices,
5253 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5254 IRTemp valid_mask = newTemp(Ity_V128);
5255 assign(valid_mask,
5256 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5257 IRTemp safe_biased_indices = newTemp(Ity_V128);
5258 assign(safe_biased_indices,
5259 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5260 IRTemp results_or_junk = newTemp(Ity_V128);
5261 assign(results_or_junk,
5262 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5263 mkexpr(safe_biased_indices)));
5264 IRTemp results_or_zero = newTemp(Ity_V128);
5265 assign(results_or_zero,
5266 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5267 /* And OR that into the running result. */
5268 IRTemp tmp = newTemp(Ity_V128);
5269 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5270 mkexpr(running_result)));
5271 running_result = tmp;
5272 }
5273
5274 /* So now running_result holds the overall result where the indices
5275 are in range, and zero in out-of-range lanes. Now we need to
5276 compute an overall validity mask and use this to copy in the
5277 lanes in the oor_values for out of range indices. This is
5278 unnecessary for TBL but will get folded out by iropt, so we lean
5279 on that and generate the same code for TBL and TBX here. */
5280 IRTemp overall_valid_mask = newTemp(Ity_V128);
5281 assign(overall_valid_mask,
5282 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5283 IRTemp result = newTemp(Ity_V128);
5284 assign(result,
5285 binop(Iop_OrV128,
5286 mkexpr(running_result),
5287 binop(Iop_AndV128,
5288 mkexpr(oor_values),
5289 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5290 return result;
5291}
5292
5293
sewardjbbcf1882014-01-12 12:49:10 +00005294static
5295Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5296{
5297# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5298
5299 /* ---------------- FMOV (general) ---------------- */
5300 /* case 30 23 20 18 15 9 4
5301 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
5302 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
5303 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
5304
5305 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
5306 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
5307 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
5308 */
5309 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5310 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5311 UInt sf = INSN(31,31);
5312 UInt ty = INSN(23,22); // type
5313 UInt rm = INSN(20,19); // rmode
5314 UInt op = INSN(18,16); // opcode
5315 UInt nn = INSN(9,5);
5316 UInt dd = INSN(4,0);
5317 UInt ix = 0; // case
5318 if (sf == 0) {
5319 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5320 ix = 1;
5321 else
5322 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5323 ix = 4;
5324 } else {
5325 vassert(sf == 1);
5326 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5327 ix = 2;
5328 else
5329 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5330 ix = 5;
5331 else
5332 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5333 ix = 3;
5334 else
5335 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5336 ix = 6;
5337 }
5338 if (ix > 0) {
5339 switch (ix) {
5340 case 1:
5341 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005342 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005343 DIP("fmov s%u, w%u\n", dd, nn);
5344 break;
5345 case 2:
5346 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005347 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005348 DIP("fmov d%u, x%u\n", dd, nn);
5349 break;
5350 case 3:
sewardj606c4ba2014-01-26 19:11:14 +00005351 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005352 DIP("fmov v%u.d[1], x%u\n", dd, nn);
5353 break;
5354 case 4:
sewardj606c4ba2014-01-26 19:11:14 +00005355 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +00005356 DIP("fmov w%u, s%u\n", dd, nn);
5357 break;
5358 case 5:
sewardj606c4ba2014-01-26 19:11:14 +00005359 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +00005360 DIP("fmov x%u, d%u\n", dd, nn);
5361 break;
5362 case 6:
sewardj606c4ba2014-01-26 19:11:14 +00005363 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005364 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5365 break;
5366 default:
5367 vassert(0);
5368 }
5369 return True;
5370 }
5371 /* undecodable; fall through */
5372 }
5373
5374 /* -------------- FMOV (scalar, immediate) -------------- */
5375 /* 31 28 23 20 12 9 4
5376 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
5377 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
5378 */
5379 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5380 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5381 Bool isD = INSN(22,22) == 1;
5382 UInt imm8 = INSN(20,13);
5383 UInt dd = INSN(4,0);
5384 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
5385 if (!isD) {
sewardjaeeb31d2014-01-12 18:23:45 +00005386 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
sewardjbbcf1882014-01-12 12:49:10 +00005387 }
5388 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005389 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5390 DIP("fmov %s, #0x%llx\n",
5391 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
sewardjbbcf1882014-01-12 12:49:10 +00005392 return True;
5393 }
5394
sewardjfab09142014-02-10 10:28:13 +00005395 /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5396 /* 31 28 18 15 11 9 4
5397 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0)
5398 MOV Vd.2d #imm (q=1)
5399 Allowable op:cmode
5400 FMOV = 1:1111
5401        MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, 1:1110
5402 */
5403 if (INSN(31,31) == 0
5404 && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5405 && INSN(11,10) == BITS2(0,1)) {
5406 UInt bitQ = INSN(30,30);
5407 UInt bitOP = INSN(29,29);
5408 UInt cmode = INSN(15,12);
5409 UInt imm8 = (INSN(18,16) << 5) | INSN(9,5);
5410 UInt dd = INSN(4,0);
5411 ULong imm64lo = 0;
5412 UInt op_cmode = (bitOP << 4) | cmode;
5413 Bool ok = False;
5414 switch (op_cmode) {
5415 case BITS5(1,1,1,1,1): // 1:1111
5416 case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5417 case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5418 case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5419 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5420 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5421 case BITS5(1,1,1,1,0): // 1:1110
5422 ok = True; break;
5423 default:
5424 break;
5425 }
5426 if (ok) {
5427 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5428 }
5429 if (ok) {
5430 ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
5431 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
sewardjbd83e982014-04-08 15:23:42 +00005432 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
sewardjfab09142014-02-10 10:28:13 +00005433 return True;
5434 }
5435 /* else fall through */
5436 }
sewardjfab09142014-02-10 10:28:13 +00005437
sewardjbbcf1882014-01-12 12:49:10 +00005438 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
5439 /* 31 28 23 21 20 18 15 9 4 ix
5440 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0
5441 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1
5442 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2
5443 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3
5444
5445 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4
5446 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5
5447 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6
5448 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7
5449
5450 These are signed/unsigned conversion from integer registers to
5451 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
5452 */
5453 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
5454 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5455 Bool isI64 = INSN(31,31) == 1;
5456 Bool isF64 = INSN(22,22) == 1;
5457 Bool isU = INSN(16,16) == 1;
5458 UInt nn = INSN(9,5);
5459 UInt dd = INSN(4,0);
5460 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
5461 const IROp ops[8]
5462 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
5463 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
5464 IRExpr* src = getIRegOrZR(isI64, nn);
5465 IRExpr* res = (isF64 && !isI64)
5466 ? unop(ops[ix], src)
5467 : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
5468 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005469 putQRegLO(dd, res);
sewardjbbcf1882014-01-12 12:49:10 +00005470 DIP("%ccvtf %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005471 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
sewardjbbcf1882014-01-12 12:49:10 +00005472 nameIRegOrZR(isI64, nn));
5473 return True;
5474 }
5475
sewardj5860ec72014-03-01 11:19:45 +00005476 /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
sewardjbbcf1882014-01-12 12:49:10 +00005477 /* 31 23 20 15 11 9 4
5478 ---------------- 0000 ------ FMUL --------
5479 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
5480 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
5481 ---------------- 0010 ------ FADD --------
5482 ---------------- 0011 ------ FSUB --------
5483 ---------------- 1000 ------ FNMUL --------
5484 */
5485 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5486 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5487 Bool isD = INSN(22,22) == 1;
5488 UInt mm = INSN(20,16);
5489 UInt op = INSN(15,12);
5490 UInt nn = INSN(9,5);
5491 UInt dd = INSN(4,0);
5492 IROp iop = Iop_INVALID;
5493 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005494 Bool neg = False;
5495 const HChar* nm = "???";
5496 switch (op) {
5497 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break;
5498 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break;
5499 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break;
5500 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break;
5501 case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
5502 neg = True; break;
5503 default: return False;
5504 }
5505 vassert(iop != Iop_INVALID);
5506 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005507 getQRegLO(nn, ty), getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005508 IRTemp res = newTemp(ty);
5509 assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
5510 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005511 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005512 DIP("%s %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005513 nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005514 return True;
5515 }
5516
5517 /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
5518 /* 31 23 21 16 14 9 4
5519 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5520 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5521 ------------------ 01 --------- FABS ------
5522 ------------------ 10 --------- FNEG ------
sewardjfab09142014-02-10 10:28:13 +00005523 ------------------ 11 --------- FSQRT -----
sewardjbbcf1882014-01-12 12:49:10 +00005524 */
5525 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5526 && INSN(21,17) == BITS5(1,0,0,0,0)
5527 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5528 Bool isD = INSN(22,22) == 1;
5529 UInt opc = INSN(16,15);
5530 UInt nn = INSN(9,5);
5531 UInt dd = INSN(4,0);
5532 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005533 IRTemp res = newTemp(ty);
5534 if (opc == BITS2(0,0)) {
sewardj606c4ba2014-01-26 19:11:14 +00005535 assign(res, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005536 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005537 putQRegLO(dd, mkexpr(res));
5538 DIP("fmov %s, %s\n",
5539 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005540 return True;
5541 }
5542 if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
5543 Bool isAbs = opc == BITS2(0,1);
5544 IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005545 assign(res, unop(op, getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005546 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005547 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005548 DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
sewardj606c4ba2014-01-26 19:11:14 +00005549 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005550 return True;
5551 }
5552 if (opc == BITS2(1,1)) {
5553 assign(res,
5554 binop(mkSQRTF(ty),
sewardj606c4ba2014-01-26 19:11:14 +00005555 mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005556 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005557 putQRegLO(dd, mkexpr(res));
5558 DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005559 return True;
5560 }
5561 /* else fall through; other cases are ATC */
5562 }
5563
sewardjfab09142014-02-10 10:28:13 +00005564 /* ---------------- F{ABS,NEG} (vector) ---------------- */
5565 /* 31 28 22 21 16 9 4
5566 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T
5567 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T
5568 */
5569 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
5570 && INSN(21,17) == BITS5(1,0,0,0,0)
5571 && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
5572 UInt bitQ = INSN(30,30);
5573 UInt bitSZ = INSN(22,22);
5574 Bool isFNEG = INSN(29,29) == 1;
5575 UInt nn = INSN(9,5);
5576 UInt dd = INSN(4,0);
5577 const HChar* ar = "??";
5578 IRType tyF = Ity_INVALID;
5579 Bool zeroHI = False;
5580 Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
5581 (Bool)bitQ, (Bool)bitSZ);
5582 if (ok) {
sewardj32d86752014-03-02 12:47:18 +00005583 vassert(tyF == Ity_F64 || tyF == Ity_F32);
sewardjfab09142014-02-10 10:28:13 +00005584 IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
5585 : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
5586 IRTemp res = newTemp(Ity_V128);
5587 assign(res, unop(op, getQReg128(nn)));
5588 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
5589 : mkexpr(res));
5590 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
5591 nameQReg128(dd), ar, nameQReg128(nn), ar);
5592 return True;
5593 }
5594 /* else fall through */
5595 }
5596
sewardjbbcf1882014-01-12 12:49:10 +00005597 /* -------------------- FCMP,FCMPE -------------------- */
5598 /* 31 23 20 15 9 4
5599 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
5600 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
5601 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
5602 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
5603
5604 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
5605 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
5606 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
5607 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
5608
5609 FCMPE generates Invalid Operation exn if either arg is any kind
5610 of NaN. FCMP generates Invalid Operation exn if either arg is a
5611 signalling NaN. We ignore this detail here and produce the same
5612 IR for both.
5613 */
5614 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5615 && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
5616 Bool isD = INSN(22,22) == 1;
5617 UInt mm = INSN(20,16);
5618 UInt nn = INSN(9,5);
5619 Bool isCMPE = INSN(4,4) == 1;
5620 Bool cmpZero = INSN(3,3) == 1;
5621 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005622 Bool valid = True;
5623 if (cmpZero && mm != 0) valid = False;
5624 if (valid) {
5625 IRTemp argL = newTemp(ty);
5626 IRTemp argR = newTemp(ty);
5627 IRTemp irRes = newTemp(Ity_I32);
sewardj606c4ba2014-01-26 19:11:14 +00005628 assign(argL, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005629 assign(argR,
5630 cmpZero
5631 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
sewardj606c4ba2014-01-26 19:11:14 +00005632 : getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005633 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
5634 mkexpr(argL), mkexpr(argR)));
5635 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
5636 IRTemp nzcv_28x0 = newTemp(Ity_I64);
5637 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
5638 setFlags_COPY(nzcv_28x0);
sewardj606c4ba2014-01-26 19:11:14 +00005639 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
5640 cmpZero ? "#0.0" : nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005641 return True;
5642 }
5643 }
5644
5645 /* -------------------- F{N}M{ADD,SUB} -------------------- */
5646 /* 31 22 20 15 14 9 4 ix
5647 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
5648 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
5649 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
5650 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
5651 where Fx=Dx when sz=1, Fx=Sx when sz=0
5652
5653 -----SPEC------ ----IMPL----
5654 fmadd a + n * m a + n * m
5655 fmsub a + (-n) * m a - n * m
5656 fnmadd (-a) + (-n) * m -(a + n * m)
5657 fnmsub (-a) + n * m -(a - n * m)
5658 */
5659 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
5660 Bool isD = INSN(22,22) == 1;
5661 UInt mm = INSN(20,16);
5662 UInt aa = INSN(14,10);
5663 UInt nn = INSN(9,5);
5664 UInt dd = INSN(4,0);
5665 UInt ix = (INSN(21,21) << 1) | INSN(15,15);
5666 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005667 IROp opADD = mkADDF(ty);
5668 IROp opSUB = mkSUBF(ty);
5669 IROp opMUL = mkMULF(ty);
5670 IROp opNEG = mkNEGF(ty);
5671 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005672 IRExpr* eA = getQRegLO(aa, ty);
5673 IRExpr* eN = getQRegLO(nn, ty);
5674 IRExpr* eM = getQRegLO(mm, ty);
sewardjbbcf1882014-01-12 12:49:10 +00005675 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
5676 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
5677 switch (ix) {
5678 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
5679 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
5680 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
5681 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
5682 default: vassert(0);
5683 }
5684 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005685 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005686 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
5687 DIP("%s %s, %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005688 names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
5689 nameQRegLO(mm, ty), nameQRegLO(aa, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005690 return True;
5691 }
5692
5693 /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
5694 /* 30 23 20 18 15 9 4
5695 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
5696 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
5697 ---------------- 01 -------------- FCVTP-------- (round to +inf)
5698 ---------------- 10 -------------- FCVTM-------- (round to -inf)
5699 ---------------- 11 -------------- FCVTZ-------- (round to zero)
5700
5701 Rd is Xd when sf==1, Wd when sf==0
5702 Fn is Dn when x==1, Sn when x==0
5703 20:19 carry the rounding mode, using the same encoding as FPCR
5704 */
5705 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5706 && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5707 Bool isI64 = INSN(31,31) == 1;
5708 Bool isF64 = INSN(22,22) == 1;
5709 UInt rm = INSN(20,19);
5710 Bool isU = INSN(16,16) == 1;
5711 UInt nn = INSN(9,5);
5712 UInt dd = INSN(4,0);
5713 /* Decide on the IR rounding mode to use. */
5714 IRRoundingMode irrm = 8; /*impossible*/
5715 HChar ch = '?';
5716 switch (rm) {
5717 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
5718 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
5719 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
5720 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
5721 default: vassert(0);
5722 }
5723 vassert(irrm != 8);
5724 /* Decide on the conversion primop, based on the source size,
5725 dest size and signedness (8 possibilities). Case coding:
5726 F32 ->s I32 0
5727 F32 ->u I32 1
5728 F32 ->s I64 2
5729 F32 ->u I64 3
5730 F64 ->s I32 4
5731 F64 ->u I32 5
5732 F64 ->s I64 6
5733 F64 ->u I64 7
5734 */
5735 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
5736 vassert(ix < 8);
5737 const IROp ops[8]
5738 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
5739 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
5740 IROp op = ops[ix];
5741 // A bit of ATCery: bounce all cases we haven't seen an example of.
5742 if (/* F32toI32S */
5743 (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
sewardj1eaaec22014-03-07 22:52:19 +00005744 || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
5745 || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005746 /* F32toI32U */
sewardj1eaaec22014-03-07 22:52:19 +00005747 || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
5748 || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005749 /* F32toI64S */
sewardj1eaaec22014-03-07 22:52:19 +00005750 || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005751 /* F32toI64U */
5752 || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
5753 /* F64toI32S */
5754 || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
5755 || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
5756 || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
5757 /* F64toI32U */
sewardjbbcf1882014-01-12 12:49:10 +00005758 || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005759 || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
5760 || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005761 /* F64toI64S */
5762 || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005763 || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
5764 || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005765 /* F64toI64U */
5766 || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005767 || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005768 ) {
5769 /* validated */
5770 } else {
5771 return False;
5772 }
sewardjbbcf1882014-01-12 12:49:10 +00005773 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5774 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5775 IRTemp src = newTemp(srcTy);
5776 IRTemp dst = newTemp(dstTy);
sewardj606c4ba2014-01-26 19:11:14 +00005777 assign(src, getQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005778 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5779 putIRegOrZR(isI64, dd, mkexpr(dst));
5780 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
sewardj606c4ba2014-01-26 19:11:14 +00005781 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005782 return True;
5783 }
5784
sewardj1eaaec22014-03-07 22:52:19 +00005785 /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
5786 /* 30 23 20 18 15 9 4
5787 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn
5788 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn
5789 Fn is Dn when x==1, Sn when x==0
5790 */
5791 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
5792 && INSN(21,16) == BITS6(1,0,0,1,0,0)
5793 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5794 Bool isI64 = INSN(31,31) == 1;
5795 Bool isF64 = INSN(22,22) == 1;
5796 UInt nn = INSN(9,5);
5797 UInt dd = INSN(4,0);
5798 /* Decide on the IR rounding mode to use. */
5799 /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
5800 IRRoundingMode irrm = Irrm_NEAREST;
5801 /* Decide on the conversion primop. */
5802 IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
5803 : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
5804 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5805 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5806 IRTemp src = newTemp(srcTy);
5807 IRTemp dst = newTemp(dstTy);
5808 assign(src, getQRegLO(nn, srcTy));
5809 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5810 putIRegOrZR(isI64, dd, mkexpr(dst));
5811 DIP("fcvtas %s, %s (KLUDGED)\n",
5812 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
5813 return True;
5814 }
5815
sewardjbbcf1882014-01-12 12:49:10 +00005816 /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
5817 /* 31 23 21 17 14 9 4
5818 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
5819 rm
5820 x==0 => S-registers, x==1 => D-registers
5821 rm (17:15) encodings:
5822 111 per FPCR (FRINTI)
5823 001 +inf (FRINTP)
5824 010 -inf (FRINTM)
5825 011 zero (FRINTZ)
5826 000 tieeven
sewardj1eaaec22014-03-07 22:52:19 +00005827 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjbbcf1882014-01-12 12:49:10 +00005828 110 per FPCR + "exact = TRUE"
5829 101 unallocated
5830 */
5831 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5832 && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
5833 Bool isD = INSN(22,22) == 1;
5834 UInt rm = INSN(17,15);
5835 UInt nn = INSN(9,5);
5836 UInt dd = INSN(4,0);
5837 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005838 IRExpr* irrmE = NULL;
5839 UChar ch = '?';
5840 switch (rm) {
5841 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
5842 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
5843 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
sewardj1eaaec22014-03-07 22:52:19 +00005844 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
5845 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjbbcf1882014-01-12 12:49:10 +00005846 default: break;
5847 }
5848 if (irrmE) {
5849 IRTemp src = newTemp(ty);
5850 IRTemp dst = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005851 assign(src, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005852 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
5853 irrmE, mkexpr(src)));
5854 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005855 putQRegLO(dd, mkexpr(dst));
5856 DIP("frint%c %s, %s\n",
5857 ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005858 return True;
5859 }
5860 /* else unhandled rounding mode case -- fall through */
5861 }
5862
5863 /* ------------------ FCVT (scalar) ------------------ */
5864 /* 31 23 21 16 14 9 4
5865 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
5866 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
5867 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005868 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardjbbcf1882014-01-12 12:49:10 +00005869 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005870 --------- 01 ----- 00 --------- FCVT Sd, Dn
sewardjbbcf1882014-01-12 12:49:10 +00005871 Rounding, when dst is smaller than src, is per the FPCR.
5872 */
5873 if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
5874 && INSN(21,17) == BITS5(1,0,0,0,1)
5875 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5876 UInt b2322 = INSN(23,22);
5877 UInt b1615 = INSN(16,15);
5878 UInt nn = INSN(9,5);
5879 UInt dd = INSN(4,0);
5880 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
5881 /* Convert S to D */
5882 IRTemp res = newTemp(Ity_F64);
sewardj606c4ba2014-01-26 19:11:14 +00005883 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
sewardjbbcf1882014-01-12 12:49:10 +00005884 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005885 putQRegLO(dd, mkexpr(res));
5886 DIP("fcvt %s, %s\n",
5887 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
sewardjbbcf1882014-01-12 12:49:10 +00005888 return True;
5889 }
5890 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
5891 /* Convert D to S */
5892 IRTemp res = newTemp(Ity_F32);
5893 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005894 getQRegLO(nn, Ity_F64)));
sewardjbbcf1882014-01-12 12:49:10 +00005895 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005896 putQRegLO(dd, mkexpr(res));
5897 DIP("fcvt %s, %s\n",
5898 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
sewardjbbcf1882014-01-12 12:49:10 +00005899 return True;
5900 }
5901 /* else unhandled */
5902 }
5903
5904 /* ------------------ FABD (scalar) ------------------ */
5905 /* 31 23 20 15 9 4
5906 011 11110 111 m 110101 n d FABD Dd, Dn, Dm
5907 011 11110 101 m 110101 n d FABD Sd, Sn, Sm
5908 */
5909 if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
5910 && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
5911 Bool isD = INSN(22,22) == 1;
5912 UInt mm = INSN(20,16);
5913 UInt nn = INSN(9,5);
5914 UInt dd = INSN(4,0);
5915 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005916 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005917 assign(res, unop(mkABSF(ty),
5918 triop(mkSUBF(ty),
5919 mkexpr(mk_get_IR_rounding_mode()),
5920 getQRegLO(nn,ty), getQRegLO(mm,ty))));
sewardjbbcf1882014-01-12 12:49:10 +00005921 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005922 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005923 DIP("fabd %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005924 nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005925 return True;
5926 }
5927
sewardj606c4ba2014-01-26 19:11:14 +00005928 /* -------------- {S,U}CVTF (vector, integer) -------------- */
5929 /* 31 28 22 21 15 9 4
5930 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
5931 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
5932 with laneage:
5933 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
5934 */
5935 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
5936 && INSN(21,16) == BITS6(1,0,0,0,0,1)
5937 && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
5938 Bool isQ = INSN(30,30) == 1;
5939 Bool isU = INSN(29,29) == 1;
5940 Bool isF64 = INSN(22,22) == 1;
5941 UInt nn = INSN(9,5);
5942 UInt dd = INSN(4,0);
5943 if (isQ || !isF64) {
5944 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
5945 UInt nLanes = 0;
5946 Bool zeroHI = False;
5947 const HChar* arrSpec = NULL;
5948 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
5949 isQ, isF64 );
5950 IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
5951 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
5952 IRTemp rm = mk_get_IR_rounding_mode();
5953 UInt i;
5954 vassert(ok); /* the 'if' above should ensure this */
5955 for (i = 0; i < nLanes; i++) {
5956 putQRegLane(dd, i,
5957 binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
5958 }
5959 if (zeroHI) {
5960 putQRegLane(dd, 1, mkU64(0));
5961 }
5962 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
5963 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
5964 return True;
5965 }
5966 /* else fall through */
5967 }
5968
5969 /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
5970 /* 31 28 22 21 20 15 9 4 case
5971 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1
5972 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2
5973 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3
5974 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4
5975 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5
5976 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6
sewardje520bb32014-02-17 11:00:53 +00005977 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7
sewardj606c4ba2014-01-26 19:11:14 +00005978 */
5979 if (INSN(31,31) == 0
5980 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
5981 Bool isQ = INSN(30,30) == 1;
5982 UInt b29 = INSN(29,29);
5983 UInt b23 = INSN(23,23);
5984 Bool isF64 = INSN(22,22) == 1;
5985 UInt mm = INSN(20,16);
5986 UInt b1510 = INSN(15,10);
5987 UInt nn = INSN(9,5);
5988 UInt dd = INSN(4,0);
5989 UInt ix = 0;
5990 /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
5991 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
5992 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
5993 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
5994 else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
5995 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
sewardje520bb32014-02-17 11:00:53 +00005996 else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
sewardj606c4ba2014-01-26 19:11:14 +00005997 IRType laneTy = Ity_INVALID;
5998 Bool zeroHI = False;
5999 const HChar* arr = "??";
6000 Bool ok
6001 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6002 /* Skip MLA/MLS for the time being */
6003 if (ok && ix >= 1 && ix <= 4) {
6004 const IROp ops64[4]
6005 = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
6006 const IROp ops32[4]
6007 = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
6008 const HChar* names[4]
6009 = { "fadd", "fsub", "fmul", "fdiv" };
6010 IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
6011 IRTemp rm = mk_get_IR_rounding_mode();
6012 IRTemp t1 = newTemp(Ity_V128);
6013 IRTemp t2 = newTemp(Ity_V128);
6014 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjecde6972014-02-05 11:01:19 +00006015 assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
6016 : mkexpr(t1));
sewardj606c4ba2014-01-26 19:11:14 +00006017 putQReg128(dd, mkexpr(t2));
6018 DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
6019 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6020 return True;
6021 }
sewardjfab09142014-02-10 10:28:13 +00006022 if (ok && ix >= 5 && ix <= 6) {
6023 IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
6024 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6025 IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
6026 IRTemp rm = mk_get_IR_rounding_mode();
6027 IRTemp t1 = newTemp(Ity_V128);
6028 IRTemp t2 = newTemp(Ity_V128);
6029 // FIXME: double rounding; use FMA primops instead
6030 assign(t1, triop(opMUL,
6031 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6032 assign(t2, triop(ix == 5 ? opADD : opSUB,
6033 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
sewardje520bb32014-02-17 11:00:53 +00006034 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6035 : mkexpr(t2));
sewardjfab09142014-02-10 10:28:13 +00006036 DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
6037 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6038 return True;
6039 }
sewardje520bb32014-02-17 11:00:53 +00006040 if (ok && ix == 7) {
6041 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6042 IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6043 IRTemp rm = mk_get_IR_rounding_mode();
6044 IRTemp t1 = newTemp(Ity_V128);
6045 IRTemp t2 = newTemp(Ity_V128);
6046 // FIXME: use Abd primop instead?
6047 assign(t1, triop(opSUB,
6048 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6049 assign(t2, unop(opABS, mkexpr(t1)));
6050 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6051 : mkexpr(t2));
6052 DIP("fabd %s.%s, %s.%s, %s.%s\n",
6053 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6054 return True;
6055 }
sewardj606c4ba2014-01-26 19:11:14 +00006056 }
6057
sewardj2bd1ffe2014-03-27 18:59:00 +00006058 /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
6059 /* 31 28 22 20 15 9 4 case
6060 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm
6061 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm
6062 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm
6063 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm
6064 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm
6065 */
6066 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
6067 && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
6068 Bool isQ = INSN(30,30) == 1;
6069 UInt U = INSN(29,29);
6070 UInt E = INSN(23,23);
6071 Bool isF64 = INSN(22,22) == 1;
6072 UInt ac = INSN(11,11);
6073 UInt mm = INSN(20,16);
6074 UInt nn = INSN(9,5);
6075 UInt dd = INSN(4,0);
6076 /* */
6077 UInt EUac = (E << 2) | (U << 1) | ac;
6078 IROp opABS = Iop_INVALID;
6079 IROp opCMP = Iop_INVALID;
6080 IRType laneTy = Ity_INVALID;
6081 Bool zeroHI = False;
6082 Bool swap = True;
6083 const HChar* arr = "??";
6084 const HChar* nm = "??";
6085 Bool ok
6086 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6087 if (ok) {
6088 vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
6089 switch (EUac) {
6090 case BITS3(0,0,0):
6091 nm = "fcmeq";
6092 opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
6093 swap = False;
6094 break;
6095 case BITS3(0,1,0):
6096 nm = "fcmge";
6097 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6098 break;
6099 case BITS3(0,1,1):
6100 nm = "facge";
6101 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6102 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6103 break;
6104 case BITS3(1,1,0):
6105 nm = "fcmgt";
6106 opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6107 break;
6108 case BITS3(1,1,1):
6109 nm = "fcagt";
6110 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6111 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6112 break;
6113 default:
6114 break;
6115 }
6116 }
6117 if (opCMP != Iop_INVALID) {
6118 IRExpr* argN = getQReg128(nn);
6119 IRExpr* argM = getQReg128(mm);
6120 if (opABS != Iop_INVALID) {
6121 argN = unop(opABS, argN);
6122 argM = unop(opABS, argM);
6123 }
6124 IRExpr* res = swap ? binop(opCMP, argM, argN)
6125 : binop(opCMP, argN, argM);
6126 if (zeroHI) {
6127 res = unop(Iop_ZeroHI64ofV128, res);
6128 }
6129 putQReg128(dd, res);
6130 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6131 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6132 return True;
6133 }
6134 /* else fall through */
6135 }
6136
   /* -------------------- FCVTN -------------------- */
   /* 31 28 23 20 15 9 4
      0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
      where case q:s of 00: 16Fx4(lo) <- 32Fx4
                        01: 32Fx2(lo) <- 64Fx2
                        10: 16Fx4(hi) <- 32Fx4
                        11: 32Fx2(hi) <- 64Fx2
      Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   */
   if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
      UInt bQ = INSN(30,30);
      UInt bS = INSN(22,22);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (bS == 1) {
         /* Narrow both F64 lanes of Vn to F32, using the rounding
            mode currently held in FPCR. */
         IRTemp rm = mk_get_IR_rounding_mode();
         IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
         IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
         /* bQ == 0 (FCVTN): results go to F32 lanes 0,1;
            bQ == 1 (FCVTN2): results go to F32 lanes 2,3. */
         putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
         putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
         if (bQ == 0) {
            /* FCVTN (not FCVTN2) zeroes the upper 64 bits of Vd. */
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
             nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
         return True;
      }
      /* else fall through (bS == 0, the 16Fx4 <- 32Fx4 cases) */
   }
6167
   /* ---------------- ADD/SUB (vector) ---------------- */
   /* 31 28 23 21 20 15 9 4
      0q0 01110 size 1 m 100001 n d  ADD Vd.T, Vn.T, Vm.T
      0q1 01110 size 1 m 100001 n d  SUB Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);   /* log2 of the lane size in bytes */
      Bool isSUB  = INSN(29,29) == 1;
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opsADD[4]
            = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
         const IROp opsSUB[4]
            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
         vassert(szBlg2 < 4);
         IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
         IRTemp t = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         /* 64-bit (q==0) variants must leave the upper half of Vd zero. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isSUB ? "sub" : "add";
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6203
sewardjecde6972014-02-05 11:01:19 +00006204 /* ---------------- ADD/SUB (scalar) ---------------- */
6205 /* 31 28 23 21 20 15 9 4
6206 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm
6207 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm
6208 */
6209 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6210 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6211 Bool isSUB = INSN(29,29) == 1;
6212 UInt mm = INSN(20,16);
6213 UInt nn = INSN(9,5);
6214 UInt dd = INSN(4,0);
6215 IRTemp res = newTemp(Ity_I64);
6216 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6217 getQRegLane(nn, 0, Ity_I64),
6218 getQRegLane(mm, 0, Ity_I64)));
6219 putQRegLane(dd, 0, mkexpr(res));
6220 putQRegLane(dd, 1, mkU64(0));
6221 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6222 nameQRegLO(dd, Ity_I64),
6223 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6224 return True;
6225 }
6226
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31 28 23 21 20 15 9 4
      0q0 01110 size 1 m 100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1 m 100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
      0q0 01110 size 1 m 100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1 m 100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29  = INSN(29,29);
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool isMLAS = INSN(11,11) == 0;  /* bit 11 == 0: MLA/MLS, else MUL/PMUL */
      /* Iop_INVALID entries mark lane sizes the instruction does not
         support (no 64-bit lanes; PMUL is bytes only). */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC. A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         /* t1 = Vn * Vm; t2 = Vd +/- t1 for MLA/MLS, else t1. */
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6285
   /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
   /* 31 28 23 21 20 15 9 4
      0q0 01110 size 1 m 011011 n d  SMIN Vd.T, Vn.T, Vm.T
      0q1 01110 size 1 m 011011 n d  UMIN Vd.T, Vn.T, Vm.T
      0q0 01110 size 1 m 011001 n d  SMAX Vd.T, Vn.T, Vm.T
      0q1 01110 size 1 m 011001 n d  UMAX Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;   /* unsigned variant */
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(11,11) == 0;   /* bit 11 distinguishes MAX/MIN */
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opMINS[4]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
         const IROp opMINU[4]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
         const IROp opMAXS[4]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
         const IROp opMAXU[4]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
         vassert(szBlg2 < 4);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp t = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         /* 64-bit (q==0) variants must leave the upper half of Vd zero. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                                 : (isU ? "umin" : "smin");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6331
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31 28 23 21 16 15 9 4
      0q0 01110 size 11000 1 101010 n d  SMINV Vd, Vn.T
      0q1 01110 size 11000 1 101010 n d  UMINV Vd, Vn.T
      0q0 01110 size 11000 0 101010 n d  SMAXV Vd, Vn.T
      0q1 01110 size 11000 0 101010 n d  UMAXV Vd, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(16,16) == 0;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* Across-lanes min/max has no 64-bit-lane form, and no
            2s (szBlg2==2, q==0) form. */
         if (szBlg2 == 3) ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value. In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6390
sewardjfab09142014-02-10 10:28:13 +00006391 /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
6392 /* 31 28 23 20 15 9 4
6393 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T
6394 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T
6395 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T
6396 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T
6397 T is 16b when q==1, 8b when q==0
6398 */
6399 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6400 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6401 Bool isQ = INSN(30,30) == 1;
6402 Bool isORR = INSN(23,23) == 1;
6403 Bool invert = INSN(22,22) == 1;
6404 UInt mm = INSN(20,16);
6405 UInt nn = INSN(9,5);
6406 UInt dd = INSN(4,0);
6407 IRTemp res = newTemp(Ity_V128);
6408 assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
6409 getQReg128(nn),
6410 invert ? unop(Iop_NotV128, getQReg128(mm))
6411 : getQReg128(mm)));
6412 putQReg128(dd, isQ ? mkexpr(res)
6413 : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
6414 const HChar* names[4] = { "and", "bic", "orr", "orn" };
6415 const HChar* ar = isQ ? "16b" : "8b";
6416 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
6417 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
6418 return True;
6419 }
6420
   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   /* 31 28 23 21 15 9 4                                      ix
      0q1 01110 size 1 m 100011 n d  CMEQ  Vd.T, Vn.T, Vm.T  (1) ==
      0q0 01110 size 1 m 100011 n d  CMTST Vd.T, Vn.T, Vm.T  (2) &, != 0

      0q1 01110 size 1 m 001101 n d  CMHI Vd.T, Vn.T, Vm.T   (3) >u
      0q0 01110 size 1 m 001101 n d  CMGT Vd.T, Vn.T, Vm.T   (4) >s

      0q1 01110 size 1 m 001111 n d  CMHS Vd.T, Vn.T, Vm.T   (5) >=u
      0q0 01110 size 1 m 001111 n d  CMGE Vd.T, Vn.T, Vm.T   (6) >=s

      0q1 01110 size 100000 100010 n d  CMGE Vd.T, Vn.T, #0  (7) >=s 0
      0q0 01110 size 100000 100010 n d  CMGT Vd.T, Vn.T, #0  (8) >s 0

      0q1 01110 size 100000 100110 n d  CMLE Vd.T, Vn.T, #0  (9) <=s 0
      0q0 01110 size 100000 100110 n d  CMEQ Vd.T, Vn.T, #0  (10) == 0

      0q0 01110 size 100000 101010 n d  CMLT Vd.T, Vn.T, #0  (11) <s 0
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ    = INSN(30,30) == 1;
      UInt bit29  = INSN(29,29);
      UInt szBlg2 = INSN(23,22);
      UInt mm     = INSN(20,16);
      UInt b1510  = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const IROp opsEQ[4]
         = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
      const IROp opsGTS[4]
         = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
      const IROp opsGTU[4]
         = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
      Bool zeroHI = False;
      const HChar* arrSpec = "??";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      UInt ix = 0;
      /* Map the opcode fields to case index |ix| per the table above;
         ix == 0 means not decoded here.  For the compare-against-zero
         forms (7..11) the mm field must be 00000. */
      if (ok) {
         switch (b1510) {
            case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
            case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
            case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
            case BITS6(1,0,0,0,1,0):
               if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
            case BITS6(1,0,0,1,1,0):
               if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
            case BITS6(1,0,1,0,1,0):
               if (mm == 0 && bit29 == 0) { ix = 11; }; break;
            default: break;
         }
      }
      if (ix != 0) {
         vassert(ok && szBlg2 < 4);
         IRExpr* argL = getQReg128(nn);
         IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
         IRExpr* res = NULL;
         /* Some useful identities:
            x > y can be expressed directly
            x < y == y > x
            x <= y == not (x > y)
            x >= y == not (y > x)
         */
         switch (ix) {
            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMTST: lane is all-ones iff (L & R) != 0. */
            case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
                                           binop(Iop_AndV128, argL, argR),
                                           mkV128(0x0000)));
                    break;
            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
                    break;
            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 9: res = unop(Iop_NotV128,
                               binop(opsGTS[szBlg2], argL, argR));
                    break;
            case 10: res = binop(opsEQ[szBlg2], argL, argR); break;
            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
            default: vassert(0);
         }
         vassert(res);
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }
6522
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31 28 23 20 15 9 4
      0q1 01110 00 1 m 000111 n d  EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1 m 000111 n d  BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1 m 000111 n d  BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1 m 000111 n d  BIF Vd.T, Vm.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt op  = INSN(23,22);
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      /* The three bitwise-select forms are expressed with the identity
         sel ? a : b  ==  b ^ ((b ^ a) & sel), applied per bit. */
      switch (op) {
         case BITS2(0,0): /* EOR: d = m ^ n */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: per bit, d = d ? n : m */
            res = binop(opXOR, mkexpr(argM),
                        binop(opAND,
                              binop(opXOR, mkexpr(argM), mkexpr(argN)),
                              mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: per bit, d = m ? n : d */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: per bit, d = m ? d : n */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      /* 64-bit (q==0) form leaves the upper half of Vd zeroed. */
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6581
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31 28 22 18 15 9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                      0001:xxx -> B, SHR:8-xxx,    SHL:xxx
                      001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
                      01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
                      1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                      other -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift = 0;
         Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case. Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         /* NOTE(review): shift == lane-width (a valid USHR/SSHR
            encoding) is rejected by the test below and so falls
            through -- confirm it is handled elsewhere. */
         if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = "shl"; break;
               default: vassert(0);
            }
            IRExpr* src = getQReg128(nn);
            IRExpr* res = binop(op, src, mkU8(shift));
            putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
            HChar laneCh = "bhsd"[szBlg2];
            UInt nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6644
   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31 28 22 18 15 9 4
      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                       0000 -> AdvSIMD modified immediate (???)
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;
      UInt immh  = INSN(22,19);
      UInt immb  = INSN(18,16);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTemp(Ity_V128);
      IRTemp  zero = newTemp(Ity_V128);
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      /* Widen-and-shift trick: interleaving the selected source half
         with zero places each source lane in the top half of a
         double-width lane; a subsequent right shift (logical for the
         U form, arithmetic for the S form) by (laneBits - sh) then
         yields the widened, left-shifted value. */
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      /* else fall through */
   }
6716
   /* -------------------- XTN{,2} -------------------- */
   /* 31 28 23 21 15 9 4  XTN{,2} Vd.Tb, Vn.Ta
      0q0 01110 size 100001 001010 n d
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt size = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IROp op   = Iop_INVALID;
      const HChar* tb = NULL;
      const HChar* ta = NULL;
      /* Dispatch on size:Q; size==3 (cases 6,7) has no narrowing op
         and falls through as invalid. */
      switch ((size << 1) | (isQ ? 1 : 0)) {
         case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 6: break;
         case 7: break;
         default: vassert(0);
      }
      if (op != Iop_INVALID) {
         if (!isQ) {
            /* XTN (not XTN2) zeroes the upper 64 bits of Vd. */
            putQRegLane(dd, 1, mkU64(0));
         }
         /* XTN writes the low half; XTN2 writes the high half. */
         putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
         DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
             nameQReg128(dd), tb, nameQReg128(nn), ta);
         return True;
      }
      /* else fall through */
   }
6753
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arT  = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 selects the lane size; the bits
         above it give the lane index.  w0 holds the selected lane,
         zero-extended to 64 bits. */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane across 64 bits, then across Vd;
            q==0 leaves the upper half zero. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
6812
   /* ---------------- DUP (general, vector) ---------------- */
   /* 31 28 23 20 15 9 4
      0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      /* w0 holds the low laneTy-sized chunk of Rn, zero-extended
         to 64 bits. */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         /* Replicate across 64 bits, then across Vd; q==0 leaves the
            upper half zero. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* else fall through */
   }
6866
sewardjf5b08912014-02-06 12:57:58 +00006867 /* ---------------------- {S,U}MOV ---------------------- */
6868 /* 31 28 20 15 9 4
6869 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
6870 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
6871 dest is Xd when q==1, Wd when q==0
6872 UMOV:
6873 Ts,index,ops = case q:imm5 of
6874 0:xxxx1 -> B, xxxx, 8Uto64
6875 1:xxxx1 -> invalid
6876 0:xxx10 -> H, xxx, 16Uto64
6877 1:xxx10 -> invalid
6878 0:xx100 -> S, xx, 32Uto64
6879 1:xx100 -> invalid
6880 1:x1000 -> D, x, copy64
6881 other -> invalid
6882 SMOV:
6883 Ts,index,ops = case q:imm5 of
6884 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
6885 1:xxxx1 -> B, xxxx, 8Sto64
6886 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
6887 1:xxx10 -> H, xxx, 16Sto64
6888 0:xx100 -> invalid
6889 1:xx100 -> S, xx, 32Sto64
6890 1:x1000 -> invalid
6891 other -> invalid
6892 */
6893 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
6894 && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
6895 UInt bitQ = INSN(30,30) == 1;
6896 UInt imm5 = INSN(20,16);
6897 UInt nn = INSN(9,5);
6898 UInt dd = INSN(4,0);
6899 Bool isU = INSN(12,12) == 1;
6900 const HChar* arTs = "??";
6901 UInt laneNo = 16; /* invalid */
6902 // Setting 'res' to non-NULL determines valid/invalid
6903 IRExpr* res = NULL;
6904 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
6905 laneNo = (imm5 >> 1) & 15;
6906 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6907 res = isU ? unop(Iop_8Uto64, lane)
6908 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
6909 arTs = "b";
6910 }
6911 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
6912 laneNo = (imm5 >> 1) & 15;
6913 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6914 res = isU ? NULL
6915 : unop(Iop_8Sto64, lane);
6916 arTs = "b";
6917 }
6918 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
6919 laneNo = (imm5 >> 2) & 7;
6920 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6921 res = isU ? unop(Iop_16Uto64, lane)
6922 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
6923 arTs = "h";
6924 }
6925 else if (bitQ && (imm5 & 2)) { // 1:xxx10
6926 laneNo = (imm5 >> 2) & 7;
6927 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6928 res = isU ? NULL
6929 : unop(Iop_16Sto64, lane);
6930 arTs = "h";
6931 }
6932 else if (!bitQ && (imm5 & 4)) { // 0:xx100
6933 laneNo = (imm5 >> 3) & 3;
6934 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6935 res = isU ? unop(Iop_32Uto64, lane)
6936 : NULL;
6937 arTs = "s";
6938 }
6939 else if (bitQ && (imm5 & 4)) { // 1:xxx10
6940 laneNo = (imm5 >> 3) & 3;
6941 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6942 res = isU ? NULL
6943 : unop(Iop_32Sto64, lane);
6944 arTs = "s";
6945 }
6946 else if (bitQ && (imm5 & 8)) { // 1:x1000
6947 laneNo = (imm5 >> 4) & 1;
6948 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
6949 res = isU ? lane
6950 : NULL;
6951 arTs = "d";
6952 }
6953 /* */
6954 if (res) {
6955 vassert(laneNo < 16);
6956 putIReg64orZR(dd, res);
6957 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
6958 nameIRegOrZR(bitQ == 1, dd),
6959 nameQReg128(nn), arTs, laneNo);
6960 return True;
6961 }
6962 /* else fall through */
6963 }
6964
sewardje520bb32014-02-17 11:00:53 +00006965 /* -------------------- INS (general) -------------------- */
6966 /* 31 28 20 15 9 4
6967 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
6968 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
6969 xxx10 -> H, xxx
6970 xx100 -> S, xx
6971 x1000 -> D, x
6972 */
6973 if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
6974 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6975 UInt imm5 = INSN(20,16);
6976 UInt nn = INSN(9,5);
6977 UInt dd = INSN(4,0);
6978 HChar ts = '?';
6979 UInt laneNo = 16;
6980 IRExpr* src = NULL;
6981 if (imm5 & 1) {
6982 src = unop(Iop_64to8, getIReg64orZR(nn));
6983 laneNo = (imm5 >> 1) & 15;
6984 ts = 'b';
6985 }
6986 else if (imm5 & 2) {
6987 src = unop(Iop_64to16, getIReg64orZR(nn));
6988 laneNo = (imm5 >> 2) & 7;
6989 ts = 'h';
6990 }
6991 else if (imm5 & 4) {
6992 src = unop(Iop_64to32, getIReg64orZR(nn));
6993 laneNo = (imm5 >> 3) & 3;
6994 ts = 's';
6995 }
6996 else if (imm5 & 8) {
6997 src = getIReg64orZR(nn);
6998 laneNo = (imm5 >> 4) & 1;
6999 ts = 'd';
7000 }
7001 /* */
7002 if (src) {
7003 vassert(laneNo < 16);
7004 putQRegLane(dd, laneNo, src);
7005 DIP("ins %s.%c[%u], %s\n",
7006 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
7007 return True;
7008 }
7009 /* else invalid; fall through */
7010 }
7011
sewardj32d86752014-03-02 12:47:18 +00007012 /* -------------------- NEG (vector) -------------------- */
7013 /* 31 28 23 21 16 9 4
7014 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn
7015 sz is laneSz, q:sz == 011 is disallowed, as usual
7016 */
7017 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
7018 && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
7019 Bool isQ = INSN(30,30) == 1;
7020 UInt szBlg2 = INSN(23,22);
7021 UInt nn = INSN(9,5);
7022 UInt dd = INSN(4,0);
7023 Bool zeroHI = False;
7024 const HChar* arrSpec = "";
7025 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
7026 if (ok) {
7027 const IROp opSUB[4]
7028 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
7029 IRTemp res = newTemp(Ity_V128);
7030 vassert(szBlg2 < 4);
7031 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
7032 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
7033 : mkexpr(res));
7034 DIP("neg %s.%s, %s.%s\n",
7035 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
7036 return True;
7037 }
7038 /* else fall through */
7039 }
7040
sewardj92d0ae32014-04-03 13:48:54 +00007041 /* -------------------- TBL, TBX -------------------- */
7042 /* 31 28 20 15 14 12 9 4
7043 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7044 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7045 where Ta = 16b(q=1) or 8b(q=0)
7046 */
7047 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7048 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
7049 Bool isQ = INSN(30,30) == 1;
7050 Bool isTBX = INSN(12,12) == 1;
7051 UInt mm = INSN(20,16);
7052 UInt len = INSN(14,13);
7053 UInt nn = INSN(9,5);
7054 UInt dd = INSN(4,0);
7055 /* The out-of-range values to use. */
7056 IRTemp oor_values = newTemp(Ity_V128);
7057 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
7058 /* src value */
7059 IRTemp src = newTemp(Ity_V128);
7060 assign(src, getQReg128(mm));
7061 /* The table values */
7062 IRTemp tab[4];
7063 UInt i;
7064 for (i = 0; i <= len; i++) {
7065 vassert(i < 4);
7066 tab[i] = newTemp(Ity_V128);
7067 assign(tab[i], getQReg128((nn + i) % 32));
7068 }
7069 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
7070 putQReg128(dd, isQ ? mkexpr(res)
7071 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
7072 const HChar* Ta = isQ ? "16b" : "8b";
7073 const HChar* nm = isTBX ? "tbx" : "tbl";
7074 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
7075 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
7076 return True;
7077 }
sewardjbbcf1882014-01-12 12:49:10 +00007078 /* FIXME Temporary hacks to get through ld.so FIXME */
7079
7080 /* ------------------ movi vD.4s, #0x0 ------------------ */
7081 /* 0x4F 0x00 0x04 000 vD */
7082 if ((insn & 0xFFFFFFE0) == 0x4F000400) {
7083 UInt vD = INSN(4,0);
7084 putQReg128(vD, mkV128(0x0000));
7085 DIP("movi v%u.4s, #0x0\n", vD);
7086 return True;
7087 }
7088
sewardjbbcf1882014-01-12 12:49:10 +00007089 /* ---------------- MOV vD.16b, vN.16b ---------------- */
7090 /* 31 23 20 15 9 4
7091 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
7092 This only handles the N == M case.
7093 */
7094 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
7095 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7096 UInt mm = INSN(20,16);
7097 UInt nn = INSN(9,5);
7098 UInt dd = INSN(4,0);
7099 if (mm == nn) {
7100 putQReg128(dd, getQReg128(nn));
7101 DIP("mov v%u.16b, v%u.16b\n", dd, nn);
7102 return True;
7103 }
7104 /* else it's really an ORR; fall through. */
7105 }
7106
sewardjb3553472014-05-15 16:49:21 +00007107 /* ---------------- CMEQ_d_d_#0 ---------------- */
7108 /*
7109 010 11110 11 10000 0100 110 n d
7110 */
7111 if ((INSN(31,0) & 0xFFFFFC00) == 0x5EE09800) {
7112 UInt nn = INSN(9,5);
7113 UInt dd = INSN(4,0);
7114 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
7115 binop(Iop_CmpEQ64x2, getQReg128(nn),
7116 mkV128(0x0000))));
7117 DIP("cmeq d%u, d%u, #0\n", dd, nn);
7118 return True;
7119 }
7120
sewardjbbcf1882014-01-12 12:49:10 +00007121 vex_printf("ARM64 front end: simd_and_fp\n");
7122 return False;
7123# undef INSN
7124}
7125
7126
7127/*------------------------------------------------------------*/
7128/*--- Disassemble a single ARM64 instruction ---*/
7129/*------------------------------------------------------------*/
7130
7131/* Disassemble a single ARM64 instruction into IR. The instruction
7132 has is located at |guest_instr| and has guest IP of
7133 |guest_PC_curr_instr|, which will have been set before the call
7134 here. Returns True iff the instruction was decoded, in which case
7135 *dres will be set accordingly, or False, in which case *dres should
7136 be ignored by the caller. */
7137
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.  Relies on the local 'insn'
   // fetched below; sub-decoders define their own copy.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ 
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults.  NOTE: on a failed decode the caller checks
      that these defaults are still intact, so every failure path below
      (and in the sub-decoders) must leave *dres unmodified. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
         Each special sequence is this preamble plus one 4-byte marker
         insn, 20 bytes in total -- hence the '+ 20' / 'len = 20'
         adjustments below. */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change. */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7310
7311
7312/*------------------------------------------------------------*/
7313/*--- Top-level fn ---*/
7314/*------------------------------------------------------------*/
7315
7316/* Disassemble a single instruction into IR. The instruction
7317 is located in host memory at &guest_code[delta]. */
7318
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_is_bigendian   = host_bigendian_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope)
      -- relies on unsigned wraparound to also reject values < 2. */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here.  len is 4 for a normal insn,
         20 for a "Special" (preamble + marker) sequence. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];   /* 32 bit chars + 7 separators + NUL fits easily */
         UInt  insn
                  = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         /* Render the insn as binary, MSB first, with ' per nibble and
            a space per byte, eg 0100'1110 0000'0001 ... */
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
              if ((i & 7) == 0) buf[j++] = ' ';
              else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.whatNext    = Dis_StopHere;
      dres.len         = 0;
      dres.continueAt  = 0;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}
7401
sewardjecde6972014-02-05 11:01:19 +00007402////////////////////////////////////////////////////////////////////////
7403////////////////////////////////////////////////////////////////////////
7404
7405/* Spare code for doing reference implementations of various 128-bit
7406 SIMD interleaves/deinterleaves/concatenation ops. For 64-bit
7407 equivalents see the end of guest_arm_toIR.c. */
7408
7409////////////////////////////////////////////////////////////////
7410// 64x2 operations
7411//
7412static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7413{
7414 // returns a0 b0
7415 return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7416 unop(Iop_V128to64, mkexpr(b10)));
7417}
7418
7419static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7420{
7421 // returns a1 b1
7422 return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7423 unop(Iop_V128HIto64, mkexpr(b10)));
7424}
7425
7426
7427////////////////////////////////////////////////////////////////
7428// 32x4 operations
7429//
7430
7431// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7432// the top halves guaranteed to be zero.
7433static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7434 IRTemp* out0, IRTemp v128 )
7435{
7436 if (out3) *out3 = newTemp(Ity_I64);
7437 if (out2) *out2 = newTemp(Ity_I64);
7438 if (out1) *out1 = newTemp(Ity_I64);
7439 if (out0) *out0 = newTemp(Ity_I64);
7440 IRTemp hi64 = newTemp(Ity_I64);
7441 IRTemp lo64 = newTemp(Ity_I64);
7442 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7443 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7444 if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7445 if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7446 if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7447 if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7448}
7449
7450// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7451// IRTemp.
7452static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7453{
7454 IRTemp hi64 = newTemp(Ity_I64);
7455 IRTemp lo64 = newTemp(Ity_I64);
7456 assign(hi64,
7457 binop(Iop_Or64,
7458 binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7459 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7460 assign(lo64,
7461 binop(Iop_Or64,
7462 binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7463 binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7464 IRTemp res = newTemp(Ity_V128);
7465 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7466 return res;
7467}
7468
7469static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7470{
7471 // returns a2 a0 b2 b0
7472 IRTemp a2, a0, b2, b0;
7473 breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7474 breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7475 return mkexpr(mkV128from32s(a2, a0, b2, b0));
7476}
7477
7478static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7479{
7480 // returns a3 a1 b3 b1
7481 IRTemp a3, a1, b3, b1;
7482 breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7483 breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7484 return mkexpr(mkV128from32s(a3, a1, b3, b1));
7485}
7486
sewardje520bb32014-02-17 11:00:53 +00007487static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7488{
7489 // returns a1 b1 a0 b0
7490 IRTemp a1, a0, b1, b0;
7491 breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7492 breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7493 return mkexpr(mkV128from32s(a1, b1, a0, b0));
7494}
7495
7496static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7497{
7498 // returns a3 b3 a2 b2
7499 IRTemp a3, a2, b3, b2;
7500 breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7501 breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7502 return mkexpr(mkV128from32s(a3, b3, a2, b2));
7503}
sewardjecde6972014-02-05 11:01:19 +00007504
7505////////////////////////////////////////////////////////////////
7506// 16x8 operations
7507//
7508
7509static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7510 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7511 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7512{
7513 if (out7) *out7 = newTemp(Ity_I64);
7514 if (out6) *out6 = newTemp(Ity_I64);
7515 if (out5) *out5 = newTemp(Ity_I64);
7516 if (out4) *out4 = newTemp(Ity_I64);
7517 if (out3) *out3 = newTemp(Ity_I64);
7518 if (out2) *out2 = newTemp(Ity_I64);
7519 if (out1) *out1 = newTemp(Ity_I64);
7520 if (out0) *out0 = newTemp(Ity_I64);
7521 IRTemp hi64 = newTemp(Ity_I64);
7522 IRTemp lo64 = newTemp(Ity_I64);
7523 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7524 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7525 if (out7)
7526 assign(*out7, binop(Iop_And64,
7527 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7528 mkU64(0xFFFF)));
7529 if (out6)
7530 assign(*out6, binop(Iop_And64,
7531 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7532 mkU64(0xFFFF)));
7533 if (out5)
7534 assign(*out5, binop(Iop_And64,
7535 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7536 mkU64(0xFFFF)));
7537 if (out4)
7538 assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
7539 if (out3)
7540 assign(*out3, binop(Iop_And64,
7541 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7542 mkU64(0xFFFF)));
7543 if (out2)
7544 assign(*out2, binop(Iop_And64,
7545 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7546 mkU64(0xFFFF)));
7547 if (out1)
7548 assign(*out1, binop(Iop_And64,
7549 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7550 mkU64(0xFFFF)));
7551 if (out0)
7552 assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
7553}
7554
7555static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7556 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7557{
7558 IRTemp hi64 = newTemp(Ity_I64);
7559 IRTemp lo64 = newTemp(Ity_I64);
7560 assign(hi64,
7561 binop(Iop_Or64,
7562 binop(Iop_Or64,
7563 binop(Iop_Shl64,
7564 binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
7565 mkU8(48)),
7566 binop(Iop_Shl64,
7567 binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
7568 mkU8(32))),
7569 binop(Iop_Or64,
7570 binop(Iop_Shl64,
7571 binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
7572 mkU8(16)),
7573 binop(Iop_And64,
7574 mkexpr(in4), mkU64(0xFFFF)))));
7575 assign(lo64,
7576 binop(Iop_Or64,
7577 binop(Iop_Or64,
7578 binop(Iop_Shl64,
7579 binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
7580 mkU8(48)),
7581 binop(Iop_Shl64,
7582 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
7583 mkU8(32))),
7584 binop(Iop_Or64,
7585 binop(Iop_Shl64,
7586 binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
7587 mkU8(16)),
7588 binop(Iop_And64,
7589 mkexpr(in0), mkU64(0xFFFF)))));
7590 IRTemp res = newTemp(Ity_V128);
7591 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7592 return res;
7593}
7594
7595static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7596{
7597 // returns a6 a4 a2 a0 b6 b4 b2 b0
7598 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
7599 breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
7600 breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
7601 return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
7602}
7603
7604static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7605{
7606 // returns a7 a5 a3 a1 b7 b5 b3 b1
7607 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
7608 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
7609 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
7610 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
7611}
7612
sewardje520bb32014-02-17 11:00:53 +00007613static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
7614{
7615 // returns a3 b3 a2 b2 a1 b1 a0 b0
7616 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
7617 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
7618 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
7619 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
7620}
7621
7622static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
7623{
7624 // returns a7 b7 a6 b6 a5 b5 a4 b4
7625 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
7626 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
7627 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
7628 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
7629}
7630
sewardjfab09142014-02-10 10:28:13 +00007631////////////////////////////////////////////////////////////////
7632// 8x16 operations
7633//
7634
7635static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
7636 IRTemp* outC, IRTemp* outB, IRTemp* outA,
7637 IRTemp* out9, IRTemp* out8,
7638 IRTemp* out7, IRTemp* out6, IRTemp* out5,
7639 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7640 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7641{
7642 if (outF) *outF = newTemp(Ity_I64);
7643 if (outE) *outE = newTemp(Ity_I64);
7644 if (outD) *outD = newTemp(Ity_I64);
7645 if (outC) *outC = newTemp(Ity_I64);
7646 if (outB) *outB = newTemp(Ity_I64);
7647 if (outA) *outA = newTemp(Ity_I64);
7648 if (out9) *out9 = newTemp(Ity_I64);
7649 if (out8) *out8 = newTemp(Ity_I64);
7650 if (out7) *out7 = newTemp(Ity_I64);
7651 if (out6) *out6 = newTemp(Ity_I64);
7652 if (out5) *out5 = newTemp(Ity_I64);
7653 if (out4) *out4 = newTemp(Ity_I64);
7654 if (out3) *out3 = newTemp(Ity_I64);
7655 if (out2) *out2 = newTemp(Ity_I64);
7656 if (out1) *out1 = newTemp(Ity_I64);
7657 if (out0) *out0 = newTemp(Ity_I64);
7658 IRTemp hi64 = newTemp(Ity_I64);
7659 IRTemp lo64 = newTemp(Ity_I64);
7660 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7661 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7662 if (outF)
7663 assign(*outF, binop(Iop_And64,
7664 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
7665 mkU64(0xFF)));
7666 if (outE)
7667 assign(*outE, binop(Iop_And64,
7668 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7669 mkU64(0xFF)));
7670 if (outD)
7671 assign(*outD, binop(Iop_And64,
7672 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
7673 mkU64(0xFF)));
7674 if (outC)
7675 assign(*outC, binop(Iop_And64,
7676 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7677 mkU64(0xFF)));
7678 if (outB)
7679 assign(*outB, binop(Iop_And64,
7680 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
7681 mkU64(0xFF)));
7682 if (outA)
7683 assign(*outA, binop(Iop_And64,
7684 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7685 mkU64(0xFF)));
7686 if (out9)
7687 assign(*out9, binop(Iop_And64,
7688 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
7689 mkU64(0xFF)));
7690 if (out8)
7691 assign(*out8, binop(Iop_And64,
7692 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
7693 mkU64(0xFF)));
7694 if (out7)
7695 assign(*out7, binop(Iop_And64,
7696 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
7697 mkU64(0xFF)));
7698 if (out6)
7699 assign(*out6, binop(Iop_And64,
7700 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7701 mkU64(0xFF)));
7702 if (out5)
7703 assign(*out5, binop(Iop_And64,
7704 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
7705 mkU64(0xFF)));
7706 if (out4)
7707 assign(*out4, binop(Iop_And64,
7708 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7709 mkU64(0xFF)));
7710 if (out3)
7711 assign(*out3, binop(Iop_And64,
7712 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
7713 mkU64(0xFF)));
7714 if (out2)
7715 assign(*out2, binop(Iop_And64,
7716 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7717 mkU64(0xFF)));
7718 if (out1)
7719 assign(*out1, binop(Iop_And64,
7720 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
7721 mkU64(0xFF)));
7722 if (out0)
7723 assign(*out0, binop(Iop_And64,
7724 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
7725 mkU64(0xFF)));
7726}
7727
7728static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
7729 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
7730 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7731 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7732{
7733 IRTemp vFE = newTemp(Ity_I64);
7734 IRTemp vDC = newTemp(Ity_I64);
7735 IRTemp vBA = newTemp(Ity_I64);
7736 IRTemp v98 = newTemp(Ity_I64);
7737 IRTemp v76 = newTemp(Ity_I64);
7738 IRTemp v54 = newTemp(Ity_I64);
7739 IRTemp v32 = newTemp(Ity_I64);
7740 IRTemp v10 = newTemp(Ity_I64);
7741 assign(vFE, binop(Iop_Or64,
7742 binop(Iop_Shl64,
7743 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
7744 binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
7745 assign(vDC, binop(Iop_Or64,
7746 binop(Iop_Shl64,
7747 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
7748 binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
7749 assign(vBA, binop(Iop_Or64,
7750 binop(Iop_Shl64,
7751 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
7752 binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
7753 assign(v98, binop(Iop_Or64,
7754 binop(Iop_Shl64,
7755 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
7756 binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
7757 assign(v76, binop(Iop_Or64,
7758 binop(Iop_Shl64,
7759 binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
7760 binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
7761 assign(v54, binop(Iop_Or64,
7762 binop(Iop_Shl64,
7763 binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
7764 binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
7765 assign(v32, binop(Iop_Or64,
7766 binop(Iop_Shl64,
7767 binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
7768 binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
7769 assign(v10, binop(Iop_Or64,
7770 binop(Iop_Shl64,
7771 binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
7772 binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
7773 return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
7774}
7775
7776static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7777 IRTemp bFEDCBA9876543210 )
7778{
7779 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7780 IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
7781 breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
7782 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
7783 aFEDCBA9876543210);
7784 breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
7785 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
7786 bFEDCBA9876543210);
7787 return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
7788 bE, bC, bA, b8, b6, b4, b2, b0));
7789}
7790
7791static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7792 IRTemp bFEDCBA9876543210 )
7793{
7794 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7795 IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
7796 breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
7797 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
7798 aFEDCBA9876543210);
7799
7800 breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
7801 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
7802 aFEDCBA9876543210);
7803
7804 return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
7805 bF, bD, bB, b9, b7, b5, b3, b1));
7806}
7807
sewardje520bb32014-02-17 11:00:53 +00007808static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7809 IRTemp bFEDCBA9876543210 )
7810{
7811 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7812 IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
7813 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7814 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
7815 aFEDCBA9876543210);
7816 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7817 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
7818 bFEDCBA9876543210);
7819 return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
7820 a3, b3, a2, b2, a1, b1, a0, b0));
7821}
7822
7823static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7824 IRTemp bFEDCBA9876543210 )
7825{
7826 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7827 IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
7828 breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
7829 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7830 aFEDCBA9876543210);
7831 breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
7832 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7833 bFEDCBA9876543210);
7834 return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
7835 aB, bB, aA, bA, a9, b9, a8, b8));
7836}
sewardjecde6972014-02-05 11:01:19 +00007837
sewardjbbcf1882014-01-12 12:49:10 +00007838/*--------------------------------------------------------------------*/
7839/*--- end guest_arm64_toIR.c ---*/
7840/*--------------------------------------------------------------------*/