blob: 55ca7e24d11d2dec752be71cbdbf5eb844454070 [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
 86 Following that, one of the following 4 are allowed
87 (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian? We need to know this in order to do
123 sub-register accesses to the SIMD/FP registers correctly. */
124static Bool host_is_bigendian;
125
126/* CONST: The guest address for the instruction currently being
127 translated. */
128static Addr64 guest_PC_curr_instr;
129
130/* MOD: The IRSB* into which we're generating code. */
131static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output ---*/
136/*------------------------------------------------------------*/
137
138#define DIP(format, args...) \
139 if (vex_traceflags & VEX_TRACE_FE) \
140 vex_printf(format, ## args)
141
142#define DIS(buf, format, args...) \
143 if (vex_traceflags & VEX_TRACE_FE) \
144 vex_sprintf(buf, format, ## args)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the ---*/
149/*--- arm insn stream. ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153 endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156 UInt w = 0;
157 w = (w << 8) | p[3];
158 w = (w << 8) | p[2];
159 w = (w << 8) | p[1];
160 w = (w << 8) | p[0];
161 return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165 bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168 vassert(n > 1 && n < 64);
169 Long r = (Long)x;
170 r = (r << (64-n)) >> (64-n);
171 return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ UShort w = 0;
179//ZZ w = (w << 8) | p[1];
180//ZZ w = (w << 8) | p[0];
181//ZZ return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ vassert(sh >= 0 && sh < 32);
186//ZZ if (sh == 0)
187//ZZ return x;
188//ZZ else
189//ZZ return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ Int res = 0, i;
195//ZZ for (i = 0; i < 32; i++) {
196//ZZ res += (x & 1);
197//ZZ x >>= 1;
198//ZZ }
199//ZZ return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ UInt mask = 1 << ix;
205//ZZ x &= ~mask;
206//ZZ x |= ((b << ix) & mask);
207//ZZ return x;
208//ZZ }
209
210#define BITS2(_b1,_b0) \
211 (((_b1) << 1) | (_b0))
212
213#define BITS3(_b2,_b1,_b0) \
214 (((_b2) << 2) | ((_b1) << 1) | (_b0))
215
216#define BITS4(_b3,_b2,_b1,_b0) \
217 (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
218
219#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
220 ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
221 | BITS4((_b3),(_b2),(_b1),(_b0)))
222
223#define BITS5(_b4,_b3,_b2,_b1,_b0) \
224 (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
225#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
226 (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
227#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
228 (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
229
230#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
231 (((_b8) << 8) \
232 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
233
234#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
235 (((_b9) << 9) | ((_b8) << 8) \
236 | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
237
238#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
239 (((_b10) << 10) \
240 | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
241
sewardjdc9259c2014-02-27 11:10:19 +0000242#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
243 (((_b11) << 11) \
244 | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
245
sewardjbbcf1882014-01-12 12:49:10 +0000246// produces _uint[_bMax:_bMin]
247#define SLICE_UInt(_uint,_bMax,_bMin) \
248 (( ((UInt)(_uint)) >> (_bMin)) \
249 & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments. ---*/
254/*------------------------------------------------------------*/
255
256static IRExpr* mkV128 ( UShort w )
257{
258 return IRExpr_Const(IRConst_V128(w));
259}
260
261static IRExpr* mkU64 ( ULong i )
262{
263 return IRExpr_Const(IRConst_U64(i));
264}
265
266static IRExpr* mkU32 ( UInt i )
267{
268 return IRExpr_Const(IRConst_U32(i));
269}
270
271static IRExpr* mkU8 ( UInt i )
272{
273 vassert(i < 256);
274 return IRExpr_Const(IRConst_U8( (UChar)i ));
275}
276
277static IRExpr* mkexpr ( IRTemp tmp )
278{
279 return IRExpr_RdTmp(tmp);
280}
281
282static IRExpr* unop ( IROp op, IRExpr* a )
283{
284 return IRExpr_Unop(op, a);
285}
286
287static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
288{
289 return IRExpr_Binop(op, a1, a2);
290}
291
292static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
293{
294 return IRExpr_Triop(op, a1, a2, a3);
295}
296
297static IRExpr* loadLE ( IRType ty, IRExpr* addr )
298{
299 return IRExpr_Load(Iend_LE, ty, addr);
300}
301
302/* Add a statement to the list held by "irbb". */
303static void stmt ( IRStmt* st )
304{
305 addStmtToIRSB( irsb, st );
306}
307
308static void assign ( IRTemp dst, IRExpr* e )
309{
310 stmt( IRStmt_WrTmp(dst, e) );
311}
312
313static void storeLE ( IRExpr* addr, IRExpr* data )
314{
315 stmt( IRStmt_Store(Iend_LE, addr, data) );
316}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ if (guardT == IRTemp_INVALID) {
321//ZZ /* unconditional */
322//ZZ storeLE(addr, data);
323//ZZ } else {
324//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ IRExpr* addr, IRExpr* alt,
331//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ if (guardT == IRTemp_INVALID) {
334//ZZ /* unconditional */
335//ZZ IRExpr* loaded = NULL;
336//ZZ switch (cvt) {
337//ZZ case ILGop_Ident32:
338//ZZ loaded = loadLE(Ity_I32, addr); break;
339//ZZ case ILGop_8Uto32:
340//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ case ILGop_8Sto32:
342//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ case ILGop_16Uto32:
344//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ case ILGop_16Sto32:
346//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ default:
348//ZZ vassert(0);
349//ZZ }
350//ZZ vassert(loaded != NULL);
351//ZZ assign(dst, loaded);
352//ZZ } else {
353//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ loaded data before putting the data in 'dst'. If the load
355//ZZ does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ }
359//ZZ }
360
361/* Generate a new temporary of the given type. */
362static IRTemp newTemp ( IRType ty )
363{
364 vassert(isPlausibleIRType(ty));
365 return newIRTemp( irsb->tyenv, ty );
366}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ vassert(rot >= 0 && rot < 32);
379//ZZ if (rot == 0)
380//ZZ return mkexpr(src);
381//ZZ return
382//ZZ binop(Iop_Or32,
383//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ the given condition is true. Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ if (b)
397//ZZ return binop(Iop_And32, e, mkU32(~3));
398//ZZ else
399//ZZ return e;
400//ZZ }
401
402/* Other IR construction helpers. */
403static IROp mkAND ( IRType ty ) {
404 switch (ty) {
405 case Ity_I32: return Iop_And32;
406 case Ity_I64: return Iop_And64;
407 default: vpanic("mkAND");
408 }
409}
410
411static IROp mkOR ( IRType ty ) {
412 switch (ty) {
413 case Ity_I32: return Iop_Or32;
414 case Ity_I64: return Iop_Or64;
415 default: vpanic("mkOR");
416 }
417}
418
419static IROp mkXOR ( IRType ty ) {
420 switch (ty) {
421 case Ity_I32: return Iop_Xor32;
422 case Ity_I64: return Iop_Xor64;
423 default: vpanic("mkXOR");
424 }
425}
426
427static IROp mkSHL ( IRType ty ) {
428 switch (ty) {
429 case Ity_I32: return Iop_Shl32;
430 case Ity_I64: return Iop_Shl64;
431 default: vpanic("mkSHL");
432 }
433}
434
435static IROp mkSHR ( IRType ty ) {
436 switch (ty) {
437 case Ity_I32: return Iop_Shr32;
438 case Ity_I64: return Iop_Shr64;
439 default: vpanic("mkSHR");
440 }
441}
442
443static IROp mkSAR ( IRType ty ) {
444 switch (ty) {
445 case Ity_I32: return Iop_Sar32;
446 case Ity_I64: return Iop_Sar64;
447 default: vpanic("mkSAR");
448 }
449}
450
451static IROp mkNOT ( IRType ty ) {
452 switch (ty) {
453 case Ity_I32: return Iop_Not32;
454 case Ity_I64: return Iop_Not64;
455 default: vpanic("mkNOT");
456 }
457}
458
459static IROp mkADD ( IRType ty ) {
460 switch (ty) {
461 case Ity_I32: return Iop_Add32;
462 case Ity_I64: return Iop_Add64;
463 default: vpanic("mkADD");
464 }
465}
466
467static IROp mkSUB ( IRType ty ) {
468 switch (ty) {
469 case Ity_I32: return Iop_Sub32;
470 case Ity_I64: return Iop_Sub64;
471 default: vpanic("mkSUB");
472 }
473}
474
475static IROp mkADDF ( IRType ty ) {
476 switch (ty) {
477 case Ity_F32: return Iop_AddF32;
478 case Ity_F64: return Iop_AddF64;
479 default: vpanic("mkADDF");
480 }
481}
482
483static IROp mkSUBF ( IRType ty ) {
484 switch (ty) {
485 case Ity_F32: return Iop_SubF32;
486 case Ity_F64: return Iop_SubF64;
487 default: vpanic("mkSUBF");
488 }
489}
490
491static IROp mkMULF ( IRType ty ) {
492 switch (ty) {
493 case Ity_F32: return Iop_MulF32;
494 case Ity_F64: return Iop_MulF64;
495 default: vpanic("mkMULF");
496 }
497}
498
499static IROp mkDIVF ( IRType ty ) {
500 switch (ty) {
501 case Ity_F32: return Iop_DivF32;
502 case Ity_F64: return Iop_DivF64;
503 default: vpanic("mkMULF");
504 }
505}
506
507static IROp mkNEGF ( IRType ty ) {
508 switch (ty) {
509 case Ity_F32: return Iop_NegF32;
510 case Ity_F64: return Iop_NegF64;
511 default: vpanic("mkNEGF");
512 }
513}
514
515static IROp mkABSF ( IRType ty ) {
516 switch (ty) {
517 case Ity_F32: return Iop_AbsF32;
518 case Ity_F64: return Iop_AbsF64;
519 default: vpanic("mkNEGF");
520 }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524 switch (ty) {
525 case Ity_F32: return Iop_SqrtF32;
526 case Ity_F64: return Iop_SqrtF64;
527 default: vpanic("mkNEGF");
528 }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532 switch (ty) {
533 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534 case Ity_I64: return mkU64(imm);
535 default: vpanic("mkU");
536 }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540 of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543 UInt w = 0;
544 if (ty == Ity_I64) {
545 w = 64;
546 } else {
547 vassert(ty == Ity_I32);
548 w = 32;
549 }
550 vassert(w != 0);
551 vassert(imm < w);
552 if (imm == 0) {
553 return arg;
554 }
555 IRTemp res = newTemp(ty);
556 assign(res, binop(mkOR(ty),
557 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559 return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563 all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566 UInt w = 0;
567 if (ty == Ity_I64) {
568 w = 64;
569 } else {
570 vassert(ty == Ity_I32);
571 w = 32;
572 }
573 vassert(w != 0);
574 vassert(imm < w);
575 IRTemp res = newTemp(ty);
576 assign(res, binop(mkSAR(ty),
577 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578 mkU8(w - 1)));
579 return res;
580}
581
sewardj7d009132014-02-20 17:43:38 +0000582/* U-widen 8/16/32/64 bit int expr to 64. */
583static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
584{
585 switch (srcTy) {
586 case Ity_I64: return e;
587 case Ity_I32: return unop(Iop_32Uto64, e);
588 case Ity_I16: return unop(Iop_16Uto64, e);
589 case Ity_I8: return unop(Iop_8Uto64, e);
590 default: vpanic("widenUto64(arm64)");
591 }
592}
593
594/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
595 of these combinations make sense. */
596static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
597{
598 switch (dstTy) {
599 case Ity_I64: return e;
600 case Ity_I32: return unop(Iop_64to32, e);
601 case Ity_I16: return unop(Iop_64to16, e);
602 case Ity_I8: return unop(Iop_64to8, e);
603 default: vpanic("narrowFrom64(arm64)");
604 }
605}
606
sewardjbbcf1882014-01-12 12:49:10 +0000607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers. ---*/
610/*------------------------------------------------------------*/
611
612#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
613#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
614#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
615#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
616#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
617#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
618#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
619#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
620#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
621#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
622#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
623#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
624#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
625#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
626#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
627#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
628#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
629#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
630#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
631#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
632#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
633#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
634#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
635#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
636#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
637#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
638#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
639#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
640#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
641#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
642#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
643
sewardj60687882014-01-15 10:25:21 +0000644#define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
sewardjbbcf1882014-01-12 12:49:10 +0000645#define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
646
647#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
648#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
649#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
650#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
651
652#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
653#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
654
655#define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
656#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
657#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
658#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
659#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
660#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
661#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
662#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
663#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
664#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
665#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
666#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
667#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
668#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
669#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
670#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
671#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
672#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
673#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
674#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
675#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
676#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
677#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
678#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
679#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
680#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
681#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
682#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
683#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
684#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
685#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
686#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
687
688#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
689#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR)
690//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
691//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
692//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
693//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
694//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
695//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
696//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
697
sewardj05f5e012014-05-04 10:52:11 +0000698#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
699#define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
sewardjbbcf1882014-01-12 12:49:10 +0000700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706 /* Do we care about endianness here? We do if sub-parts of integer
707 registers are accessed. */
708 switch (iregNo) {
709 case 0: return OFFB_X0;
710 case 1: return OFFB_X1;
711 case 2: return OFFB_X2;
712 case 3: return OFFB_X3;
713 case 4: return OFFB_X4;
714 case 5: return OFFB_X5;
715 case 6: return OFFB_X6;
716 case 7: return OFFB_X7;
717 case 8: return OFFB_X8;
718 case 9: return OFFB_X9;
719 case 10: return OFFB_X10;
720 case 11: return OFFB_X11;
721 case 12: return OFFB_X12;
722 case 13: return OFFB_X13;
723 case 14: return OFFB_X14;
724 case 15: return OFFB_X15;
725 case 16: return OFFB_X16;
726 case 17: return OFFB_X17;
727 case 18: return OFFB_X18;
728 case 19: return OFFB_X19;
729 case 20: return OFFB_X20;
730 case 21: return OFFB_X21;
731 case 22: return OFFB_X22;
732 case 23: return OFFB_X23;
733 case 24: return OFFB_X24;
734 case 25: return OFFB_X25;
735 case 26: return OFFB_X26;
736 case 27: return OFFB_X27;
737 case 28: return OFFB_X28;
738 case 29: return OFFB_X29;
739 case 30: return OFFB_X30;
740 /* but not 31 */
741 default: vassert(0);
742 }
743}
744
745static Int offsetIReg64orSP ( UInt iregNo )
746{
sewardj60687882014-01-15 10:25:21 +0000747 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
sewardjbbcf1882014-01-12 12:49:10 +0000748}
749
750static const HChar* nameIReg64orZR ( UInt iregNo )
751{
752 vassert(iregNo < 32);
753 static const HChar* names[32]
754 = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
755 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
756 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
757 "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
758 return names[iregNo];
759}
760
761static const HChar* nameIReg64orSP ( UInt iregNo )
762{
763 if (iregNo == 31) {
764 return "sp";
765 }
766 vassert(iregNo < 31);
767 return nameIReg64orZR(iregNo);
768}
769
770static IRExpr* getIReg64orSP ( UInt iregNo )
771{
772 vassert(iregNo < 32);
773 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
774}
775
776static IRExpr* getIReg64orZR ( UInt iregNo )
777{
778 if (iregNo == 31) {
779 return mkU64(0);
780 }
781 vassert(iregNo < 31);
782 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
783}
784
785static void putIReg64orSP ( UInt iregNo, IRExpr* e )
786{
787 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
788 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
789}
790
791static void putIReg64orZR ( UInt iregNo, IRExpr* e )
792{
793 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
794 if (iregNo == 31) {
795 return;
796 }
797 vassert(iregNo < 31);
798 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
799}
800
801static const HChar* nameIReg32orZR ( UInt iregNo )
802{
803 vassert(iregNo < 32);
804 static const HChar* names[32]
805 = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
806 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
807 "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
808 "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
809 return names[iregNo];
810}
811
812static const HChar* nameIReg32orSP ( UInt iregNo )
813{
814 if (iregNo == 31) {
815 return "wsp";
816 }
817 vassert(iregNo < 31);
818 return nameIReg32orZR(iregNo);
819}
820
821static IRExpr* getIReg32orSP ( UInt iregNo )
822{
823 vassert(iregNo < 32);
824 return unop(Iop_64to32,
825 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
826}
827
828static IRExpr* getIReg32orZR ( UInt iregNo )
829{
830 if (iregNo == 31) {
831 return mkU32(0);
832 }
833 vassert(iregNo < 31);
834 return unop(Iop_64to32,
835 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
836}
837
838static void putIReg32orSP ( UInt iregNo, IRExpr* e )
839{
840 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
841 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
842}
843
844static void putIReg32orZR ( UInt iregNo, IRExpr* e )
845{
846 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
847 if (iregNo == 31) {
848 return;
849 }
850 vassert(iregNo < 31);
851 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
852}
853
854static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
855{
856 vassert(is64 == True || is64 == False);
857 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
858}
859
860static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
861{
862 vassert(is64 == True || is64 == False);
863 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
864}
865
866static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
867{
868 vassert(is64 == True || is64 == False);
869 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
870}
871
872static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
873{
874 vassert(is64 == True || is64 == False);
875 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
876}
877
878static void putPC ( IRExpr* e )
879{
880 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
881 stmt( IRStmt_Put(OFFB_PC, e) );
882}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889 /* We don't care about endianness at this point. It only becomes
890 relevant when dealing with sections of these registers.*/
891 switch (qregNo) {
892 case 0: return OFFB_Q0;
893 case 1: return OFFB_Q1;
894 case 2: return OFFB_Q2;
895 case 3: return OFFB_Q3;
896 case 4: return OFFB_Q4;
897 case 5: return OFFB_Q5;
898 case 6: return OFFB_Q6;
899 case 7: return OFFB_Q7;
900 case 8: return OFFB_Q8;
901 case 9: return OFFB_Q9;
902 case 10: return OFFB_Q10;
903 case 11: return OFFB_Q11;
904 case 12: return OFFB_Q12;
905 case 13: return OFFB_Q13;
906 case 14: return OFFB_Q14;
907 case 15: return OFFB_Q15;
908 case 16: return OFFB_Q16;
909 case 17: return OFFB_Q17;
910 case 18: return OFFB_Q18;
911 case 19: return OFFB_Q19;
912 case 20: return OFFB_Q20;
913 case 21: return OFFB_Q21;
914 case 22: return OFFB_Q22;
915 case 23: return OFFB_Q23;
916 case 24: return OFFB_Q24;
917 case 25: return OFFB_Q25;
918 case 26: return OFFB_Q26;
919 case 27: return OFFB_Q27;
920 case 28: return OFFB_Q28;
921 case 29: return OFFB_Q29;
922 case 30: return OFFB_Q30;
923 case 31: return OFFB_Q31;
924 default: vassert(0);
925 }
926}
927
sewardjbbcf1882014-01-12 12:49:10 +0000928/* Write to a complete Qreg. */
929static void putQReg128 ( UInt qregNo, IRExpr* e )
930{
931 vassert(qregNo < 32);
932 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
933 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
934}
935
936/* Read a complete Qreg. */
937static IRExpr* getQReg128 ( UInt qregNo )
938{
939 vassert(qregNo < 32);
940 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
941}
942
943/* Produce the IR type for some sub-part of a vector. For 32- and 64-
944 bit sub-parts we can choose either integer or float types, and
945 choose float on the basis that that is the common use case and so
946 will give least interference with Put-to-Get forwarding later
947 on. */
948static IRType preferredVectorSubTypeFromSize ( UInt szB )
949{
950 switch (szB) {
951 case 1: return Ity_I8;
952 case 2: return Ity_I16;
953 case 4: return Ity_I32; //Ity_F32;
954 case 8: return Ity_F64;
955 case 16: return Ity_V128;
956 default: vassert(0);
957 }
958}
959
sewardj606c4ba2014-01-26 19:11:14 +0000960/* Find the offset of the laneNo'th lane of type laneTy in the given
961 Qreg. Since the host is little-endian, the least significant lane
962 has the lowest offset. */
963static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardjbbcf1882014-01-12 12:49:10 +0000964{
965 vassert(!host_is_bigendian);
966 Int base = offsetQReg128(qregNo);
sewardj606c4ba2014-01-26 19:11:14 +0000967 /* Since the host is little-endian, the least significant lane
968 will be at the lowest address. */
969 /* Restrict this to known types, so as to avoid silently accepting
970 stupid types. */
971 UInt laneSzB = 0;
972 switch (laneTy) {
sewardj5860ec72014-03-01 11:19:45 +0000973 case Ity_I8: laneSzB = 1; break;
974 case Ity_I16: laneSzB = 2; break;
sewardj606c4ba2014-01-26 19:11:14 +0000975 case Ity_F32: case Ity_I32: laneSzB = 4; break;
976 case Ity_F64: case Ity_I64: laneSzB = 8; break;
977 case Ity_V128: laneSzB = 16; break;
978 default: break;
sewardjbbcf1882014-01-12 12:49:10 +0000979 }
sewardj606c4ba2014-01-26 19:11:14 +0000980 vassert(laneSzB > 0);
981 UInt minOff = laneNo * laneSzB;
982 UInt maxOff = minOff + laneSzB - 1;
983 vassert(maxOff < 16);
984 return base + minOff;
sewardjbbcf1882014-01-12 12:49:10 +0000985}
986
sewardj606c4ba2014-01-26 19:11:14 +0000987/* Put to the least significant lane of a Qreg. */
988static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +0000989{
990 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +0000991 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +0000992 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +0000993 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
994 case Ity_F32: case Ity_F64: case Ity_V128:
995 break;
996 default:
997 vassert(0); // Other cases are probably invalid
sewardjbbcf1882014-01-12 12:49:10 +0000998 }
999 stmt(IRStmt_Put(off, e));
1000}
1001
sewardj606c4ba2014-01-26 19:11:14 +00001002/* Get from the least significant lane of a Qreg. */
1003static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardjbbcf1882014-01-12 12:49:10 +00001004{
sewardj606c4ba2014-01-26 19:11:14 +00001005 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001006 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001007 case Ity_I32: case Ity_I64:
1008 case Ity_F32: case Ity_F64: case Ity_V128:
1009 break;
1010 default:
1011 vassert(0); // Other cases are ATC
sewardjbbcf1882014-01-12 12:49:10 +00001012 }
1013 return IRExpr_Get(off, ty);
1014}
1015
sewardj606c4ba2014-01-26 19:11:14 +00001016static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
sewardjbbcf1882014-01-12 12:49:10 +00001017{
1018 static const HChar* namesQ[32]
1019 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1020 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1021 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1022 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1023 static const HChar* namesD[32]
1024 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1025 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1026 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1027 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1028 static const HChar* namesS[32]
1029 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1030 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1031 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1032 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1033 static const HChar* namesH[32]
1034 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1035 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1036 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1037 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1038 static const HChar* namesB[32]
1039 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1040 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1041 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1042 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1043 vassert(qregNo < 32);
sewardj606c4ba2014-01-26 19:11:14 +00001044 switch (sizeofIRType(laneTy)) {
sewardjbbcf1882014-01-12 12:49:10 +00001045 case 1: return namesB[qregNo];
1046 case 2: return namesH[qregNo];
1047 case 4: return namesS[qregNo];
1048 case 8: return namesD[qregNo];
1049 case 16: return namesQ[qregNo];
1050 default: vassert(0);
1051 }
1052 /*NOTREACHED*/
1053}
1054
/* Produce the name of the whole 128-bit Qreg: "q0" .. "q31". */
static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
1059
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   /* Lane 1 of the I64-lane view is the upper 8 bytes, given that
      offsetQRegLane asserts the host is little-endian. */
   return offsetQRegLane(qregNo, Ity_I64, 1);
}
1066
/* Get the most significant half (8 bytes) of the given Qreg, as an
   Ity_I64 expression. */
static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}
1071
sewardj606c4ba2014-01-26 19:11:14 +00001072static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001073{
1074 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001075 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001076 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001077 case Ity_I64: case Ity_F64:
1078 break;
1079 default:
1080 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001081 }
1082 stmt(IRStmt_Put(off, e));
1083}
1084
sewardj606c4ba2014-01-26 19:11:14 +00001085/* Put to a specified lane of a Qreg. */
1086static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1087{
1088 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1089 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1090 switch (laneTy) {
1091 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001092 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001093 case Ity_I16:
1094 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001095 break;
1096 default:
1097 vassert(0); // Other cases are ATC
1098 }
1099 stmt(IRStmt_Put(off, e));
1100}
1101
sewardj32d86752014-03-02 12:47:18 +00001102/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001103static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1104{
1105 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1106 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001107 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1108 case Ity_F64:
sewardj606c4ba2014-01-26 19:11:14 +00001109 break;
1110 default:
1111 vassert(0); // Other cases are ATC
1112 }
1113 return IRExpr_Get(off, laneTy);
1114}
1115
1116
sewardjbbcf1882014-01-12 12:49:10 +00001117//ZZ /* ---------------- Misc registers ---------------- */
1118//ZZ
1119//ZZ static void putMiscReg32 ( UInt gsoffset,
1120//ZZ IRExpr* e, /* :: Ity_I32 */
1121//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1122//ZZ {
1123//ZZ switch (gsoffset) {
1124//ZZ case OFFB_FPSCR: break;
1125//ZZ case OFFB_QFLAG32: break;
1126//ZZ case OFFB_GEFLAG0: break;
1127//ZZ case OFFB_GEFLAG1: break;
1128//ZZ case OFFB_GEFLAG2: break;
1129//ZZ case OFFB_GEFLAG3: break;
1130//ZZ default: vassert(0); /* awaiting more cases */
1131//ZZ }
1132//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1133//ZZ
1134//ZZ if (guardT == IRTemp_INVALID) {
1135//ZZ /* unconditional write */
1136//ZZ stmt(IRStmt_Put(gsoffset, e));
1137//ZZ } else {
1138//ZZ stmt(IRStmt_Put(
1139//ZZ gsoffset,
1140//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1141//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1142//ZZ ));
1143//ZZ }
1144//ZZ }
1145//ZZ
1146//ZZ static IRTemp get_ITSTATE ( void )
1147//ZZ {
1148//ZZ ASSERT_IS_THUMB;
1149//ZZ IRTemp t = newTemp(Ity_I32);
1150//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1151//ZZ return t;
1152//ZZ }
1153//ZZ
1154//ZZ static void put_ITSTATE ( IRTemp t )
1155//ZZ {
1156//ZZ ASSERT_IS_THUMB;
1157//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1158//ZZ }
1159//ZZ
1160//ZZ static IRTemp get_QFLAG32 ( void )
1161//ZZ {
1162//ZZ IRTemp t = newTemp(Ity_I32);
1163//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1164//ZZ return t;
1165//ZZ }
1166//ZZ
1167//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1168//ZZ {
1169//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1170//ZZ }
1171//ZZ
1172//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1173//ZZ Status Register) to indicate that overflow or saturation occurred.
1174//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1175//ZZ value to indicate saturation. */
1176//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1177//ZZ {
1178//ZZ IRTemp old = get_QFLAG32();
1179//ZZ IRTemp nyu = newTemp(Ity_I32);
1180//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1181//ZZ put_QFLAG32(nyu, condT);
1182//ZZ }
1183
1184
1185/* ---------------- FPCR stuff ---------------- */
1186
1187/* Generate IR to get hold of the rounding mode bits in FPCR, and
1188 convert them to IR format. Bind the final result to the
1189 returned temp. */
1190static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1191{
1192 /* The ARMvfp encoding for rounding mode bits is:
1193 00 to nearest
1194 01 to +infinity
1195 10 to -infinity
1196 11 to zero
1197 We need to convert that to the IR encoding:
1198 00 to nearest (the default)
1199 10 to +infinity
1200 01 to -infinity
1201 11 to zero
1202 Which can be done by swapping bits 0 and 1.
1203 The rmode bits are at 23:22 in FPSCR.
1204 */
1205 IRTemp armEncd = newTemp(Ity_I32);
1206 IRTemp swapped = newTemp(Ity_I32);
1207 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1208 we don't zero out bits 24 and above, since the assignment to
1209 'swapped' will mask them out anyway. */
1210 assign(armEncd,
1211 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1212 /* Now swap them. */
1213 assign(swapped,
1214 binop(Iop_Or32,
1215 binop(Iop_And32,
1216 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1217 mkU32(2)),
1218 binop(Iop_And32,
1219 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1220 mkU32(1))
1221 ));
1222 return swapped;
1223}
1224
1225
1226/*------------------------------------------------------------*/
1227/*--- Helpers for flag handling and conditional insns ---*/
1228/*------------------------------------------------------------*/
1229
1230static const HChar* nameARM64Condcode ( ARM64Condcode cond )
1231{
1232 switch (cond) {
1233 case ARM64CondEQ: return "eq";
1234 case ARM64CondNE: return "ne";
1235 case ARM64CondCS: return "cs"; // or 'hs'
1236 case ARM64CondCC: return "cc"; // or 'lo'
1237 case ARM64CondMI: return "mi";
1238 case ARM64CondPL: return "pl";
1239 case ARM64CondVS: return "vs";
1240 case ARM64CondVC: return "vc";
1241 case ARM64CondHI: return "hi";
1242 case ARM64CondLS: return "ls";
1243 case ARM64CondGE: return "ge";
1244 case ARM64CondLT: return "lt";
1245 case ARM64CondGT: return "gt";
1246 case ARM64CondLE: return "le";
1247 case ARM64CondAL: return "al";
1248 case ARM64CondNV: return "nv";
1249 default: vpanic("name_ARM64Condcode");
1250 }
1251}
1252
/* A handy shorthand for nameARM64Condcode. */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1257
1258
1259/* Build IR to calculate some particular condition from stored
1260 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1261 Ity_I64, suitable for narrowing. Although the return type is
1262 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1263 :: Ity_I64 and must denote the condition to compute in
1264 bits 7:4, and be zero everywhere else.
1265*/
1266static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1267{
1268 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1269 /* And 'cond' had better produce a value in which only bits 7:4 are
1270 nonzero. However, obviously we can't assert for that. */
1271
1272 /* So what we're constructing for the first argument is
1273 "(cond << 4) | stored-operation".
1274 However, as per comments above, 'cond' must be supplied
1275 pre-shifted to this function.
1276
1277 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1278 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1279 8 bits of the first argument. */
1280 IRExpr** args
1281 = mkIRExprVec_4(
1282 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1283 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1284 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1285 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1286 );
1287 IRExpr* call
1288 = mkIRExprCCall(
1289 Ity_I64,
1290 0/*regparm*/,
1291 "arm64g_calculate_condition", &arm64g_calculate_condition,
1292 args
1293 );
1294
1295 /* Exclude the requested condition, OP and NDEP from definedness
1296 checking. We're only interested in DEP1 and DEP2. */
1297 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1298 return call;
1299}
1300
1301
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument.
      The OP part is OR'd in by the _dyn helper, which expects 'cond'
      pre-shifted into bits 7:4. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
1315
1316
1317//ZZ /* Build IR to calculate just the carry flag from stored
1318//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1319//ZZ Ity_I32. */
1320//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
1321//ZZ {
1322//ZZ IRExpr** args
1323//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1324//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1325//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1326//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1327//ZZ IRExpr* call
1328//ZZ = mkIRExprCCall(
1329//ZZ Ity_I32,
1330//ZZ 0/*regparm*/,
1331//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
1332//ZZ args
1333//ZZ );
1334//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1335//ZZ interested in DEP1 and DEP2. */
1336//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1337//ZZ return call;
1338//ZZ }
1339//ZZ
1340//ZZ
1341//ZZ /* Build IR to calculate just the overflow flag from stored
1342//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1343//ZZ Ity_I32. */
1344//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1345//ZZ {
1346//ZZ IRExpr** args
1347//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1348//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1349//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1350//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1351//ZZ IRExpr* call
1352//ZZ = mkIRExprCCall(
1353//ZZ Ity_I32,
1354//ZZ 0/*regparm*/,
1355//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1356//ZZ args
1357//ZZ );
1358//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1359//ZZ interested in DEP1 and DEP2. */
1360//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1361//ZZ return call;
1362//ZZ }
1363
1364
1365/* Build IR to calculate N Z C V in bits 31:28 of the
1366 returned word. */
1367static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1368{
1369 IRExpr** args
1370 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1371 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1372 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1373 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1374 IRExpr* call
1375 = mkIRExprCCall(
1376 Ity_I64,
1377 0/*regparm*/,
1378 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1379 args
1380 );
1381 /* Exclude OP and NDEP from definedness checking. We're only
1382 interested in DEP1 and DEP2. */
1383 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1384 return call;
1385}
1386
1387
1388/* Build IR to set the flags thunk, in the most general case. */
1389static
1390void setFlags_D1_D2_ND ( UInt cc_op,
1391 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1392{
1393 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1394 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1395 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1396 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1397 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1398 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1399 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1400 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1401}
1402
1403/* Build IR to set the flags thunk after ADD or SUB. */
1404static
1405void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1406{
1407 IRTemp argL64 = IRTemp_INVALID;
1408 IRTemp argR64 = IRTemp_INVALID;
1409 IRTemp z64 = newTemp(Ity_I64);
1410 if (is64) {
1411 argL64 = argL;
1412 argR64 = argR;
1413 } else {
1414 argL64 = newTemp(Ity_I64);
1415 argR64 = newTemp(Ity_I64);
1416 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1417 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1418 }
1419 assign(z64, mkU64(0));
1420 UInt cc_op = ARM64G_CC_OP_NUMBER;
1421 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1422 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1423 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1424 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1425 else { vassert(0); }
1426 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1427}
1428
1429/* Build IR to set the flags thunk after ADD or SUB, if the given
1430 condition evaluates to True at run time. If not, the flags are set
1431 to the specified NZCV value. */
1432static
1433void setFlags_ADD_SUB_conditionally (
1434 Bool is64, Bool isSUB,
1435 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1436 )
1437{
1438 /* Generate IR as follows:
1439 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1440 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1441 CC_DEP2 = ITE(cond, argR64, 0)
1442 CC_NDEP = 0
1443 */
1444
1445 IRTemp z64 = newTemp(Ity_I64);
1446 assign(z64, mkU64(0));
1447
1448 /* Establish the operation and operands for the True case. */
1449 IRTemp t_dep1 = IRTemp_INVALID;
1450 IRTemp t_dep2 = IRTemp_INVALID;
1451 UInt t_op = ARM64G_CC_OP_NUMBER;
1452 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1453 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1454 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1455 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1456 else { vassert(0); }
1457 /* */
1458 if (is64) {
1459 t_dep1 = argL;
1460 t_dep2 = argR;
1461 } else {
1462 t_dep1 = newTemp(Ity_I64);
1463 t_dep2 = newTemp(Ity_I64);
1464 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1465 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1466 }
1467
1468 /* Establish the operation and operands for the False case. */
1469 IRTemp f_dep1 = newTemp(Ity_I64);
1470 IRTemp f_dep2 = z64;
1471 UInt f_op = ARM64G_CC_OP_COPY;
1472 assign(f_dep1, mkU64(nzcv << 28));
1473
1474 /* Final thunk values */
1475 IRTemp dep1 = newTemp(Ity_I64);
1476 IRTemp dep2 = newTemp(Ity_I64);
1477 IRTemp op = newTemp(Ity_I64);
1478
1479 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1480 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1481 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1482
1483 /* finally .. */
1484 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1485 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1486 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1487 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1488}
1489
1490/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1491static
1492void setFlags_LOGIC ( Bool is64, IRTemp res )
1493{
1494 IRTemp res64 = IRTemp_INVALID;
1495 IRTemp z64 = newTemp(Ity_I64);
1496 UInt cc_op = ARM64G_CC_OP_NUMBER;
1497 if (is64) {
1498 res64 = res;
1499 cc_op = ARM64G_CC_OP_LOGIC64;
1500 } else {
1501 res64 = newTemp(Ity_I64);
1502 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1503 cc_op = ARM64G_CC_OP_LOGIC32;
1504 }
1505 assign(z64, mkU64(0));
1506 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1507}
1508
/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   /* For OP_COPY, DEP1 carries the NZCV value directly; DEP2 and
      NDEP are unused, so set them to zero. */
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}
1518
1519
1520//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1521//ZZ sets it at all) */
1522//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1523//ZZ IRTemp t_dep2,
1524//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1525//ZZ {
1526//ZZ IRTemp z32 = newTemp(Ity_I32);
1527//ZZ assign( z32, mkU32(0) );
1528//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1529//ZZ }
1530//ZZ
1531//ZZ
1532//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1533//ZZ sets it at all) */
1534//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1535//ZZ IRTemp t_ndep,
1536//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1537//ZZ {
1538//ZZ IRTemp z32 = newTemp(Ity_I32);
1539//ZZ assign( z32, mkU32(0) );
1540//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1541//ZZ }
1542//ZZ
1543//ZZ
1544//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1545//ZZ sets them at all) */
1546//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1547//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1548//ZZ {
1549//ZZ IRTemp z32 = newTemp(Ity_I32);
1550//ZZ assign( z32, mkU32(0) );
1551//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1552//ZZ }
1553
1554
1555/*------------------------------------------------------------*/
1556/*--- Misc math helpers ---*/
1557/*------------------------------------------------------------*/
1558
sewardj32d86752014-03-02 12:47:18 +00001559/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1560static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00001561{
sewardj32d86752014-03-02 12:47:18 +00001562 IRTemp maskT = newTemp(Ity_I64);
1563 IRTemp res = newTemp(Ity_I64);
1564 vassert(sh >= 1 && sh <= 63);
1565 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00001566 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00001567 binop(Iop_Or64,
1568 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00001569 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1570 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00001571 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00001572 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1573 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00001574 )
1575 );
sewardjdc9259c2014-02-27 11:10:19 +00001576 return res;
1577}
1578
sewardj32d86752014-03-02 12:47:18 +00001579/* Generates byte swaps within 32-bit lanes. */
1580static IRTemp math_UINTSWAP64 ( IRTemp src )
1581{
1582 IRTemp res;
1583 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1584 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1585 return res;
1586}
1587
1588/* Generates byte swaps within 16-bit lanes. */
1589static IRTemp math_USHORTSWAP64 ( IRTemp src )
1590{
1591 IRTemp res;
1592 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1593 return res;
1594}
1595
1596/* Generates a 64-bit byte swap. */
1597static IRTemp math_BYTESWAP64 ( IRTemp src )
1598{
1599 IRTemp res;
1600 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
1601 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
1602 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
1603 return res;
1604}
sewardjdc9259c2014-02-27 11:10:19 +00001605
1606/* Generates a 64-bit bit swap. */
1607static IRTemp math_BITSWAP64 ( IRTemp src )
1608{
sewardj32d86752014-03-02 12:47:18 +00001609 IRTemp res;
1610 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
1611 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
1612 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
1613 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00001614}
1615
sewardj606c4ba2014-01-26 19:11:14 +00001616/* Duplicates the bits at the bottom of the given word to fill the
1617 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
1618 except for the bottom bits. */
1619static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1620{
1621 if (srcTy == Ity_I8) {
1622 IRTemp t16 = newTemp(Ity_I64);
1623 assign(t16, binop(Iop_Or64, mkexpr(src),
1624 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1625 IRTemp t32 = newTemp(Ity_I64);
1626 assign(t32, binop(Iop_Or64, mkexpr(t16),
1627 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1628 IRTemp t64 = newTemp(Ity_I64);
1629 assign(t64, binop(Iop_Or64, mkexpr(t32),
1630 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1631 return t64;
1632 }
1633 if (srcTy == Ity_I16) {
1634 IRTemp t32 = newTemp(Ity_I64);
1635 assign(t32, binop(Iop_Or64, mkexpr(src),
1636 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1637 IRTemp t64 = newTemp(Ity_I64);
1638 assign(t64, binop(Iop_Or64, mkexpr(t32),
1639 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1640 return t64;
1641 }
1642 if (srcTy == Ity_I32) {
1643 IRTemp t64 = newTemp(Ity_I64);
1644 assign(t64, binop(Iop_Or64, mkexpr(src),
1645 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1646 return t64;
1647 }
1648 if (srcTy == Ity_I64) {
1649 return src;
1650 }
1651 vassert(0);
1652}
1653
1654
sewardjbbcf1882014-01-12 12:49:10 +00001655/*------------------------------------------------------------*/
1656/*--- FP comparison helpers ---*/
1657/*------------------------------------------------------------*/
1658
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the I32 comparison result to I64 so all the bit fiddling
      below can be done at one width. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (irRes >> 5) & 3 juxtaposed with irRes & 1, i.e. bits 6:5
      (of which only bit 6 is meaningful) and bit 0. */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   /* termR = ix & (ix >> 1) & 1: nonzero only when both low bits of
      ix are set, which is the UN case. */
   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1735
1736
1737/*------------------------------------------------------------*/
1738/*--- Data processing (immediate) ---*/
1739/*------------------------------------------------------------*/
1740
1741/* Helper functions for supporting "DecodeBitMasks" */
1742
1743static ULong dbm_ROR ( Int width, ULong x, Int rot )
1744{
1745 vassert(width > 0 && width <= 64);
1746 vassert(rot >= 0 && rot < width);
1747 if (rot == 0) return x;
1748 ULong res = x >> rot;
1749 res |= (x << (width - rot));
1750 if (width < 64)
1751 res &= ((1ULL << width) - 1);
1752 return res;
1753}
1754
1755static ULong dbm_RepTo64( Int esize, ULong x )
1756{
1757 switch (esize) {
1758 case 64:
1759 return x;
1760 case 32:
1761 x &= 0xFFFFFFFF; x |= (x << 32);
1762 return x;
1763 case 16:
1764 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1765 return x;
1766 case 8:
1767 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1768 return x;
1769 case 4:
1770 x &= 0xF; x |= (x << 4); x |= (x << 8);
1771 x |= (x << 16); x |= (x << 32);
1772 return x;
1773 case 2:
1774 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1775 x |= (x << 16); x |= (x << 32);
1776 return x;
1777 default:
1778 break;
1779 }
1780 vpanic("dbm_RepTo64");
1781 /*NOTREACHED*/
1782 return 0;
1783}
1784
1785static Int dbm_highestSetBit ( ULong x )
1786{
1787 Int i;
1788 for (i = 63; i >= 0; i--) {
1789 if (x & (1ULL << i))
1790 return i;
1791 }
1792 vassert(x == 0);
1793 return -1;
1794}
1795
/* Decode ARM64 bitmask-immediate fields (N:immr:imms), following the
   DecodeBitMasks() pseudocode in the ARM ARM.  On success, writes the
   wmask (logical-immediate mask) through *wmask and the tmask
   (bitfield "top" mask) through *tmask -- either pointer may be NULL
   if that output is not wanted -- and returns True.  Returns False
   for encodings that are reserved/invalid.  'immediate' selects the
   extra reservation check used for logical-immediate decoding.  M is
   the register width, 32 or 64. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = index of the highest set bit of N:NOT(imms); determines
      the element size (2^len). */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates, an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask: S+1 ones rotated right by R, replicated to 64 bits.
      tmask: d+1 ones, replicated to 64 bits. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1855
1856
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
   /* Decode and translate one instruction from the A64 "data
      processing (immediate)" group: PC-relative addressing, add/sub
      immediate, logical immediate, move-wide, bitfield and extract.
      Emits IR for the instruction and returns True on success;
      returns False (with no IR emitted) if 'insn' is not recognised.
      'dres' is not modified by any of the cases handled here. */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh == 1 means the 12-bit immediate is left-shifted by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting form: destination register 31 is ZR. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               /* Non-flag-setting form: destination register 31 is SP. */
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      /* 21-bit immediate, assembled from the split immHi:immLo field,
         then sign-extended to 64 bits. */
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         /* ADRP: offset is in 4KB pages, relative to the 4KB-aligned
            page containing this instruction. */
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         /* ADR: plain byte offset from the instruction address. */
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
         op=00: AND  Rd|SP, Rn, #imm
         op=01: ORR  Rd|SP, Rn, #imm
         op=10: EOR  Rd|SP, Rn, #imm
         op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      /* N==1 is only meaningful for 64-bit operands. */
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      /* Expand the (N,imms,immr) bitmask-immediate encoding; rejects
         unrepresentable combinations. */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR: dd == 31 means SP, and no flags are set. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS: dd == 31 means ZR, and NZCV are updated. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
  after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      /* subopc 01 is unallocated; 32-bit forms only allow hw of 0 or 1. */
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               /* Keep the inverted value within 32 bits for the W form. */
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  /* Clear the 16-bit lane, then OR in the new slice. */
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;   /* insert into zeroes (vs into dest)? */
      Bool extend = False;   /* sign-extend above the field? */
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      /* Architectural constraints: N must track sf, and for 32-bit
         forms bit 5 of immr/imms must be clear. */
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                               || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
  after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      /* Bit 22 ('N') must equal sf, and a W-form shift must be < 32. */
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        /* Zero shift would need an invalid shift-by-width below, so
           the result is just the low source. */
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        /* Concatenate Rn:Rm and take bits [imm6+szBits-1 : imm6]. */
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2198
2199
2200/*------------------------------------------------------------*/
2201/*--- Data processing (register) instructions ---*/
2202/*------------------------------------------------------------*/
2203
2204static const HChar* nameSH ( UInt sh ) {
2205 switch (sh) {
2206 case 0: return "lsl";
2207 case 1: return "lsr";
2208 case 2: return "asr";
2209 case 3: return "ror";
2210 default: vassert(0);
2211 }
2212}
2213
2214/* Generate IR to get a register value, possibly shifted by an
2215 immediate. Returns either a 32- or 64-bit temporary holding the
2216 result. After the shift, the value can optionally be NOT-ed
2217 too.
2218
2219 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2220 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2221 isn't allowed, but it's the job of the caller to check that.
2222*/
2223static IRTemp getShiftedIRegOrZR ( Bool is64,
2224 UInt sh_how, UInt sh_amt, UInt regNo,
2225 Bool invert )
2226{
2227 vassert(sh_how < 4);
2228 vassert(sh_amt < (is64 ? 64 : 32));
2229 IRType ty = is64 ? Ity_I64 : Ity_I32;
2230 IRTemp t0 = newTemp(ty);
2231 assign(t0, getIRegOrZR(is64, regNo));
2232 IRTemp t1 = newTemp(ty);
2233 switch (sh_how) {
2234 case BITS2(0,0):
2235 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2236 break;
2237 case BITS2(0,1):
2238 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2239 break;
2240 case BITS2(1,0):
2241 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2242 break;
2243 case BITS2(1,1):
2244 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2245 break;
2246 default:
2247 vassert(0);
2248 }
2249 if (invert) {
2250 IRTemp t2 = newTemp(ty);
2251 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2252 return t2;
2253 } else {
2254 return t1;
2255 }
2256}
2257
2258
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
   /* Decode and translate one instruction from the A64 "data
      processing (register)" group: shifted/extended-register add/sub,
      logical ops, multiplies, conditional select, conditional compare,
      bit/byte reversals, count-leading-zeroes, variable shifts and
      divides.  Emits IR and returns True on success; returns False
      (no IR emitted) if 'insn' is not recognised.  'dres' is not
      modified by any of the cases handled here. */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29, 29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      /* A W-form shift amount must be < 32, and ROR is reserved. */
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* rD == 31 is ZR; the guard merely avoids a discarded write. */
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd  AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd  ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd  EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd  ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall though */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         /* bN selects the optional NOT after the shift. */
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* Only ANDS/BICS (opc == 11) update NZCV. */
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         /* NOTE(review): this prints "mov" for any op with Rn==ZR and
            no shift/invert, including AND/EOR/ANDS where the canonical
            alias is only defined for ORR -- disassembly text only,
            generated IR is unaffected.  Verify intended. */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31       23 22 20 15     9  4
      10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
      10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   /* Note: the INSN(23,23) == 1 term below means only UMULH is
      accepted; SMULH deliberately falls through (ATC = awaiting a
      test case), so 'isU' is always True when this arm is taken. */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)
       && INSN(23,23) == 1/*ATC*/) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      /* Full 128-bit product, of which only the high half is kept. */
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30           20 15 14 9 4
      sf 00 11011 000  m  0  a n r   MADD Rd,Rn,Rm,Ra  d = a+m*n
      sf 00 11011 000  m  1  a n r   MSUB Rd,Rn,Rm,Ra  d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28        20 15   11 9  4
      sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      /* Pre-transform the else-operand per the op variant. */
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

         000   Xm & 0xFF           UXTB
         001   Xm & 0xFFFF         UXTH
         010   Xm & (2^32)-1       UXTW
         011   Xm                  UXTX

         100   Xm sx from bit 7    SXTB
         101   Xm sx from bit 15   SXTH
         110   Xm sx from bit 31   SXTW
         111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases:
         compute everything at 64 bits and narrow at the end. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            /* Sign-extend via shift-left then arithmetic shift-right. */
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                                   mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            /* Flag-setting form: dd == 31 is ZR, not SP. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            /* Flags are computed from the narrowed 32-bit operands. */
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29        20   15   11 9  3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);   /* zero-extended immediate operand */
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29        20 15   11 9  3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28        20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      /* Map (b31,opc) to case index ix per the table above; ix == 0
         means unrecognised (falls through below). */
      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         /* All variants share one shape: pick a 64-bit swap helper,
            then fix up the 32-bit cases around it. */
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
           = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit form: place the W value in the top half so the
               64-bit swap leaves the result in the low 32 bits. */
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      if (!isCLS) { // CLS not yet supported
         /* Iop_Clz64 is undefined for a zero input, so handle the
            all-zeroes case explicitly (result = operand width). */
         if (is64) {
            assign(src, getIReg64orZR(nn));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(64),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* W form: shift into the top half so leading zeroes of the
               32-bit value are counted by the 64-bit Clz. */
            assign(src, binop(Iop_Shl64,
                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(32),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("cl%c %s, %s\n",
             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
         return True;
      }
   }

   /* -------------------- LSLV/LSRV/ASRV -------------------- */
   /*    30 28        20 15   11 9  4
      sf 00 1101 0110 m  0010 00 n  d   LSLV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 01 n  d   LSRV Rd,Rn,Rm
      sf 00 1101 0110 m  0010 10 n  d   ASRV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
      Bool   is64 = INSN(31,31) == 1;
      UInt   mm   = INSN(20,16);
      UInt   op   = INSN(11,10);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I8);
      IRTemp res  = newTemp(ty);
      IROp   iop  = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      /* The shift amount is Rm taken modulo the operand width. */
      assign(srcR,
             unop(Iop_64to8,
                  binop(Iop_And64,
                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
      switch (op) {
         case BITS2(0,0): iop = mkSHL(ty); break;
         case BITS2(0,1): iop = mkSHR(ty); break;
         case BITS2(1,0): iop = mkSAR(ty); break;
         default: vassert(0);
      }
      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 3);
      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
                     nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /*    30 28        20 15    10 9 4
      sf 00 1101 0110 m  00001  1 n d  SDIV Rd,Rn,Rm
      sf 00 1101 0110 m  00001  0 n d  UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm   = INSN(20,16);
      Bool isS  = INSN(10,10) == 1;
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31        23  20 15 14 9 4
      1001 1011 101 m  0  a  n d   UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m  0  a  n d   SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m  1  a  n d   UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m  1  a  n d   SMSUBL Xd,Wn,Wm,Xa
      with operation
         Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool   isU   = INSN(23,23) == 1;
      UInt   mm    = INSN(20,16);
      Bool   isAdd = INSN(15,15) == 0;
      UInt   aa    = INSN(14,10);
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp wN    = newTemp(Ity_I32);
      IRTemp wM    = newTemp(Ity_I32);
      IRTemp xA    = newTemp(Ity_I64);
      IRTemp muld  = newTemp(Ity_I64);
      IRTemp res   = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      /* 32x32 -> 64 widening multiply, then accumulate at 64 bits. */
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
#  undef INSN
}
2876
2877
2878/*------------------------------------------------------------*/
2879/*--- Load and Store instructions ---*/
2880/*------------------------------------------------------------*/
2881
2882/* Generate the EA for a "reg + reg" style amode. This is done from
2883 parts of the insn, but for sanity checking sake it takes the whole
2884 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
2885 and S=insn[12]:
2886
2887 The possible forms, along with their opt:S values, are:
2888 011:0 Xn|SP + Xm
2889 111:0 Xn|SP + Xm
2890 011:1 Xn|SP + Xm * transfer_szB
2891 111:1 Xn|SP + Xm * transfer_szB
2892 010:0 Xn|SP + 32Uto64(Wm)
2893 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
2894 110:0 Xn|SP + 32Sto64(Wm)
2895 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
2896
2897 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
2898 the transfer size is insn[23,31,30]. For integer loads/stores,
2899 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2900
2901 If the decoding fails, it returns IRTemp_INVALID.
2902
   isInt is True iff this decoding is for transfers to/from integer
2904 registers. If False it is for transfers to/from vector registers.
2905*/
2906static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
2907{
2908 UInt optS = SLICE_UInt(insn, 15, 12);
2909 UInt mm = SLICE_UInt(insn, 20, 16);
2910 UInt nn = SLICE_UInt(insn, 9, 5);
2911 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
2912 | SLICE_UInt(insn, 31, 30); // Log2 of the size
2913
2914 buf[0] = 0;
2915
2916 /* Sanity checks, that this really is a load/store insn. */
2917 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
2918 goto fail;
2919
2920 if (isInt
2921 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
2922 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
2923 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
2924 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
2925 goto fail;
2926
2927 if (!isInt
2928 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
2929 goto fail;
2930
2931 /* Throw out non-verified but possibly valid cases. */
2932 switch (szLg2) {
2933 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
2934 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
2935 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
2936 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
2937 case BITS3(1,0,0): // can only ever be valid for the vector case
2938 if (isInt) goto fail; else goto fail;
2939 case BITS3(1,0,1): // these sizes are never valid
2940 case BITS3(1,1,0):
2941 case BITS3(1,1,1): goto fail;
2942
2943 default: vassert(0);
2944 }
2945
2946 IRExpr* rhs = NULL;
2947 switch (optS) {
2948 case BITS4(1,1,1,0): goto fail; //ATC
2949 case BITS4(0,1,1,0):
2950 rhs = getIReg64orZR(mm);
2951 vex_sprintf(buf, "[%s, %s]",
2952 nameIReg64orZR(nn), nameIReg64orZR(mm));
2953 break;
2954 case BITS4(1,1,1,1): goto fail; //ATC
2955 case BITS4(0,1,1,1):
2956 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
2957 vex_sprintf(buf, "[%s, %s lsl %u]",
2958 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
2959 break;
2960 case BITS4(0,1,0,0):
2961 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
2962 vex_sprintf(buf, "[%s, %s uxtx]",
2963 nameIReg64orZR(nn), nameIReg32orZR(mm));
2964 break;
2965 case BITS4(0,1,0,1):
2966 rhs = binop(Iop_Shl64,
2967 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
2968 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
2969 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2970 break;
2971 case BITS4(1,1,0,0):
2972 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
2973 vex_sprintf(buf, "[%s, %s sxtx]",
2974 nameIReg64orZR(nn), nameIReg32orZR(mm));
2975 break;
2976 case BITS4(1,1,0,1):
2977 rhs = binop(Iop_Shl64,
2978 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
2979 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
2980 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2981 break;
2982 default:
2983 /* The rest appear to be genuinely invalid */
2984 goto fail;
2985 }
2986
2987 vassert(rhs);
2988 IRTemp res = newTemp(Ity_I64);
2989 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
2990 return res;
2991
2992 fail:
2993 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
2994 return IRTemp_INVALID;
2995}
2996
2997
2998/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
2999 bits of DATAE :: Ity_I64. */
3000static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3001{
3002 IRExpr* addrE = mkexpr(addr);
3003 switch (szB) {
3004 case 8:
3005 storeLE(addrE, dataE);
3006 break;
3007 case 4:
3008 storeLE(addrE, unop(Iop_64to32, dataE));
3009 break;
3010 case 2:
3011 storeLE(addrE, unop(Iop_64to16, dataE));
3012 break;
3013 case 1:
3014 storeLE(addrE, unop(Iop_64to8, dataE));
3015 break;
3016 default:
3017 vassert(0);
3018 }
3019}
3020
3021
3022/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3023 placing the result in an Ity_I64 temporary. */
3024static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3025{
3026 IRTemp res = newTemp(Ity_I64);
3027 IRExpr* addrE = mkexpr(addr);
3028 switch (szB) {
3029 case 8:
3030 assign(res, loadLE(Ity_I64,addrE));
3031 break;
3032 case 4:
3033 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3034 break;
3035 case 2:
3036 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3037 break;
3038 case 1:
3039 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3040 break;
3041 default:
3042 vassert(0);
3043 }
3044 return res;
3045}
3046
3047
3048static
3049Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3050{
3051# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3052
3053 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3054 /* uimm12 is scaled by the transfer size
3055
3056 31 29 26 21 9 4
3057 | | | | | |
3058 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3059 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3060
3061 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3062 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3063
3064 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3065 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3066
3067 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3068 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3069 */
3070 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3071 UInt szLg2 = INSN(31,30);
3072 UInt szB = 1 << szLg2;
3073 Bool isLD = INSN(22,22) == 1;
3074 UInt offs = INSN(21,10) * szB;
3075 UInt nn = INSN(9,5);
3076 UInt tt = INSN(4,0);
3077 IRTemp ta = newTemp(Ity_I64);
3078 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3079 if (nn == 31) { /* FIXME generate stack alignment check */ }
3080 vassert(szLg2 < 4);
3081 if (isLD) {
3082 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3083 } else {
3084 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3085 }
3086 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3087 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3088 DIP("%s %s, [%s, #%u]\n",
3089 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3090 nameIReg64orSP(nn), offs);
3091 return True;
3092 }
3093
3094 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3095 /*
3096 31 29 26 20 11 9 4
3097 | | | | | | |
3098 (at-Rn-then-Rn=EA) | | |
3099 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3100 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3101
3102 (at-EA-then-Rn=EA)
3103 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3104 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3105
3106 (at-EA)
3107 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3108 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3109
3110 simm9 is unscaled.
3111
3112 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
      load case this is because it would create two competing values for
3114 Rt. In the store case the reason is unclear, but the spec
3115 disallows it anyway.
3116
3117 Stores are narrowing, loads are unsigned widening. sz encodes
3118 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3119 */
3120 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3121 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3122 UInt szLg2 = INSN(31,30);
3123 UInt szB = 1 << szLg2;
3124 Bool isLoad = INSN(22,22) == 1;
3125 UInt imm9 = INSN(20,12);
3126 UInt nn = INSN(9,5);
3127 UInt tt = INSN(4,0);
3128 Bool wBack = INSN(10,10) == 1;
3129 UInt how = INSN(11,10);
3130 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3131 /* undecodable; fall through */
3132 } else {
3133 if (nn == 31) { /* FIXME generate stack alignment check */ }
3134
3135 // Compute the transfer address TA and the writeback address WA.
3136 IRTemp tRN = newTemp(Ity_I64);
3137 assign(tRN, getIReg64orSP(nn));
3138 IRTemp tEA = newTemp(Ity_I64);
3139 Long simm9 = (Long)sx_to_64(imm9, 9);
3140 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3141
3142 IRTemp tTA = newTemp(Ity_I64);
3143 IRTemp tWA = newTemp(Ity_I64);
3144 switch (how) {
3145 case BITS2(0,1):
3146 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3147 case BITS2(1,1):
3148 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3149 case BITS2(0,0):
3150 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3151 default:
3152 vassert(0); /* NOTREACHED */
3153 }
3154
sewardje0bff8b2014-03-09 09:40:23 +00003155 /* Normally rN would be updated after the transfer. However, in
         the special case typified by
3157 str x30, [sp,#-16]!
3158 it is necessary to update SP before the transfer, (1)
3159 because Memcheck will otherwise complain about a write
3160 below the stack pointer, and (2) because the segfault
3161 stack extension mechanism will otherwise extend the stack
3162 only down to SP before the instruction, which might not be
3163 far enough, if the -16 bit takes the actual access
3164 address to the next page.
3165 */
3166 Bool earlyWBack
3167 = wBack && simm9 < 0 && szB == 8
3168 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3169
3170 if (wBack && earlyWBack)
3171 putIReg64orSP(nn, mkexpr(tEA));
3172
sewardjbbcf1882014-01-12 12:49:10 +00003173 if (isLoad) {
3174 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3175 } else {
3176 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3177 }
3178
sewardje0bff8b2014-03-09 09:40:23 +00003179 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003180 putIReg64orSP(nn, mkexpr(tEA));
3181
3182 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3183 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3184 const HChar* fmt_str = NULL;
3185 switch (how) {
3186 case BITS2(0,1):
3187 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3188 break;
3189 case BITS2(1,1):
3190 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3191 break;
3192 case BITS2(0,0):
3193 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3194 break;
3195 default:
3196 vassert(0);
3197 }
3198 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3199 nameIRegOrZR(szB == 8, tt),
3200 nameIReg64orSP(nn), simm9);
3201 return True;
3202 }
3203 }
3204
3205 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3206 /* L==1 => mm==LD
3207 L==0 => mm==ST
3208 x==0 => 32 bit transfers, and zero extended loads
3209 x==1 => 64 bit transfers
3210 simm7 is scaled by the (single-register) transfer size
3211
3212 (at-Rn-then-Rn=EA)
3213 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3214
3215 (at-EA-then-Rn=EA)
3216 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3217
3218 (at-EA)
3219 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3220 */
3221
3222 UInt insn_30_23 = INSN(30,23);
3223 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3224 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3225 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3226 UInt bL = INSN(22,22);
3227 UInt bX = INSN(31,31);
3228 UInt bWBack = INSN(23,23);
3229 UInt rT1 = INSN(4,0);
3230 UInt rN = INSN(9,5);
3231 UInt rT2 = INSN(14,10);
3232 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3233 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3234 || (bL && rT1 == rT2)) {
3235 /* undecodable; fall through */
3236 } else {
3237 if (rN == 31) { /* FIXME generate stack alignment check */ }
3238
3239 // Compute the transfer address TA and the writeback address WA.
3240 IRTemp tRN = newTemp(Ity_I64);
3241 assign(tRN, getIReg64orSP(rN));
3242 IRTemp tEA = newTemp(Ity_I64);
3243 simm7 = (bX ? 8 : 4) * simm7;
3244 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3245
3246 IRTemp tTA = newTemp(Ity_I64);
3247 IRTemp tWA = newTemp(Ity_I64);
3248 switch (INSN(24,23)) {
3249 case BITS2(0,1):
3250 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3251 case BITS2(1,1):
3252 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3253 case BITS2(1,0):
3254 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3255 default:
3256 vassert(0); /* NOTREACHED */
3257 }
3258
3259 /* Normally rN would be updated after the transfer. However, in
            the special case typified by
3261 stp x29, x30, [sp,#-112]!
3262 it is necessary to update SP before the transfer, (1)
3263 because Memcheck will otherwise complain about a write
3264 below the stack pointer, and (2) because the segfault
3265 stack extension mechanism will otherwise extend the stack
3266 only down to SP before the instruction, which might not be
3267 far enough, if the -112 bit takes the actual access
3268 address to the next page.
3269 */
3270 Bool earlyWBack
3271 = bWBack && simm7 < 0
3272 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3273
3274 if (bWBack && earlyWBack)
3275 putIReg64orSP(rN, mkexpr(tEA));
3276
3277 /**/ if (bL == 1 && bX == 1) {
3278 // 64 bit load
3279 putIReg64orZR(rT1, loadLE(Ity_I64,
3280 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3281 putIReg64orZR(rT2, loadLE(Ity_I64,
3282 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3283 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003284 // 32 bit load
3285 putIReg32orZR(rT1, loadLE(Ity_I32,
3286 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3287 putIReg32orZR(rT2, loadLE(Ity_I32,
3288 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3289 } else if (bL == 0 && bX == 1) {
3290 // 64 bit store
3291 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3292 getIReg64orZR(rT1));
3293 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3294 getIReg64orZR(rT2));
3295 } else {
3296 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003297 // 32 bit store
3298 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3299 getIReg32orZR(rT1));
3300 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3301 getIReg32orZR(rT2));
3302 }
3303
3304 if (bWBack && !earlyWBack)
3305 putIReg64orSP(rN, mkexpr(tEA));
3306
3307 const HChar* fmt_str = NULL;
3308 switch (INSN(24,23)) {
3309 case BITS2(0,1):
3310 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3311 break;
3312 case BITS2(1,1):
3313 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3314 break;
3315 case BITS2(1,0):
3316 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3317 break;
3318 default:
3319 vassert(0);
3320 }
3321 DIP(fmt_str, bL == 0 ? "st" : "ld",
3322 nameIRegOrZR(bX == 1, rT1),
3323 nameIRegOrZR(bX == 1, rT2),
3324 nameIReg64orSP(rN), simm7);
3325 return True;
3326 }
3327 }
3328
3329 /* ---------------- LDR (literal, int reg) ---------------- */
3330 /* 31 29 23 4
3331 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3332 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3333 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3334 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3335 Just handles the first two cases for now.
3336 */
3337 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3338 UInt imm19 = INSN(23,5);
3339 UInt rT = INSN(4,0);
3340 UInt bX = INSN(30,30);
3341 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3342 if (bX) {
3343 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3344 } else {
3345 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3346 }
3347 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3348 return True;
3349 }
3350
3351 /* -------------- {LD,ST}R (integer register) --------------- */
3352 /* 31 29 20 15 12 11 9 4
3353 | | | | | | | |
3354 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3355 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3356 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3357 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3358
3359 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3360 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3361 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3362 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3363 */
3364 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3365 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3366 HChar dis_buf[64];
3367 UInt szLg2 = INSN(31,30);
3368 Bool isLD = INSN(22,22) == 1;
3369 UInt tt = INSN(4,0);
3370 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3371 if (ea != IRTemp_INVALID) {
3372 switch (szLg2) {
3373 case 3: /* 64 bit */
3374 if (isLD) {
3375 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3376 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3377 } else {
3378 storeLE(mkexpr(ea), getIReg64orZR(tt));
3379 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3380 }
3381 break;
3382 case 2: /* 32 bit */
3383 if (isLD) {
3384 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3385 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3386 } else {
3387 storeLE(mkexpr(ea), getIReg32orZR(tt));
3388 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3389 }
3390 break;
3391 case 1: /* 16 bit */
3392 if (isLD) {
3393 putIReg64orZR(tt, unop(Iop_16Uto64,
3394 loadLE(Ity_I16, mkexpr(ea))));
3395 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3396 } else {
3397 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3398 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3399 }
3400 break;
3401 case 0: /* 8 bit */
3402 if (isLD) {
3403 putIReg64orZR(tt, unop(Iop_8Uto64,
3404 loadLE(Ity_I8, mkexpr(ea))));
3405 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
3406 } else {
3407 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3408 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3409 }
3410 break;
3411 default:
3412 vassert(0);
3413 }
3414 return True;
3415 }
3416 }
3417
3418 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3419 /* 31 29 26 23 21 9 4
3420 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3421 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3422 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3423 where
3424 Rt is Wt when x==1, Xt when x==0
3425 */
3426 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3427 /* Further checks on bits 31:30 and 22 */
3428 Bool valid = False;
3429 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3430 case BITS3(1,0,0):
3431 case BITS3(0,1,0): case BITS3(0,1,1):
3432 case BITS3(0,0,0): case BITS3(0,0,1):
3433 valid = True;
3434 break;
3435 }
3436 if (valid) {
3437 UInt szLg2 = INSN(31,30);
3438 UInt bitX = INSN(22,22);
3439 UInt imm12 = INSN(21,10);
3440 UInt nn = INSN(9,5);
3441 UInt tt = INSN(4,0);
3442 UInt szB = 1 << szLg2;
3443 IRExpr* ea = binop(Iop_Add64,
3444 getIReg64orSP(nn), mkU64(imm12 * szB));
3445 switch (szB) {
3446 case 4:
3447 vassert(bitX == 0);
3448 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
3449 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
3450 nameIReg64orSP(nn), imm12 * szB);
3451 break;
3452 case 2:
3453 if (bitX == 1) {
3454 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
3455 } else {
3456 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
3457 }
3458 DIP("ldrsh %s, [%s, #%u]\n",
3459 nameIRegOrZR(bitX == 0, tt),
3460 nameIReg64orSP(nn), imm12 * szB);
3461 break;
3462 case 1:
3463 if (bitX == 1) {
3464 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
3465 } else {
3466 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
3467 }
3468 DIP("ldrsb %s, [%s, #%u]\n",
3469 nameIRegOrZR(bitX == 0, tt),
3470 nameIReg64orSP(nn), imm12 * szB);
3471 break;
3472 default:
3473 vassert(0);
3474 }
3475 return True;
3476 }
3477 /* else fall through */
3478 }
3479
3480 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3481 /* (at-Rn-then-Rn=EA)
3482 31 29 23 21 20 11 9 4
3483 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
3484 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
3485 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
3486
3487 (at-EA-then-Rn=EA)
3488 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
3489 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
3490 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
3491 where
3492 Rt is Wt when x==1, Xt when x==0
3493 transfer-at-Rn when [11]==0, at EA when [11]==1
3494 */
3495 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3496 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3497 /* Further checks on bits 31:30 and 22 */
3498 Bool valid = False;
3499 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3500 case BITS3(1,0,0): // LDRSW Xt
3501 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3502 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3503 valid = True;
3504 break;
3505 }
3506 if (valid) {
3507 UInt szLg2 = INSN(31,30);
3508 UInt imm9 = INSN(20,12);
3509 Bool atRN = INSN(11,11) == 0;
3510 UInt nn = INSN(9,5);
3511 UInt tt = INSN(4,0);
3512 IRTemp tRN = newTemp(Ity_I64);
3513 IRTemp tEA = newTemp(Ity_I64);
3514 IRTemp tTA = IRTemp_INVALID;
3515 ULong simm9 = sx_to_64(imm9, 9);
3516 Bool is64 = INSN(22,22) == 0;
3517 assign(tRN, getIReg64orSP(nn));
3518 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3519 tTA = atRN ? tRN : tEA;
3520 HChar ch = '?';
3521 /* There are 5 cases:
3522 byte load, SX to 64
3523 byte load, SX to 32, ZX to 64
3524 halfword load, SX to 64
3525 halfword load, SX to 32, ZX to 64
3526 word load, SX to 64
3527 The ifs below handle them in the listed order.
3528 */
3529 if (szLg2 == 0) {
3530 ch = 'b';
3531 if (is64) {
3532 putIReg64orZR(tt, unop(Iop_8Sto64,
3533 loadLE(Ity_I8, mkexpr(tTA))));
3534 } else {
3535 putIReg32orZR(tt, unop(Iop_8Sto32,
3536 loadLE(Ity_I8, mkexpr(tTA))));
3537 }
3538 }
3539 else if (szLg2 == 1) {
3540 ch = 'h';
3541 if (is64) {
3542 putIReg64orZR(tt, unop(Iop_16Sto64,
3543 loadLE(Ity_I16, mkexpr(tTA))));
3544 } else {
3545 putIReg32orZR(tt, unop(Iop_16Sto32,
3546 loadLE(Ity_I16, mkexpr(tTA))));
3547 }
3548 }
3549 else if (szLg2 == 2 && is64) {
3550 ch = 'w';
3551 putIReg64orZR(tt, unop(Iop_32Sto64,
3552 loadLE(Ity_I32, mkexpr(tTA))));
3553 }
3554 else {
3555 vassert(0);
3556 }
3557 putIReg64orSP(nn, mkexpr(tEA));
3558 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3559 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3560 return True;
3561 }
3562 /* else fall through */
3563 }
3564
3565 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3566 /* 31 29 23 21 20 11 9 4
3567 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
3568 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
3569 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
3570 where
3571 Rt is Wt when x==1, Xt when x==0
3572 */
3573 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3574 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3575 /* Further checks on bits 31:30 and 22 */
3576 Bool valid = False;
3577 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3578 case BITS3(1,0,0): // LDURSW Xt
3579 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3580 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3581 valid = True;
3582 break;
3583 }
3584 if (valid) {
3585 UInt szLg2 = INSN(31,30);
3586 UInt imm9 = INSN(20,12);
3587 UInt nn = INSN(9,5);
3588 UInt tt = INSN(4,0);
3589 IRTemp tRN = newTemp(Ity_I64);
3590 IRTemp tEA = newTemp(Ity_I64);
3591 ULong simm9 = sx_to_64(imm9, 9);
3592 Bool is64 = INSN(22,22) == 0;
3593 assign(tRN, getIReg64orSP(nn));
3594 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3595 HChar ch = '?';
3596 /* There are 5 cases:
3597 byte load, SX to 64
3598 byte load, SX to 32, ZX to 64
3599 halfword load, SX to 64
3600 halfword load, SX to 32, ZX to 64
3601 word load, SX to 64
3602 The ifs below handle them in the listed order.
3603 */
3604 if (szLg2 == 0) {
3605 ch = 'b';
3606 if (is64) {
3607 putIReg64orZR(tt, unop(Iop_8Sto64,
3608 loadLE(Ity_I8, mkexpr(tEA))));
3609 } else {
3610 putIReg32orZR(tt, unop(Iop_8Sto32,
3611 loadLE(Ity_I8, mkexpr(tEA))));
3612 }
3613 }
3614 else if (szLg2 == 1) {
3615 ch = 'h';
3616 if (is64) {
3617 putIReg64orZR(tt, unop(Iop_16Sto64,
3618 loadLE(Ity_I16, mkexpr(tEA))));
3619 } else {
3620 putIReg32orZR(tt, unop(Iop_16Sto32,
3621 loadLE(Ity_I16, mkexpr(tEA))));
3622 }
3623 }
3624 else if (szLg2 == 2 && is64) {
3625 ch = 'w';
3626 putIReg64orZR(tt, unop(Iop_32Sto64,
3627 loadLE(Ity_I32, mkexpr(tEA))));
3628 }
3629 else {
3630 vassert(0);
3631 }
3632 DIP("ldurs%c %s, [%s, #%lld]",
3633 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3634 return True;
3635 }
3636 /* else fall through */
3637 }
3638
3639 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
3640 /* L==1 => mm==LD
3641 L==0 => mm==ST
3642 sz==00 => 32 bit (S) transfers
3643 sz==01 => 64 bit (D) transfers
3644 sz==10 => 128 bit (Q) transfers
3645 sz==11 isn't allowed
3646 simm7 is scaled by the (single-register) transfer size
3647
3648 31 29 22 21 14 9 4
3649 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
3650 (at-Rn-then-Rn=EA)
3651
3652 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
3653 (at-EA-then-Rn=EA)
3654
3655 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
3656 (at-EA)
3657 */
3658
3659 UInt insn_29_23 = INSN(29,23);
3660 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
3661 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
3662 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
3663 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
3664 Bool isLD = INSN(22,22) == 1;
3665 Bool wBack = INSN(23,23) == 1;
3666 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3667 UInt tt2 = INSN(14,10);
3668 UInt nn = INSN(9,5);
3669 UInt tt1 = INSN(4,0);
3670 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
3671 /* undecodable; fall through */
3672 } else {
3673 if (nn == 31) { /* FIXME generate stack alignment check */ }
3674
3675 // Compute the transfer address TA and the writeback address WA.
3676 UInt szB = 4 << szSlg2; /* szB is the per-register size */
3677 IRTemp tRN = newTemp(Ity_I64);
3678 assign(tRN, getIReg64orSP(nn));
3679 IRTemp tEA = newTemp(Ity_I64);
3680 simm7 = szB * simm7;
3681 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3682
3683 IRTemp tTA = newTemp(Ity_I64);
3684 IRTemp tWA = newTemp(Ity_I64);
3685 switch (INSN(24,23)) {
3686 case BITS2(0,1):
3687 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3688 case BITS2(1,1):
3689 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3690 case BITS2(1,0):
3691 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3692 default:
3693 vassert(0); /* NOTREACHED */
3694 }
3695
3696 IRType ty = Ity_INVALID;
3697 switch (szB) {
3698 case 4: ty = Ity_F32; break;
3699 case 8: ty = Ity_F64; break;
3700 case 16: ty = Ity_V128; break;
3701 default: vassert(0);
3702 }
3703
sewardje0bff8b2014-03-09 09:40:23 +00003704 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00003705 the special cases typifed by
sewardje0bff8b2014-03-09 09:40:23 +00003706 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00003707 stp d0, d1, [sp,#-512]!
3708 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00003709 it is necessary to update SP before the transfer, (1)
3710 because Memcheck will otherwise complain about a write
3711 below the stack pointer, and (2) because the segfault
3712 stack extension mechanism will otherwise extend the stack
3713 only down to SP before the instruction, which might not be
3714 far enough, if the -512 bit takes the actual access
3715 address to the next page.
3716 */
3717 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00003718 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00003719 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
3720
3721 if (wBack && earlyWBack)
3722 putIReg64orSP(nn, mkexpr(tEA));
3723
sewardjbbcf1882014-01-12 12:49:10 +00003724 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00003725 if (szB < 16) {
3726 putQReg128(tt1, mkV128(0x0000));
3727 }
sewardj606c4ba2014-01-26 19:11:14 +00003728 putQRegLO(tt1,
3729 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00003730 if (szB < 16) {
3731 putQReg128(tt2, mkV128(0x0000));
3732 }
sewardj606c4ba2014-01-26 19:11:14 +00003733 putQRegLO(tt2,
3734 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00003735 } else {
3736 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00003737 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003738 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00003739 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003740 }
3741
sewardje0bff8b2014-03-09 09:40:23 +00003742 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003743 putIReg64orSP(nn, mkexpr(tEA));
3744
3745 const HChar* fmt_str = NULL;
3746 switch (INSN(24,23)) {
3747 case BITS2(0,1):
3748 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3749 break;
3750 case BITS2(1,1):
3751 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3752 break;
3753 case BITS2(1,0):
3754 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3755 break;
3756 default:
3757 vassert(0);
3758 }
3759 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00003760 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00003761 nameIReg64orSP(nn), simm7);
3762 return True;
3763 }
3764 }
3765
3766 /* -------------- {LD,ST}R (vector register) --------------- */
3767 /* 31 29 23 20 15 12 11 9 4
3768 | | | | | | | | |
3769 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
3770 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
3771 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
3772 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
3773 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
3774
3775 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
3776 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
3777 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
3778 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
3779 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
3780 */
3781 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3782 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3783 HChar dis_buf[64];
3784 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3785 Bool isLD = INSN(22,22) == 1;
3786 UInt tt = INSN(4,0);
3787 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
3788 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
3789 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
3790 switch (szLg2) {
3791 case 0: /* 8 bit */
3792 if (isLD) {
3793 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003794 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
3795 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003796 } else {
3797 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003798 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
3799 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003800 }
3801 break;
3802 case 1:
3803 if (isLD) {
3804 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003805 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
3806 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003807 } else {
3808 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003809 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
3810 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003811 }
3812 break;
3813 case 2: /* 32 bit */
3814 if (isLD) {
3815 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003816 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
3817 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003818 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003819 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
3820 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003821 }
3822 break;
3823 case 3: /* 64 bit */
3824 if (isLD) {
3825 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003826 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
3827 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003828 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003829 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
3830 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003831 }
3832 break;
3833 case 4: return False; //ATC
3834 default: vassert(0);
3835 }
3836 return True;
3837 }
3838 after_LDR_STR_vector_register:
3839
3840 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
3841 /* 31 29 22 20 15 12 11 9 4
3842 | | | | | | | | |
3843 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
3844
3845 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
3846 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
3847
3848 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
3849 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
3850 */
3851 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3852 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3853 HChar dis_buf[64];
3854 UInt szLg2 = INSN(31,30);
3855 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
3856 UInt tt = INSN(4,0);
3857 if (szLg2 == 3) goto after_LDRS_integer_register;
3858 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3859 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
3860 /* Enumerate the 5 variants explicitly. */
3861 if (szLg2 == 2/*32 bit*/ && sxTo64) {
3862 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
3863 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
3864 return True;
3865 }
3866 else
3867 if (szLg2 == 1/*16 bit*/) {
3868 if (sxTo64) {
3869 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
3870 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
3871 } else {
3872 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
3873 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3874 }
3875 return True;
3876 }
3877 else
3878 if (szLg2 == 0/*8 bit*/) {
3879 if (sxTo64) {
3880 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
3881 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
3882 } else {
3883 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
3884 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3885 }
3886 return True;
3887 }
3888 /* else it's an invalid combination */
3889 }
3890 after_LDRS_integer_register:
3891
3892 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
3893 /* This is the Unsigned offset variant only. The Post-Index and
3894 Pre-Index variants are below.
3895
3896 31 29 23 21 9 4
3897 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
3898 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
3899 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
3900 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
3901 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
3902
3903 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
3904 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
3905 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
3906 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
3907 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
3908 */
3909 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
3910 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
3911 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3912 Bool isLD = INSN(22,22) == 1;
3913 UInt pimm12 = INSN(21,10) << szLg2;
3914 UInt nn = INSN(9,5);
3915 UInt tt = INSN(4,0);
3916 IRTemp tEA = newTemp(Ity_I64);
3917 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3918 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
3919 if (isLD) {
3920 if (szLg2 < 4) {
3921 putQReg128(tt, mkV128(0x0000));
3922 }
sewardj606c4ba2014-01-26 19:11:14 +00003923 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00003924 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003925 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003926 }
3927 DIP("%s %s, [%s, #%u]\n",
3928 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003929 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00003930 return True;
3931 }
3932
3933 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
3934 /* These are the Post-Index and Pre-Index variants.
3935
3936 31 29 23 20 11 9 4
3937 (at-Rn-then-Rn=EA)
3938 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
3939 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
3940 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
3941 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
3942 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
3943
3944 (at-EA-then-Rn=EA)
3945 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
3946 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
3947 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
3948 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
3949 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
3950
3951 Stores are the same except with bit 22 set to 0.
3952 */
3953 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3954 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3955 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3956 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3957 Bool isLD = INSN(22,22) == 1;
3958 UInt imm9 = INSN(20,12);
3959 Bool atRN = INSN(11,11) == 0;
3960 UInt nn = INSN(9,5);
3961 UInt tt = INSN(4,0);
3962 IRTemp tRN = newTemp(Ity_I64);
3963 IRTemp tEA = newTemp(Ity_I64);
3964 IRTemp tTA = IRTemp_INVALID;
3965 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3966 ULong simm9 = sx_to_64(imm9, 9);
3967 assign(tRN, getIReg64orSP(nn));
3968 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3969 tTA = atRN ? tRN : tEA;
3970 if (isLD) {
3971 if (szLg2 < 4) {
3972 putQReg128(tt, mkV128(0x0000));
3973 }
sewardj606c4ba2014-01-26 19:11:14 +00003974 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00003975 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003976 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003977 }
3978 putIReg64orSP(nn, mkexpr(tEA));
3979 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
3980 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003981 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00003982 return True;
3983 }
3984
3985 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
3986 /* 31 29 23 20 11 9 4
3987 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
3988 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
3989 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
3990 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
3991 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
3992
3993 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
3994 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
3995 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
3996 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
3997 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
3998 */
3999 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4000 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4001 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4002 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4003 Bool isLD = INSN(22,22) == 1;
4004 UInt imm9 = INSN(20,12);
4005 UInt nn = INSN(9,5);
4006 UInt tt = INSN(4,0);
4007 ULong simm9 = sx_to_64(imm9, 9);
4008 IRTemp tEA = newTemp(Ity_I64);
4009 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4010 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4011 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004012 if (szLg2 < 4) {
4013 putQReg128(tt, mkV128(0x0000));
4014 }
4015 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004016 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004017 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004018 }
4019 DIP("%s %s, [%s, #%lld]\n",
4020 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004021 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004022 return True;
4023 }
4024
4025 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4026 /* 31 29 23 4
4027 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4028 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4029 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4030 */
4031 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4032 UInt szB = 4 << INSN(31,30);
4033 UInt imm19 = INSN(23,5);
4034 UInt tt = INSN(4,0);
4035 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4036 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004037 putQReg128(tt, mkV128(0x0000));
4038 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4039 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004040 return True;
4041 }
4042
sewardj606c4ba2014-01-26 19:11:14 +00004043 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004044 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004045 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4046 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4047 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4048 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4049 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4050 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004051 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4052 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004053 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004054 */
sewardj606c4ba2014-01-26 19:11:14 +00004055 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4056 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004057 ) {
4058 Bool isLD = INSN(22,22) == 1;
4059 UInt rN = INSN(9,5);
4060 UInt vT = INSN(4,0);
4061 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004062 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4063 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004064 assign(tEA, getIReg64orSP(rN));
4065 if (rN == 31) { /* FIXME generate stack alignment check */ }
4066 if (isLD) {
4067 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4068 } else {
4069 storeLE(mkexpr(tEA), getQReg128(vT));
4070 }
4071 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004072 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004073 return True;
4074 }
4075
sewardj606c4ba2014-01-26 19:11:14 +00004076 /* 31 23
4077 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4078 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4079 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4080 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4081 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4082 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4083 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4084 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4085 FIXME does this assume that the host is little endian?
4086 */
4087 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4088 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4089 ) {
4090 Bool isLD = INSN(22,22) == 1;
4091 UInt rN = INSN(9,5);
4092 UInt vT = INSN(4,0);
4093 IRTemp tEA = newTemp(Ity_I64);
4094 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4095 const HChar* name = names[INSN(11,10)];
4096 assign(tEA, getIReg64orSP(rN));
4097 if (rN == 31) { /* FIXME generate stack alignment check */ }
4098 if (isLD) {
4099 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4100 putQRegLane(vT, 1, mkU64(0));
4101 } else {
4102 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4103 }
4104 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4105 vT, name, nameIReg64orSP(rN));
4106 return True;
4107 }
4108
4109 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4110 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004111 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4112 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4113 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4114 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4115 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4116 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4117 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004118 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004119 Note that #16 is implied and cannot be any other value.
4120 FIXME does this assume that the host is little endian?
4121 */
sewardj7d009132014-02-20 17:43:38 +00004122 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4123 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004124 ) {
4125 Bool isLD = INSN(22,22) == 1;
4126 UInt rN = INSN(9,5);
4127 UInt vT = INSN(4,0);
4128 IRTemp tEA = newTemp(Ity_I64);
4129 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4130 const HChar* name = names[INSN(11,10)];
4131 assign(tEA, getIReg64orSP(rN));
4132 if (rN == 31) { /* FIXME generate stack alignment check */ }
4133 if (isLD) {
4134 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4135 } else {
4136 storeLE(mkexpr(tEA), getQReg128(vT));
4137 }
4138 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4139 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4140 vT, name, nameIReg64orSP(rN));
4141 return True;
4142 }
4143
sewardj950ca7a2014-04-03 23:03:32 +00004144 /* 31 23
4145 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4146 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004147 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004148 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004149 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004150 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4151 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4152 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004153 Note that #8 is implied and cannot be any other value.
4154 FIXME does this assume that the host is little endian?
4155 */
sewardj950ca7a2014-04-03 23:03:32 +00004156 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4157 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004158 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004159 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004160 UInt rN = INSN(9,5);
4161 UInt vT = INSN(4,0);
4162 IRTemp tEA = newTemp(Ity_I64);
4163 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4164 const HChar* name = names[INSN(11,10)];
4165 assign(tEA, getIReg64orSP(rN));
4166 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004167 if (isLD) {
4168 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4169 putQRegLane(vT, 1, mkU64(0));
4170 } else {
4171 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4172 }
sewardj606c4ba2014-01-26 19:11:14 +00004173 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004174 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4175 vT, name, nameIReg64orSP(rN));
4176 return True;
4177 }
4178
4179 /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4180 /* Only a very few cases. */
4181 /* 31 23 11 9 4
4182 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4183 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4184 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4185 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4186 */
4187 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4188 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4189 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4190 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4191 ) {
4192 Bool isLD = INSN(22,22) == 1;
4193 UInt rN = INSN(9,5);
4194 UInt vT = INSN(4,0);
4195 IRTemp tEA = newTemp(Ity_I64);
4196 UInt sz = INSN(11,10);
4197 const HChar* name = "??";
4198 assign(tEA, getIReg64orSP(rN));
4199 if (rN == 31) { /* FIXME generate stack alignment check */ }
4200 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4201 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4202 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4203 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4204 if (sz == BITS2(1,1)) {
4205 name = "2d";
4206 if (isLD) {
4207 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4208 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4209 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4210 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4211 } else {
4212 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4213 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4214 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4215 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4216 }
4217 }
4218 else if (sz == BITS2(1,0)) {
4219 /* Uh, this is ugly. TODO: better. */
4220 name = "4s";
4221 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4222 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4223 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4224 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4225 if (isLD) {
4226 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4227 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4228 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4229 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4230 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4231 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4232 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4233 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4234 } else {
4235 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4236 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4237 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4238 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4239 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4240 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4241 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4242 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4243 }
4244 }
4245 else {
4246 vassert(0); // Can't happen.
4247 }
4248 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4249 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4250 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4251 return True;
4252 }
4253
4254 /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4255 /* Only a very few cases. */
4256 /* 31 23
4257 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4258 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4259 */
4260 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4261 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4262 ) {
4263 Bool isLD = INSN(22,22) == 1;
4264 UInt rN = INSN(9,5);
4265 UInt vT = INSN(4,0);
4266 IRTemp tEA = newTemp(Ity_I64);
4267 const HChar* name = "16b";
4268 assign(tEA, getIReg64orSP(rN));
4269 if (rN == 31) { /* FIXME generate stack alignment check */ }
4270 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4271 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4272 if (isLD) {
4273 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4274 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4275 } else {
4276 storeLE(tEA_0, getQReg128((vT+0) % 32));
4277 storeLE(tEA_16, getQReg128((vT+1) % 32));
4278 }
4279 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4280 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004281 return True;
4282 }
4283
sewardj7d009132014-02-20 17:43:38 +00004284 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4285 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4286 /* 31 29 23 20 14 9 4
4287 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
4288 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
4289 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
4290 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004291 */
sewardj7d009132014-02-20 17:43:38 +00004292 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4293 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4294 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00004295 UInt szBlg2 = INSN(31,30);
4296 Bool isLD = INSN(22,22) == 1;
4297 Bool isAcqOrRel = INSN(15,15) == 1;
4298 UInt ss = INSN(20,16);
4299 UInt nn = INSN(9,5);
4300 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00004301
sewardjdc9259c2014-02-27 11:10:19 +00004302 vassert(szBlg2 < 4);
4303 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4304 IRType ty = integerIRTypeOfSize(szB);
4305 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00004306
sewardjdc9259c2014-02-27 11:10:19 +00004307 IRTemp ea = newTemp(Ity_I64);
4308 assign(ea, getIReg64orSP(nn));
4309 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00004310
sewardjdc9259c2014-02-27 11:10:19 +00004311 if (isLD && ss == BITS5(1,1,1,1,1)) {
4312 IRTemp res = newTemp(ty);
4313 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4314 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4315 if (isAcqOrRel) {
4316 stmt(IRStmt_MBE(Imbe_Fence));
4317 }
4318 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4319 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4320 return True;
4321 }
4322 if (!isLD) {
4323 if (isAcqOrRel) {
4324 stmt(IRStmt_MBE(Imbe_Fence));
4325 }
4326 IRTemp res = newTemp(Ity_I1);
4327 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4328 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4329 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4330 Need to set rS to 1 on failure, 0 on success. */
4331 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4332 mkU64(1)));
4333 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4334 nameIRegOrZR(False, ss),
4335 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4336 return True;
4337 }
4338 /* else fall through */
4339 }
4340
4341 /* ------------------ LDA{R,RH,RB} ------------------ */
4342 /* ------------------ STL{R,RH,RB} ------------------ */
4343 /* 31 29 23 20 14 9 4
4344 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
4345 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
4346 */
4347 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4348 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4349 UInt szBlg2 = INSN(31,30);
4350 Bool isLD = INSN(22,22) == 1;
4351 UInt nn = INSN(9,5);
4352 UInt tt = INSN(4,0);
4353
4354 vassert(szBlg2 < 4);
4355 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4356 IRType ty = integerIRTypeOfSize(szB);
4357 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4358
4359 IRTemp ea = newTemp(Ity_I64);
4360 assign(ea, getIReg64orSP(nn));
4361 /* FIXME generate check that ea is szB-aligned */
4362
4363 if (isLD) {
4364 IRTemp res = newTemp(ty);
4365 assign(res, loadLE(ty, mkexpr(ea)));
4366 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4367 stmt(IRStmt_MBE(Imbe_Fence));
4368 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4369 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4370 } else {
4371 stmt(IRStmt_MBE(Imbe_Fence));
4372 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4373 storeLE(mkexpr(ea), data);
4374 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4375 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4376 }
4377 return True;
sewardjbbcf1882014-01-12 12:49:10 +00004378 }
4379
4380 vex_printf("ARM64 front end: load_store\n");
4381 return False;
4382# undef INSN
4383}
4384
4385
4386/*------------------------------------------------------------*/
4387/*--- Control flow and misc instructions ---*/
4388/*------------------------------------------------------------*/
4389
4390static
sewardj65902992014-05-03 21:20:56 +00004391Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
4392 VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00004393{
4394# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4395
4396 /* ---------------------- B cond ----------------------- */
4397 /* 31 24 4 3
4398 0101010 0 imm19 0 cond */
4399 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4400 UInt cond = INSN(3,0);
4401 ULong uimm64 = INSN(23,5) << 2;
4402 Long simm64 = (Long)sx_to_64(uimm64, 21);
4403 vassert(dres->whatNext == Dis_Continue);
4404 vassert(dres->len == 4);
4405 vassert(dres->continueAt == 0);
4406 vassert(dres->jk_StopHere == Ijk_INVALID);
4407 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4408 Ijk_Boring,
4409 IRConst_U64(guest_PC_curr_instr + simm64),
4410 OFFB_PC) );
4411 putPC(mkU64(guest_PC_curr_instr + 4));
4412 dres->whatNext = Dis_StopHere;
4413 dres->jk_StopHere = Ijk_Boring;
4414 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4415 return True;
4416 }
4417
4418 /* -------------------- B{L} uncond -------------------- */
4419 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4420 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
4421 100101 imm26 B (PC + sxTo64(imm26 << 2))
4422 */
4423 UInt bLink = INSN(31,31);
4424 ULong uimm64 = INSN(25,0) << 2;
4425 Long simm64 = (Long)sx_to_64(uimm64, 28);
4426 if (bLink) {
4427 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4428 }
4429 putPC(mkU64(guest_PC_curr_instr + simm64));
4430 dres->whatNext = Dis_StopHere;
4431 dres->jk_StopHere = Ijk_Call;
4432 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4433 guest_PC_curr_instr + simm64);
4434 return True;
4435 }
4436
4437 /* --------------------- B{L} reg --------------------- */
4438 /* 31 24 22 20 15 9 4
4439 1101011 00 10 11111 000000 nn 00000 RET Rn
4440 1101011 00 01 11111 000000 nn 00000 CALL Rn
4441 1101011 00 00 11111 000000 nn 00000 JMP Rn
4442 */
4443 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4444 && INSN(20,16) == BITS5(1,1,1,1,1)
4445 && INSN(15,10) == BITS6(0,0,0,0,0,0)
4446 && INSN(4,0) == BITS5(0,0,0,0,0)) {
4447 UInt branch_type = INSN(22,21);
4448 UInt nn = INSN(9,5);
4449 if (branch_type == BITS2(1,0) /* RET */) {
4450 putPC(getIReg64orZR(nn));
4451 dres->whatNext = Dis_StopHere;
4452 dres->jk_StopHere = Ijk_Ret;
4453 DIP("ret %s\n", nameIReg64orZR(nn));
4454 return True;
4455 }
4456 if (branch_type == BITS2(0,1) /* CALL */) {
4457 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4458 putPC(getIReg64orZR(nn));
4459 dres->whatNext = Dis_StopHere;
4460 dres->jk_StopHere = Ijk_Call;
4461 DIP("blr %s\n", nameIReg64orZR(nn));
4462 return True;
4463 }
4464 if (branch_type == BITS2(0,0) /* JMP */) {
4465 putPC(getIReg64orZR(nn));
4466 dres->whatNext = Dis_StopHere;
4467 dres->jk_StopHere = Ijk_Boring;
4468 DIP("jmp %s\n", nameIReg64orZR(nn));
4469 return True;
4470 }
4471 }
4472
4473 /* -------------------- CB{N}Z -------------------- */
4474 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4475 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4476 */
4477 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4478 Bool is64 = INSN(31,31) == 1;
4479 Bool bIfZ = INSN(24,24) == 0;
4480 ULong uimm64 = INSN(23,5) << 2;
4481 UInt rT = INSN(4,0);
4482 Long simm64 = (Long)sx_to_64(uimm64, 21);
4483 IRExpr* cond = NULL;
4484 if (is64) {
4485 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4486 getIReg64orZR(rT), mkU64(0));
4487 } else {
4488 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4489 getIReg32orZR(rT), mkU32(0));
4490 }
4491 stmt( IRStmt_Exit(cond,
4492 Ijk_Boring,
4493 IRConst_U64(guest_PC_curr_instr + simm64),
4494 OFFB_PC) );
4495 putPC(mkU64(guest_PC_curr_instr + 4));
4496 dres->whatNext = Dis_StopHere;
4497 dres->jk_StopHere = Ijk_Boring;
4498 DIP("cb%sz %s, 0x%llx\n",
4499 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4500 guest_PC_curr_instr + simm64);
4501 return True;
4502 }
4503
4504 /* -------------------- TB{N}Z -------------------- */
4505 /* 31 30 24 23 18 5 4
4506 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4507 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4508 */
4509 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4510 UInt b5 = INSN(31,31);
4511 Bool bIfZ = INSN(24,24) == 0;
4512 UInt b40 = INSN(23,19);
4513 UInt imm14 = INSN(18,5);
4514 UInt tt = INSN(4,0);
4515 UInt bitNo = (b5 << 5) | b40;
4516 ULong uimm64 = imm14 << 2;
4517 Long simm64 = sx_to_64(uimm64, 16);
4518 IRExpr* cond
4519 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4520 binop(Iop_And64,
4521 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4522 mkU64(1)),
4523 mkU64(0));
4524 stmt( IRStmt_Exit(cond,
4525 Ijk_Boring,
4526 IRConst_U64(guest_PC_curr_instr + simm64),
4527 OFFB_PC) );
4528 putPC(mkU64(guest_PC_curr_instr + 4));
4529 dres->whatNext = Dis_StopHere;
4530 dres->jk_StopHere = Ijk_Boring;
4531 DIP("tb%sz %s, #%u, 0x%llx\n",
4532 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4533 guest_PC_curr_instr + simm64);
4534 return True;
4535 }
4536
4537 /* -------------------- SVC -------------------- */
4538 /* 11010100 000 imm16 000 01
4539 Don't bother with anything except the imm16==0 case.
4540 */
4541 if (INSN(31,0) == 0xD4000001) {
4542 putPC(mkU64(guest_PC_curr_instr + 4));
4543 dres->whatNext = Dis_StopHere;
4544 dres->jk_StopHere = Ijk_Sys_syscall;
4545 DIP("svc #0\n");
4546 return True;
4547 }
4548
4549 /* ------------------ M{SR,RS} ------------------ */
4550 /* Only handles the case where the system register is TPIDR_EL0.
4551 0xD51BD0 010 Rt MSR tpidr_el0, rT
4552 0xD53BD0 010 Rt MRS rT, tpidr_el0
4553 */
4554 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4555 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4556 Bool toSys = INSN(21,21) == 0;
4557 UInt tt = INSN(4,0);
4558 if (toSys) {
4559 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4560 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4561 } else {
4562 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4563 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4564 }
4565 return True;
4566 }
4567 /* Cases for FPCR
4568 0xD51B44 000 Rt MSR fpcr, rT
4569 0xD53B44 000 Rt MSR rT, fpcr
4570 */
4571 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4572 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4573 Bool toSys = INSN(21,21) == 0;
4574 UInt tt = INSN(4,0);
4575 if (toSys) {
4576 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4577 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4578 } else {
4579 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4580 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4581 }
4582 return True;
4583 }
4584 /* Cases for FPSR
sewardj7d009132014-02-20 17:43:38 +00004585 0xD51B44 001 Rt MSR fpsr, rT
4586 0xD53B44 001 Rt MSR rT, fpsr
sewardjbbcf1882014-01-12 12:49:10 +00004587 */
4588 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4589 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4590 Bool toSys = INSN(21,21) == 0;
4591 UInt tt = INSN(4,0);
4592 if (toSys) {
4593 stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4594 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4595 } else {
4596 putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4597 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4598 }
4599 return True;
4600 }
4601 /* Cases for NZCV
4602 D51B42 000 Rt MSR nzcv, rT
4603 D53B42 000 Rt MRS rT, nzcv
4604 */
4605 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4606 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4607 Bool toSys = INSN(21,21) == 0;
4608 UInt tt = INSN(4,0);
4609 if (toSys) {
4610 IRTemp t = newTemp(Ity_I64);
4611 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4612 setFlags_COPY(t);
4613 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4614 } else {
4615 IRTemp res = newTemp(Ity_I64);
4616 assign(res, mk_arm64g_calculate_flags_nzcv());
4617 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4618 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4619 }
4620 return True;
4621 }
sewardjd512d102014-02-21 14:49:44 +00004622 /* Cases for DCZID_EL0
4623 Don't support arbitrary reads and writes to this register. Just
4624 return the value 16, which indicates that the DC ZVA instruction
4625 is not permitted, so we don't have to emulate it.
4626 D5 3B 00 111 Rt MRS rT, dczid_el0
4627 */
4628 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4629 UInt tt = INSN(4,0);
4630 putIReg64orZR(tt, mkU64(1<<4));
4631 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4632 return True;
4633 }
sewardj65902992014-05-03 21:20:56 +00004634 /* Cases for CTR_EL0
4635 We just handle reads, and make up a value from the D and I line
4636 sizes in the VexArchInfo we are given, and patch in the following
4637 fields that the Foundation model gives ("natively"):
4638 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
4639 D5 3B 00 001 Rt MRS rT, dczid_el0
4640 */
4641 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
4642 UInt tt = INSN(4,0);
4643 /* Need to generate a value from dMinLine_lg2_szB and
4644 dMinLine_lg2_szB. The value in the register is in 32-bit
4645 units, so need to subtract 2 from the values in the
4646 VexArchInfo. We can assume that the values here are valid --
4647 disInstr_ARM64 checks them -- so there's no need to deal with
4648 out-of-range cases. */
4649 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4650 && archinfo->arm64_dMinLine_lg2_szB <= 17
4651 && archinfo->arm64_iMinLine_lg2_szB >= 2
4652 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4653 UInt val
4654 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
4655 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
4656 putIReg64orZR(tt, mkU64(val));
4657 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
4658 return True;
4659 }
sewardjbbcf1882014-01-12 12:49:10 +00004660
sewardj65902992014-05-03 21:20:56 +00004661 /* ------------------ IC_IVAU ------------------ */
4662 /* D5 0B 75 001 Rt ic ivau, rT
4663 */
4664 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
4665 /* We will always be provided with a valid iMinLine value. */
4666 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
4667 && archinfo->arm64_iMinLine_lg2_szB <= 17);
4668 /* Round the requested address, in rT, down to the start of the
4669 containing block. */
4670 UInt tt = INSN(4,0);
4671 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
4672 IRTemp addr = newTemp(Ity_I64);
4673 assign( addr, binop( Iop_And64,
4674 getIReg64orZR(tt),
4675 mkU64(~(lineszB - 1))) );
4676 /* Set the invalidation range, request exit-and-invalidate, with
4677 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00004678 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4679 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00004680 /* be paranoid ... */
4681 stmt( IRStmt_MBE(Imbe_Fence) );
4682 putPC(mkU64( guest_PC_curr_instr + 4 ));
4683 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00004684 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00004685 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
4686 return True;
4687 }
4688
4689 /* ------------------ DC_CVAU ------------------ */
4690 /* D5 0B 7B 001 Rt dc cvau, rT
4691 */
4692 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
4693 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00004694 dMinLine size, and request an Ijk_FlushDCache instead of
4695 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00004696 /* We will always be provided with a valid dMinLine value. */
4697 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
4698 && archinfo->arm64_dMinLine_lg2_szB <= 17);
4699 /* Round the requested address, in rT, down to the start of the
4700 containing block. */
4701 UInt tt = INSN(4,0);
4702 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
4703 IRTemp addr = newTemp(Ity_I64);
4704 assign( addr, binop( Iop_And64,
4705 getIReg64orZR(tt),
4706 mkU64(~(lineszB - 1))) );
4707 /* Set the flush range, request exit-and-flush, with
4708 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00004709 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
4710 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00004711 /* be paranoid ... */
4712 stmt( IRStmt_MBE(Imbe_Fence) );
4713 putPC(mkU64( guest_PC_curr_instr + 4 ));
4714 dres->whatNext = Dis_StopHere;
4715 dres->jk_StopHere = Ijk_FlushDCache;
4716 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
4717 return True;
4718 }
4719
4720 /* ------------------ ISB, DMB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00004721 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00004722 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004723 DIP("isb\n");
4724 return True;
4725 }
4726 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00004727 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004728 DIP("dmb ish\n");
4729 return True;
4730 }
sewardj65902992014-05-03 21:20:56 +00004731 if (INSN(31,0) == 0xD5033B9F) {
4732 stmt(IRStmt_MBE(Imbe_Fence));
4733 DIP("dsb ish\n");
4734 return True;
4735 }
sewardjbbcf1882014-01-12 12:49:10 +00004736
sewardjdc9259c2014-02-27 11:10:19 +00004737 /* -------------------- NOP -------------------- */
4738 if (INSN(31,0) == 0xD503201F) {
4739 DIP("nop\n");
4740 return True;
4741 }
4742
sewardjbbcf1882014-01-12 12:49:10 +00004743 //fail:
4744 vex_printf("ARM64 front end: branch_etc\n");
4745 return False;
4746# undef INSN
4747}
4748
4749
4750/*------------------------------------------------------------*/
4751/*--- SIMD and FP instructions ---*/
4752/*------------------------------------------------------------*/
4753
sewardjecde6972014-02-05 11:01:19 +00004754/* begin FIXME -- rm temp scaffolding */
4755static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4756static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004757
sewardjecde6972014-02-05 11:01:19 +00004758static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4759static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004760static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4761static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4762
sewardjecde6972014-02-05 11:01:19 +00004763static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4764static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004765static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4766static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4767
sewardjfab09142014-02-10 10:28:13 +00004768static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4769static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004770static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4771static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
sewardjecde6972014-02-05 11:01:19 +00004772/* end FIXME -- rm temp scaffolding */
4773
sewardjbbcf1882014-01-12 12:49:10 +00004774/* Generate N copies of |bit| in the bottom of a ULong. */
4775static ULong Replicate ( ULong bit, Int N )
4776{
sewardj606c4ba2014-01-26 19:11:14 +00004777 vassert(bit <= 1 && N >= 1 && N < 64);
4778 if (bit == 0) {
4779 return 0;
4780 } else {
4781 /* Careful. This won't work for N == 64. */
4782 return (1ULL << N) - 1;
4783 }
sewardjbbcf1882014-01-12 12:49:10 +00004784}
4785
sewardjfab09142014-02-10 10:28:13 +00004786static ULong Replicate32x2 ( ULong bits32 )
4787{
4788 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4789 return (bits32 << 32) | bits32;
4790}
4791
4792static ULong Replicate16x4 ( ULong bits16 )
4793{
4794 vassert(0 == (bits16 & ~0xFFFFULL));
4795 return Replicate32x2((bits16 << 16) | bits16);
4796}
4797
4798static ULong Replicate8x8 ( ULong bits8 )
4799{
4800 vassert(0 == (bits8 & ~0xFFULL));
4801 return Replicate16x4((bits8 << 8) | bits8);
4802}
4803
/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
   |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
   is 64.  In the former case, the upper 32 bits of the returned value
   are guaranteed to be zero. */
static ULong VFPExpandImm ( ULong imm8, Int N )
{
   vassert(imm8 <= 0xFF);
   vassert(N == 32 || N == 64);
   /* E = number of exponent bits, F = number of fraction bits in the
      destination IEEE754 format; 1 (sign) + E + F == N. */
   Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
   Int F = N - E - 1;
   ULong imm8_6 = (imm8 >> 6) & 1;
   /* sign: 1 bit */
   /* exp:  E bits */
   /* frac: F bits */
   ULong sign = (imm8 >> 7) & 1;
   /* Exponent is NOT(imm8[6]) followed by E-1 copies of imm8[6]. */
   ULong exp  = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
   /* Fraction is imm8[5:0] followed by F-6 zero bits. */
   ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
   vassert(sign < (1ULL << 1));
   vassert(exp  < (1ULL << E));
   vassert(frac < (1ULL << F));
   vassert(1 + E + F == N);
   /* Assemble the final sign:exp:frac bit pattern. */
   ULong res = (sign << (E+F)) | (exp << F) | frac;
   return res;
}
4828
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.  |op| is 1 bit, |cmode| 4 bits, |imm8| 8 bits; on
   success *res holds the expanded 64-bit immediate. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   /* Dispatch on cmode[3:1]; cmode[0] selects sub-cases below.
      testimm8 marks encodings where imm8 == 0 is reserved. */
   switch (cmode >> 1) {
      case 0: /* 32-bit lanes, imm8 unshifted */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1: /* 32-bit lanes, imm8 << 8 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2: /* 32-bit lanes, imm8 << 16 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3: /* 32-bit lanes, imm8 << 24 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4: /* 16-bit lanes, imm8 unshifted */
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5: /* 16-bit lanes, imm8 << 8 */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6: /* 32-bit lanes, "shifting ones" forms */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         /* cmode=1110, op=0: 8-bit lanes, imm8 replicated */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* cmode=1110, op=1: each bit of imm8 expands to a full byte */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* cmode=1111, op=0: 32-bit FP immediate, replicated */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* cmode=1111, op=1: 64-bit FP immediate */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   /* Reject reserved encodings (imm8 == 0 where disallowed). */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
4909
4910
sewardj606c4ba2014-01-26 19:11:14 +00004911/* Help a bit for decoding laneage for vector operations that can be
4912 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
4913 and SZ bits, typically for vector floating point. */
4914static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
4915 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
4916 /*OUT*/const HChar** arrSpec,
4917 Bool bitQ, Bool bitSZ )
4918{
4919 vassert(bitQ == True || bitQ == False);
4920 vassert(bitSZ == True || bitSZ == False);
4921 if (bitQ && bitSZ) { // 2x64
4922 if (tyI) *tyI = Ity_I64;
4923 if (tyF) *tyF = Ity_F64;
4924 if (nLanes) *nLanes = 2;
4925 if (zeroUpper) *zeroUpper = False;
4926 if (arrSpec) *arrSpec = "2d";
4927 return True;
4928 }
4929 if (bitQ && !bitSZ) { // 4x32
4930 if (tyI) *tyI = Ity_I32;
4931 if (tyF) *tyF = Ity_F32;
4932 if (nLanes) *nLanes = 4;
4933 if (zeroUpper) *zeroUpper = False;
4934 if (arrSpec) *arrSpec = "4s";
4935 return True;
4936 }
4937 if (!bitQ && !bitSZ) { // 2x32
4938 if (tyI) *tyI = Ity_I32;
4939 if (tyF) *tyF = Ity_F32;
4940 if (nLanes) *nLanes = 2;
4941 if (zeroUpper) *zeroUpper = True;
4942 if (arrSpec) *arrSpec = "2s";
4943 return True;
4944 }
4945 // Else impliedly 1x64, which isn't allowed.
4946 return False;
4947}
4948
4949/* Helper for decoding laneage for simple vector operations,
4950 eg integer add. */
4951static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
4952 /*OUT*/const HChar** arrSpec,
4953 Bool bitQ, UInt szBlg2 )
4954{
4955 vassert(bitQ == True || bitQ == False);
4956 vassert(szBlg2 < 4);
4957 Bool zu = False;
4958 const HChar* as = NULL;
4959 switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
4960 case 0: zu = True; as = "8b"; break;
4961 case 1: zu = False; as = "16b"; break;
4962 case 2: zu = True; as = "4h"; break;
4963 case 3: zu = False; as = "8h"; break;
4964 case 4: zu = True; as = "2s"; break;
4965 case 5: zu = False; as = "4s"; break;
4966 case 6: return False; // impliedly 1x64
4967 case 7: zu = False; as = "2d"; break;
4968 default: vassert(0);
4969 }
4970 vassert(as);
4971 if (arrSpec) *arrSpec = as;
4972 if (zeroUpper) *zeroUpper = zu;
4973 return True;
4974}
4975
4976
sewardje520bb32014-02-17 11:00:53 +00004977/* Helper for decoding laneage for shift-style vector operations
4978 that involve an immediate shift amount. */
4979static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
4980 UInt immh, UInt immb )
4981{
4982 vassert(immh < (1<<4));
4983 vassert(immb < (1<<3));
4984 UInt immhb = (immh << 3) | immb;
4985 if (immh & 8) {
4986 if (shift) *shift = 128 - immhb;
4987 if (szBlg2) *szBlg2 = 3;
4988 return True;
4989 }
4990 if (immh & 4) {
4991 if (shift) *shift = 64 - immhb;
4992 if (szBlg2) *szBlg2 = 2;
4993 return True;
4994 }
4995 if (immh & 2) {
4996 if (shift) *shift = 32 - immhb;
4997 if (szBlg2) *szBlg2 = 1;
4998 return True;
4999 }
5000 if (immh & 1) {
5001 if (shift) *shift = 16 - immhb;
5002 if (szBlg2) *szBlg2 = 0;
5003 return True;
5004 }
5005 return False;
5006}
5007
5008
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Round 1: replicate each 64-bit half across the vector. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Round 2: replicate each 32-bit quarter. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Round 3: replicate each 16-bit lane. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Round 4: replicate each 8-bit lane, yielding 16 vectors,
            each holding 16 copies of one original lane. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Fold the 16 replicated vectors with 'op' in a 4-level
            pairwise reduction tree. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the least significant 8-bit lane of the result. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as above, for 8 lanes of 16 bits: 3 rounds of
            lane replication, then a 3-level reduction tree. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the least significant 16-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again, for 4 lanes of 32 bits: 2 rounds of
            lane replication, then a 2-level reduction tree. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the least significant 32-bit lane. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5199
5200
sewardj92d0ae32014-04-03 13:48:54 +00005201/* Generate IR for TBL and TBX. This deals with the 128 bit case
5202 only. */
5203static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5204 IRTemp oor_values )
5205{
5206 vassert(len >= 0 && len <= 3);
5207
5208 /* Generate some useful constants as concisely as possible. */
5209 IRTemp half15 = newTemp(Ity_I64);
5210 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5211 IRTemp half16 = newTemp(Ity_I64);
5212 assign(half16, mkU64(0x1010101010101010ULL));
5213
5214 /* A zero vector */
5215 IRTemp allZero = newTemp(Ity_V128);
5216 assign(allZero, mkV128(0x0000));
5217 /* A vector containing 15 in each 8-bit lane */
5218 IRTemp all15 = newTemp(Ity_V128);
5219 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5220 /* A vector containing 16 in each 8-bit lane */
5221 IRTemp all16 = newTemp(Ity_V128);
5222 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5223 /* A vector containing 32 in each 8-bit lane */
5224 IRTemp all32 = newTemp(Ity_V128);
5225 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5226 /* A vector containing 48 in each 8-bit lane */
5227 IRTemp all48 = newTemp(Ity_V128);
5228 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5229 /* A vector containing 64 in each 8-bit lane */
5230 IRTemp all64 = newTemp(Ity_V128);
5231 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5232
5233 /* Group the 16/32/48/64 vectors so as to be indexable. */
5234 IRTemp allXX[4] = { all16, all32, all48, all64 };
5235
5236 /* Compute the result for each table vector, with zeroes in places
5237 where the index values are out of range, and OR them into the
5238 running vector. */
5239 IRTemp running_result = newTemp(Ity_V128);
5240 assign(running_result, mkV128(0));
5241
5242 UInt tabent;
5243 for (tabent = 0; tabent <= len; tabent++) {
5244 vassert(tabent >= 0 && tabent < 4);
5245 IRTemp bias = newTemp(Ity_V128);
5246 assign(bias,
5247 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5248 IRTemp biased_indices = newTemp(Ity_V128);
5249 assign(biased_indices,
5250 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5251 IRTemp valid_mask = newTemp(Ity_V128);
5252 assign(valid_mask,
5253 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5254 IRTemp safe_biased_indices = newTemp(Ity_V128);
5255 assign(safe_biased_indices,
5256 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5257 IRTemp results_or_junk = newTemp(Ity_V128);
5258 assign(results_or_junk,
5259 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5260 mkexpr(safe_biased_indices)));
5261 IRTemp results_or_zero = newTemp(Ity_V128);
5262 assign(results_or_zero,
5263 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5264 /* And OR that into the running result. */
5265 IRTemp tmp = newTemp(Ity_V128);
5266 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5267 mkexpr(running_result)));
5268 running_result = tmp;
5269 }
5270
5271 /* So now running_result holds the overall result where the indices
5272 are in range, and zero in out-of-range lanes. Now we need to
5273 compute an overall validity mask and use this to copy in the
5274 lanes in the oor_values for out of range indices. This is
5275 unnecessary for TBL but will get folded out by iropt, so we lean
5276 on that and generate the same code for TBL and TBX here. */
5277 IRTemp overall_valid_mask = newTemp(Ity_V128);
5278 assign(overall_valid_mask,
5279 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5280 IRTemp result = newTemp(Ity_V128);
5281 assign(result,
5282 binop(Iop_OrV128,
5283 mkexpr(running_result),
5284 binop(Iop_AndV128,
5285 mkexpr(oor_values),
5286 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5287 return result;
5288}
5289
5290
sewardjbbcf1882014-01-12 12:49:10 +00005291static
5292Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5293{
5294# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5295
5296 /* ---------------- FMOV (general) ---------------- */
5297 /* case 30 23 20 18 15 9 4
5298 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
5299 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
5300 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
5301
5302 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
5303 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
5304 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
5305 */
5306 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5307 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5308 UInt sf = INSN(31,31);
5309 UInt ty = INSN(23,22); // type
5310 UInt rm = INSN(20,19); // rmode
5311 UInt op = INSN(18,16); // opcode
5312 UInt nn = INSN(9,5);
5313 UInt dd = INSN(4,0);
5314 UInt ix = 0; // case
5315 if (sf == 0) {
5316 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5317 ix = 1;
5318 else
5319 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5320 ix = 4;
5321 } else {
5322 vassert(sf == 1);
5323 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5324 ix = 2;
5325 else
5326 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5327 ix = 5;
5328 else
5329 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5330 ix = 3;
5331 else
5332 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5333 ix = 6;
5334 }
5335 if (ix > 0) {
5336 switch (ix) {
5337 case 1:
5338 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005339 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005340 DIP("fmov s%u, w%u\n", dd, nn);
5341 break;
5342 case 2:
5343 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005344 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005345 DIP("fmov d%u, x%u\n", dd, nn);
5346 break;
5347 case 3:
sewardj606c4ba2014-01-26 19:11:14 +00005348 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005349 DIP("fmov v%u.d[1], x%u\n", dd, nn);
5350 break;
5351 case 4:
sewardj606c4ba2014-01-26 19:11:14 +00005352 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +00005353 DIP("fmov w%u, s%u\n", dd, nn);
5354 break;
5355 case 5:
sewardj606c4ba2014-01-26 19:11:14 +00005356 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +00005357 DIP("fmov x%u, d%u\n", dd, nn);
5358 break;
5359 case 6:
sewardj606c4ba2014-01-26 19:11:14 +00005360 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005361 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5362 break;
5363 default:
5364 vassert(0);
5365 }
5366 return True;
5367 }
5368 /* undecodable; fall through */
5369 }
5370
5371 /* -------------- FMOV (scalar, immediate) -------------- */
5372 /* 31 28 23 20 12 9 4
5373 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
5374 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
5375 */
5376 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5377 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5378 Bool isD = INSN(22,22) == 1;
5379 UInt imm8 = INSN(20,13);
5380 UInt dd = INSN(4,0);
5381 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
5382 if (!isD) {
sewardjaeeb31d2014-01-12 18:23:45 +00005383 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
sewardjbbcf1882014-01-12 12:49:10 +00005384 }
5385 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005386 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5387 DIP("fmov %s, #0x%llx\n",
5388 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
sewardjbbcf1882014-01-12 12:49:10 +00005389 return True;
5390 }
5391
sewardjfab09142014-02-10 10:28:13 +00005392 /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5393 /* 31 28 18 15 11 9 4
5394 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0)
5395 MOV Vd.2d #imm (q=1)
5396 Allowable op:cmode
5397 FMOV = 1:1111
5398 MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, 11110
5399 */
5400 if (INSN(31,31) == 0
5401 && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5402 && INSN(11,10) == BITS2(0,1)) {
5403 UInt bitQ = INSN(30,30);
5404 UInt bitOP = INSN(29,29);
5405 UInt cmode = INSN(15,12);
5406 UInt imm8 = (INSN(18,16) << 5) | INSN(9,5);
5407 UInt dd = INSN(4,0);
5408 ULong imm64lo = 0;
5409 UInt op_cmode = (bitOP << 4) | cmode;
5410 Bool ok = False;
5411 switch (op_cmode) {
5412 case BITS5(1,1,1,1,1): // 1:1111
5413 case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5414 case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5415 case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5416 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5417 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5418 case BITS5(1,1,1,1,0): // 1:1110
5419 ok = True; break;
5420 default:
5421 break;
5422 }
5423 if (ok) {
5424 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5425 }
5426 if (ok) {
5427 ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
5428 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
sewardjbd83e982014-04-08 15:23:42 +00005429 DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
sewardjfab09142014-02-10 10:28:13 +00005430 return True;
5431 }
5432 /* else fall through */
5433 }
sewardjfab09142014-02-10 10:28:13 +00005434
sewardjbbcf1882014-01-12 12:49:10 +00005435 /* -------------- {S,U}CVTF (scalar, integer) -------------- */
5436 /* 31 28 23 21 20 18 15 9 4 ix
5437 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0
5438 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1
5439 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2
5440 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3
5441
5442 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4
5443 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5
5444 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6
5445 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7
5446
5447 These are signed/unsigned conversion from integer registers to
5448 FP registers, all 4 32/64-bit combinations, rounded per FPCR.
5449 */
5450 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
5451 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5452 Bool isI64 = INSN(31,31) == 1;
5453 Bool isF64 = INSN(22,22) == 1;
5454 Bool isU = INSN(16,16) == 1;
5455 UInt nn = INSN(9,5);
5456 UInt dd = INSN(4,0);
5457 UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
5458 const IROp ops[8]
5459 = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
5460 Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
5461 IRExpr* src = getIRegOrZR(isI64, nn);
5462 IRExpr* res = (isF64 && !isI64)
5463 ? unop(ops[ix], src)
5464 : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
5465 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005466 putQRegLO(dd, res);
sewardjbbcf1882014-01-12 12:49:10 +00005467 DIP("%ccvtf %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005468 isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
sewardjbbcf1882014-01-12 12:49:10 +00005469 nameIRegOrZR(isI64, nn));
5470 return True;
5471 }
5472
sewardj5860ec72014-03-01 11:19:45 +00005473 /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
sewardjbbcf1882014-01-12 12:49:10 +00005474 /* 31 23 20 15 11 9 4
5475 ---------------- 0000 ------ FMUL --------
5476 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
5477 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
5478 ---------------- 0010 ------ FADD --------
5479 ---------------- 0011 ------ FSUB --------
5480 ---------------- 1000 ------ FNMUL --------
5481 */
5482 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5483 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
5484 Bool isD = INSN(22,22) == 1;
5485 UInt mm = INSN(20,16);
5486 UInt op = INSN(15,12);
5487 UInt nn = INSN(9,5);
5488 UInt dd = INSN(4,0);
5489 IROp iop = Iop_INVALID;
5490 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005491 Bool neg = False;
5492 const HChar* nm = "???";
5493 switch (op) {
5494 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break;
5495 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break;
5496 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break;
5497 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break;
5498 case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
5499 neg = True; break;
5500 default: return False;
5501 }
5502 vassert(iop != Iop_INVALID);
5503 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005504 getQRegLO(nn, ty), getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005505 IRTemp res = newTemp(ty);
5506 assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
5507 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005508 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005509 DIP("%s %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005510 nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005511 return True;
5512 }
5513
5514 /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
5515 /* 31 23 21 16 14 9 4
5516 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5517 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5518 ------------------ 01 --------- FABS ------
5519 ------------------ 10 --------- FNEG ------
sewardjfab09142014-02-10 10:28:13 +00005520 ------------------ 11 --------- FSQRT -----
sewardjbbcf1882014-01-12 12:49:10 +00005521 */
5522 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5523 && INSN(21,17) == BITS5(1,0,0,0,0)
5524 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5525 Bool isD = INSN(22,22) == 1;
5526 UInt opc = INSN(16,15);
5527 UInt nn = INSN(9,5);
5528 UInt dd = INSN(4,0);
5529 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005530 IRTemp res = newTemp(ty);
5531 if (opc == BITS2(0,0)) {
sewardj606c4ba2014-01-26 19:11:14 +00005532 assign(res, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005533 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005534 putQRegLO(dd, mkexpr(res));
5535 DIP("fmov %s, %s\n",
5536 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005537 return True;
5538 }
5539 if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
5540 Bool isAbs = opc == BITS2(0,1);
5541 IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005542 assign(res, unop(op, getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005543 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005544 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005545 DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
sewardj606c4ba2014-01-26 19:11:14 +00005546 nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005547 return True;
5548 }
5549 if (opc == BITS2(1,1)) {
5550 assign(res,
5551 binop(mkSQRTF(ty),
sewardj606c4ba2014-01-26 19:11:14 +00005552 mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
sewardjbbcf1882014-01-12 12:49:10 +00005553 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005554 putQRegLO(dd, mkexpr(res));
5555 DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005556 return True;
5557 }
5558 /* else fall through; other cases are ATC */
5559 }
5560
sewardjfab09142014-02-10 10:28:13 +00005561 /* ---------------- F{ABS,NEG} (vector) ---------------- */
5562 /* 31 28 22 21 16 9 4
5563 0q0 01110 1 sz 10000 01111 10 n d FABS Vd.T, Vn.T
5564 0q1 01110 1 sz 10000 01111 10 n d FNEG Vd.T, Vn.T
5565 */
5566 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
5567 && INSN(21,17) == BITS5(1,0,0,0,0)
5568 && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
5569 UInt bitQ = INSN(30,30);
5570 UInt bitSZ = INSN(22,22);
5571 Bool isFNEG = INSN(29,29) == 1;
5572 UInt nn = INSN(9,5);
5573 UInt dd = INSN(4,0);
5574 const HChar* ar = "??";
5575 IRType tyF = Ity_INVALID;
5576 Bool zeroHI = False;
5577 Bool ok = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
5578 (Bool)bitQ, (Bool)bitSZ);
5579 if (ok) {
sewardj32d86752014-03-02 12:47:18 +00005580 vassert(tyF == Ity_F64 || tyF == Ity_F32);
sewardjfab09142014-02-10 10:28:13 +00005581 IROp op = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
5582 : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
5583 IRTemp res = newTemp(Ity_V128);
5584 assign(res, unop(op, getQReg128(nn)));
5585 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
5586 : mkexpr(res));
5587 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
5588 nameQReg128(dd), ar, nameQReg128(nn), ar);
5589 return True;
5590 }
5591 /* else fall through */
5592 }
5593
sewardjbbcf1882014-01-12 12:49:10 +00005594 /* -------------------- FCMP,FCMPE -------------------- */
5595 /* 31 23 20 15 9 4
5596 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
5597 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
5598 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
5599 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
5600
5601 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
5602 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
5603 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
5604 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
5605
5606 FCMPE generates Invalid Operation exn if either arg is any kind
5607 of NaN. FCMP generates Invalid Operation exn if either arg is a
5608 signalling NaN. We ignore this detail here and produce the same
5609 IR for both.
5610 */
5611 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5612 && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
5613 Bool isD = INSN(22,22) == 1;
5614 UInt mm = INSN(20,16);
5615 UInt nn = INSN(9,5);
5616 Bool isCMPE = INSN(4,4) == 1;
5617 Bool cmpZero = INSN(3,3) == 1;
5618 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005619 Bool valid = True;
5620 if (cmpZero && mm != 0) valid = False;
5621 if (valid) {
5622 IRTemp argL = newTemp(ty);
5623 IRTemp argR = newTemp(ty);
5624 IRTemp irRes = newTemp(Ity_I32);
sewardj606c4ba2014-01-26 19:11:14 +00005625 assign(argL, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005626 assign(argR,
5627 cmpZero
5628 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
sewardj606c4ba2014-01-26 19:11:14 +00005629 : getQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005630 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
5631 mkexpr(argL), mkexpr(argR)));
5632 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
5633 IRTemp nzcv_28x0 = newTemp(Ity_I64);
5634 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
5635 setFlags_COPY(nzcv_28x0);
sewardj606c4ba2014-01-26 19:11:14 +00005636 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
5637 cmpZero ? "#0.0" : nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005638 return True;
5639 }
5640 }
5641
5642 /* -------------------- F{N}M{ADD,SUB} -------------------- */
5643 /* 31 22 20 15 14 9 4 ix
5644 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
5645 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
5646 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
5647 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
5648 where Fx=Dx when sz=1, Fx=Sx when sz=0
5649
5650 -----SPEC------ ----IMPL----
5651 fmadd a + n * m a + n * m
5652 fmsub a + (-n) * m a - n * m
5653 fnmadd (-a) + (-n) * m -(a + n * m)
5654 fnmsub (-a) + n * m -(a - n * m)
5655 */
5656 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
5657 Bool isD = INSN(22,22) == 1;
5658 UInt mm = INSN(20,16);
5659 UInt aa = INSN(14,10);
5660 UInt nn = INSN(9,5);
5661 UInt dd = INSN(4,0);
5662 UInt ix = (INSN(21,21) << 1) | INSN(15,15);
5663 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005664 IROp opADD = mkADDF(ty);
5665 IROp opSUB = mkSUBF(ty);
5666 IROp opMUL = mkMULF(ty);
5667 IROp opNEG = mkNEGF(ty);
5668 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005669 IRExpr* eA = getQRegLO(aa, ty);
5670 IRExpr* eN = getQRegLO(nn, ty);
5671 IRExpr* eM = getQRegLO(mm, ty);
sewardjbbcf1882014-01-12 12:49:10 +00005672 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
5673 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
5674 switch (ix) {
5675 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
5676 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
5677 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
5678 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
5679 default: vassert(0);
5680 }
5681 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005682 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005683 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
5684 DIP("%s %s, %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005685 names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
5686 nameQRegLO(mm, ty), nameQRegLO(aa, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005687 return True;
5688 }
5689
5690 /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
5691 /* 30 23 20 18 15 9 4
5692 sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
5693 sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
5694 ---------------- 01 -------------- FCVTP-------- (round to +inf)
5695 ---------------- 10 -------------- FCVTM-------- (round to -inf)
5696 ---------------- 11 -------------- FCVTZ-------- (round to zero)
5697
5698 Rd is Xd when sf==1, Wd when sf==0
5699 Fn is Dn when x==1, Sn when x==0
5700 20:19 carry the rounding mode, using the same encoding as FPCR
5701 */
5702 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
5703 && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5704 Bool isI64 = INSN(31,31) == 1;
5705 Bool isF64 = INSN(22,22) == 1;
5706 UInt rm = INSN(20,19);
5707 Bool isU = INSN(16,16) == 1;
5708 UInt nn = INSN(9,5);
5709 UInt dd = INSN(4,0);
5710 /* Decide on the IR rounding mode to use. */
5711 IRRoundingMode irrm = 8; /*impossible*/
5712 HChar ch = '?';
5713 switch (rm) {
5714 case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
5715 case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
5716 case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
5717 case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
5718 default: vassert(0);
5719 }
5720 vassert(irrm != 8);
5721 /* Decide on the conversion primop, based on the source size,
5722 dest size and signedness (8 possibilities). Case coding:
5723 F32 ->s I32 0
5724 F32 ->u I32 1
5725 F32 ->s I64 2
5726 F32 ->u I64 3
5727 F64 ->s I32 4
5728 F64 ->u I32 5
5729 F64 ->s I64 6
5730 F64 ->u I64 7
5731 */
5732 UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
5733 vassert(ix < 8);
5734 const IROp ops[8]
5735 = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
5736 Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
5737 IROp op = ops[ix];
5738 // A bit of ATCery: bounce all cases we haven't seen an example of.
5739 if (/* F32toI32S */
5740 (op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
sewardj1eaaec22014-03-07 22:52:19 +00005741 || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
5742 || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005743 /* F32toI32U */
sewardj1eaaec22014-03-07 22:52:19 +00005744 || (op == Iop_F32toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Sn */
5745 || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005746 /* F32toI64S */
sewardj1eaaec22014-03-07 22:52:19 +00005747 || (op == Iop_F32toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Sn */
sewardjbbcf1882014-01-12 12:49:10 +00005748 /* F32toI64U */
5749 || (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
5750 /* F64toI32S */
5751 || (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
5752 || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
5753 || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
5754 /* F64toI32U */
sewardjbbcf1882014-01-12 12:49:10 +00005755 || (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005756 || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
5757 || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005758 /* F64toI64S */
5759 || (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005760 || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
5761 || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005762 /* F64toI64U */
5763 || (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
sewardj1eaaec22014-03-07 22:52:19 +00005764 || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
sewardjbbcf1882014-01-12 12:49:10 +00005765 ) {
5766 /* validated */
5767 } else {
5768 return False;
5769 }
sewardjbbcf1882014-01-12 12:49:10 +00005770 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5771 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5772 IRTemp src = newTemp(srcTy);
5773 IRTemp dst = newTemp(dstTy);
sewardj606c4ba2014-01-26 19:11:14 +00005774 assign(src, getQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005775 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5776 putIRegOrZR(isI64, dd, mkexpr(dst));
5777 DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
sewardj606c4ba2014-01-26 19:11:14 +00005778 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
sewardjbbcf1882014-01-12 12:49:10 +00005779 return True;
5780 }
5781
sewardj1eaaec22014-03-07 22:52:19 +00005782 /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
5783 /* 30 23 20 18 15 9 4
5784 1 00 11110 0x 1 00 100 000000 n d FCVTAS Xd, Fn
5785 0 00 11110 0x 1 00 100 000000 n d FCVTAS Wd, Fn
5786 Fn is Dn when x==1, Sn when x==0
5787 */
5788 if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
5789 && INSN(21,16) == BITS6(1,0,0,1,0,0)
5790 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5791 Bool isI64 = INSN(31,31) == 1;
5792 Bool isF64 = INSN(22,22) == 1;
5793 UInt nn = INSN(9,5);
5794 UInt dd = INSN(4,0);
5795 /* Decide on the IR rounding mode to use. */
5796 /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 */
5797 IRRoundingMode irrm = Irrm_NEAREST;
5798 /* Decide on the conversion primop. */
5799 IROp op = isI64 ? (isF64 ? Iop_F64toI64S : Iop_F32toI64S)
5800 : (isF64 ? Iop_F64toI32S : Iop_F32toI32S);
5801 IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
5802 IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
5803 IRTemp src = newTemp(srcTy);
5804 IRTemp dst = newTemp(dstTy);
5805 assign(src, getQRegLO(nn, srcTy));
5806 assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
5807 putIRegOrZR(isI64, dd, mkexpr(dst));
5808 DIP("fcvtas %s, %s (KLUDGED)\n",
5809 nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
5810 return True;
5811 }
5812
sewardjbbcf1882014-01-12 12:49:10 +00005813 /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
5814 /* 31 23 21 17 14 9 4
5815 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
5816 rm
5817 x==0 => S-registers, x==1 => D-registers
5818 rm (17:15) encodings:
5819 111 per FPCR (FRINTI)
5820 001 +inf (FRINTP)
5821 010 -inf (FRINTM)
5822 011 zero (FRINTZ)
5823 000 tieeven
sewardj1eaaec22014-03-07 22:52:19 +00005824 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjbbcf1882014-01-12 12:49:10 +00005825 110 per FPCR + "exact = TRUE"
5826 101 unallocated
5827 */
5828 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5829 && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
5830 Bool isD = INSN(22,22) == 1;
5831 UInt rm = INSN(17,15);
5832 UInt nn = INSN(9,5);
5833 UInt dd = INSN(4,0);
5834 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005835 IRExpr* irrmE = NULL;
5836 UChar ch = '?';
5837 switch (rm) {
5838 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
5839 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
5840 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
sewardj1eaaec22014-03-07 22:52:19 +00005841 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
5842 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjbbcf1882014-01-12 12:49:10 +00005843 default: break;
5844 }
5845 if (irrmE) {
5846 IRTemp src = newTemp(ty);
5847 IRTemp dst = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005848 assign(src, getQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005849 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
5850 irrmE, mkexpr(src)));
5851 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005852 putQRegLO(dd, mkexpr(dst));
5853 DIP("frint%c %s, %s\n",
5854 ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005855 return True;
5856 }
5857 /* else unhandled rounding mode case -- fall through */
5858 }
5859
5860 /* ------------------ FCVT (scalar) ------------------ */
5861 /* 31 23 21 16 14 9 4
5862 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
5863 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
5864 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005865 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardjbbcf1882014-01-12 12:49:10 +00005866 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
sewardj1eaaec22014-03-07 22:52:19 +00005867 --------- 01 ----- 00 --------- FCVT Sd, Dn
sewardjbbcf1882014-01-12 12:49:10 +00005868 Rounding, when dst is smaller than src, is per the FPCR.
5869 */
5870 if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
5871 && INSN(21,17) == BITS5(1,0,0,0,1)
5872 && INSN(14,10) == BITS5(1,0,0,0,0)) {
5873 UInt b2322 = INSN(23,22);
5874 UInt b1615 = INSN(16,15);
5875 UInt nn = INSN(9,5);
5876 UInt dd = INSN(4,0);
5877 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
5878 /* Convert S to D */
5879 IRTemp res = newTemp(Ity_F64);
sewardj606c4ba2014-01-26 19:11:14 +00005880 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
sewardjbbcf1882014-01-12 12:49:10 +00005881 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005882 putQRegLO(dd, mkexpr(res));
5883 DIP("fcvt %s, %s\n",
5884 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
sewardjbbcf1882014-01-12 12:49:10 +00005885 return True;
5886 }
5887 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
5888 /* Convert D to S */
5889 IRTemp res = newTemp(Ity_F32);
5890 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
sewardj606c4ba2014-01-26 19:11:14 +00005891 getQRegLO(nn, Ity_F64)));
sewardjbbcf1882014-01-12 12:49:10 +00005892 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005893 putQRegLO(dd, mkexpr(res));
5894 DIP("fcvt %s, %s\n",
5895 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
sewardjbbcf1882014-01-12 12:49:10 +00005896 return True;
5897 }
5898 /* else unhandled */
5899 }
5900
5901 /* ------------------ FABD (scalar) ------------------ */
5902 /* 31 23 20 15 9 4
5903 011 11110 111 m 110101 n d FABD Dd, Dn, Dm
5904 011 11110 101 m 110101 n d FABD Sd, Sn, Sm
5905 */
5906 if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
5907 && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
5908 Bool isD = INSN(22,22) == 1;
5909 UInt mm = INSN(20,16);
5910 UInt nn = INSN(9,5);
5911 UInt dd = INSN(4,0);
5912 IRType ty = isD ? Ity_F64 : Ity_F32;
sewardjbbcf1882014-01-12 12:49:10 +00005913 IRTemp res = newTemp(ty);
sewardj606c4ba2014-01-26 19:11:14 +00005914 assign(res, unop(mkABSF(ty),
5915 triop(mkSUBF(ty),
5916 mkexpr(mk_get_IR_rounding_mode()),
5917 getQRegLO(nn,ty), getQRegLO(mm,ty))));
sewardjbbcf1882014-01-12 12:49:10 +00005918 putQReg128(dd, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00005919 putQRegLO(dd, mkexpr(res));
sewardjbbcf1882014-01-12 12:49:10 +00005920 DIP("fabd %s, %s, %s\n",
sewardj606c4ba2014-01-26 19:11:14 +00005921 nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardjbbcf1882014-01-12 12:49:10 +00005922 return True;
5923 }
5924
sewardj606c4ba2014-01-26 19:11:14 +00005925 /* -------------- {S,U}CVTF (vector, integer) -------------- */
5926 /* 31 28 22 21 15 9 4
5927 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
5928 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
5929 with laneage:
5930 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
5931 */
5932 if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
5933 && INSN(21,16) == BITS6(1,0,0,0,0,1)
5934 && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
5935 Bool isQ = INSN(30,30) == 1;
5936 Bool isU = INSN(29,29) == 1;
5937 Bool isF64 = INSN(22,22) == 1;
5938 UInt nn = INSN(9,5);
5939 UInt dd = INSN(4,0);
5940 if (isQ || !isF64) {
5941 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
5942 UInt nLanes = 0;
5943 Bool zeroHI = False;
5944 const HChar* arrSpec = NULL;
5945 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
5946 isQ, isF64 );
5947 IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
5948 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
5949 IRTemp rm = mk_get_IR_rounding_mode();
5950 UInt i;
5951 vassert(ok); /* the 'if' above should ensure this */
5952 for (i = 0; i < nLanes; i++) {
5953 putQRegLane(dd, i,
5954 binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
5955 }
5956 if (zeroHI) {
5957 putQRegLane(dd, 1, mkU64(0));
5958 }
5959 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
5960 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
5961 return True;
5962 }
5963 /* else fall through */
5964 }
5965
5966 /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
5967 /* 31 28 22 21 20 15 9 4 case
5968 0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1
5969 0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2
5970 0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3
5971 0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4
5972 0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5
5973 0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6
sewardje520bb32014-02-17 11:00:53 +00005974 0q1 01110 1 sz 1 m 110101 n d FABD Vd,Vn,Vm 7
sewardj606c4ba2014-01-26 19:11:14 +00005975 */
5976 if (INSN(31,31) == 0
5977 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
5978 Bool isQ = INSN(30,30) == 1;
5979 UInt b29 = INSN(29,29);
5980 UInt b23 = INSN(23,23);
5981 Bool isF64 = INSN(22,22) == 1;
5982 UInt mm = INSN(20,16);
5983 UInt b1510 = INSN(15,10);
5984 UInt nn = INSN(9,5);
5985 UInt dd = INSN(4,0);
5986 UInt ix = 0;
5987 /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
5988 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
5989 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
5990 else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
5991 else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
5992 else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
sewardje520bb32014-02-17 11:00:53 +00005993 else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
sewardj606c4ba2014-01-26 19:11:14 +00005994 IRType laneTy = Ity_INVALID;
5995 Bool zeroHI = False;
5996 const HChar* arr = "??";
5997 Bool ok
5998 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
5999 /* Skip MLA/MLS for the time being */
6000 if (ok && ix >= 1 && ix <= 4) {
6001 const IROp ops64[4]
6002 = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
6003 const IROp ops32[4]
6004 = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
6005 const HChar* names[4]
6006 = { "fadd", "fsub", "fmul", "fdiv" };
6007 IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
6008 IRTemp rm = mk_get_IR_rounding_mode();
6009 IRTemp t1 = newTemp(Ity_V128);
6010 IRTemp t2 = newTemp(Ity_V128);
6011 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjecde6972014-02-05 11:01:19 +00006012 assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
6013 : mkexpr(t1));
sewardj606c4ba2014-01-26 19:11:14 +00006014 putQReg128(dd, mkexpr(t2));
6015 DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
6016 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6017 return True;
6018 }
sewardjfab09142014-02-10 10:28:13 +00006019 if (ok && ix >= 5 && ix <= 6) {
6020 IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
6021 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6022 IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
6023 IRTemp rm = mk_get_IR_rounding_mode();
6024 IRTemp t1 = newTemp(Ity_V128);
6025 IRTemp t2 = newTemp(Ity_V128);
6026 // FIXME: double rounding; use FMA primops instead
6027 assign(t1, triop(opMUL,
6028 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6029 assign(t2, triop(ix == 5 ? opADD : opSUB,
6030 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
sewardje520bb32014-02-17 11:00:53 +00006031 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6032 : mkexpr(t2));
sewardjfab09142014-02-10 10:28:13 +00006033 DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
6034 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6035 return True;
6036 }
sewardje520bb32014-02-17 11:00:53 +00006037 if (ok && ix == 7) {
6038 IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
6039 IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6040 IRTemp rm = mk_get_IR_rounding_mode();
6041 IRTemp t1 = newTemp(Ity_V128);
6042 IRTemp t2 = newTemp(Ity_V128);
6043 // FIXME: use Abd primop instead?
6044 assign(t1, triop(opSUB,
6045 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
6046 assign(t2, unop(opABS, mkexpr(t1)));
6047 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
6048 : mkexpr(t2));
6049 DIP("fabd %s.%s, %s.%s, %s.%s\n",
6050 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6051 return True;
6052 }
sewardj606c4ba2014-01-26 19:11:14 +00006053 }
6054
sewardj2bd1ffe2014-03-27 18:59:00 +00006055 /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
6056 /* 31 28 22 20 15 9 4 case
6057 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm
6058 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm
6059 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm
6060 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm
6061 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm
6062 */
6063 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
6064 && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
6065 Bool isQ = INSN(30,30) == 1;
6066 UInt U = INSN(29,29);
6067 UInt E = INSN(23,23);
6068 Bool isF64 = INSN(22,22) == 1;
6069 UInt ac = INSN(11,11);
6070 UInt mm = INSN(20,16);
6071 UInt nn = INSN(9,5);
6072 UInt dd = INSN(4,0);
6073 /* */
6074 UInt EUac = (E << 2) | (U << 1) | ac;
6075 IROp opABS = Iop_INVALID;
6076 IROp opCMP = Iop_INVALID;
6077 IRType laneTy = Ity_INVALID;
6078 Bool zeroHI = False;
6079 Bool swap = True;
6080 const HChar* arr = "??";
6081 const HChar* nm = "??";
6082 Bool ok
6083 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
6084 if (ok) {
6085 vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
6086 switch (EUac) {
6087 case BITS3(0,0,0):
6088 nm = "fcmeq";
6089 opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
6090 swap = False;
6091 break;
6092 case BITS3(0,1,0):
6093 nm = "fcmge";
6094 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6095 break;
6096 case BITS3(0,1,1):
6097 nm = "facge";
6098 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6099 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6100 break;
6101 case BITS3(1,1,0):
6102 nm = "fcmgt";
6103 opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6104 break;
6105 case BITS3(1,1,1):
6106 nm = "fcagt";
6107 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6108 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6109 break;
6110 default:
6111 break;
6112 }
6113 }
6114 if (opCMP != Iop_INVALID) {
6115 IRExpr* argN = getQReg128(nn);
6116 IRExpr* argM = getQReg128(mm);
6117 if (opABS != Iop_INVALID) {
6118 argN = unop(opABS, argN);
6119 argM = unop(opABS, argM);
6120 }
6121 IRExpr* res = swap ? binop(opCMP, argM, argN)
6122 : binop(opCMP, argN, argM);
6123 if (zeroHI) {
6124 res = unop(Iop_ZeroHI64ofV128, res);
6125 }
6126 putQReg128(dd, res);
6127 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6128 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6129 return True;
6130 }
6131 /* else fall through */
6132 }
6133
sewardj32d86752014-03-02 12:47:18 +00006134 /* -------------------- FCVTN -------------------- */
6135 /* 31 28 23 20 15 9 4
6136 0q0 01110 0s1 00001 011010 n d FCVTN Vd, Vn
6137 where case q:s of 00: 16Fx4(lo) <- 32Fx4
6138 01: 32Fx2(lo) <- 64Fx2
6139 10: 16Fx4(hi) <- 32Fx4
6140 11: 32Fx2(hi) <- 64Fx2
6141 Only deals with the 32Fx2 <- 64Fx2 version (s==1)
6142 */
6143 if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
6144 && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
6145 UInt bQ = INSN(30,30);
6146 UInt bS = INSN(22,22);
6147 UInt nn = INSN(9,5);
6148 UInt dd = INSN(4,0);
6149 if (bS == 1) {
6150 IRTemp rm = mk_get_IR_rounding_mode();
6151 IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
6152 IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
6153 putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
6154 putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
6155 if (bQ == 0) {
6156 putQRegLane(dd, 1, mkU64(0));
6157 }
6158 DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
6159 nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
6160 return True;
6161 }
6162 /* else fall through */
6163 }
6164
sewardj606c4ba2014-01-26 19:11:14 +00006165 /* ---------------- ADD/SUB (vector) ---------------- */
6166 /* 31 28 23 21 20 15 9 4
6167 0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T
6168 0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T
6169 */
6170 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
6171 && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6172 Bool isQ = INSN(30,30) == 1;
6173 UInt szBlg2 = INSN(23,22);
6174 Bool isSUB = INSN(29,29) == 1;
6175 UInt mm = INSN(20,16);
6176 UInt nn = INSN(9,5);
6177 UInt dd = INSN(4,0);
6178 Bool zeroHI = False;
6179 const HChar* arrSpec = "";
6180 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6181 if (ok) {
sewardjf5b08912014-02-06 12:57:58 +00006182 const IROp opsADD[4]
sewardj606c4ba2014-01-26 19:11:14 +00006183 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
sewardjf5b08912014-02-06 12:57:58 +00006184 const IROp opsSUB[4]
sewardj606c4ba2014-01-26 19:11:14 +00006185 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
6186 vassert(szBlg2 < 4);
sewardjf5b08912014-02-06 12:57:58 +00006187 IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
6188 IRTemp t = newTemp(Ity_V128);
sewardj606c4ba2014-01-26 19:11:14 +00006189 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjecde6972014-02-05 11:01:19 +00006190 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
6191 : mkexpr(t));
sewardj606c4ba2014-01-26 19:11:14 +00006192 const HChar* nm = isSUB ? "sub" : "add";
6193 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6194 nameQReg128(dd), arrSpec,
6195 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6196 return True;
6197 }
6198 /* else fall through */
6199 }
6200
sewardjecde6972014-02-05 11:01:19 +00006201 /* ---------------- ADD/SUB (scalar) ---------------- */
6202 /* 31 28 23 21 20 15 9 4
6203 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm
6204 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm
6205 */
6206 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6207 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6208 Bool isSUB = INSN(29,29) == 1;
6209 UInt mm = INSN(20,16);
6210 UInt nn = INSN(9,5);
6211 UInt dd = INSN(4,0);
6212 IRTemp res = newTemp(Ity_I64);
6213 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6214 getQRegLane(nn, 0, Ity_I64),
6215 getQRegLane(mm, 0, Ity_I64)));
6216 putQRegLane(dd, 0, mkexpr(res));
6217 putQRegLane(dd, 1, mkU64(0));
6218 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6219 nameQRegLO(dd, Ity_I64),
6220 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6221 return True;
6222 }
6223
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31 28 23 21 20 15 9 4
      0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only
      0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only
      0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only
      0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29 = INSN(29,29);
      UInt mm = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      /* Bit 11 == 0 selects the accumulating forms (MLA/MLS). */
      Bool isMLAS = INSN(11,11) == 0;
      /* Iop_INVALID entries mark lane sizes the insn doesn't support:
         no D-lane forms at all, and PMUL exists for B lanes only. */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC. A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         /* t1 = Vn * Vm (integer or polynomial), t2 = optionally
            accumulated into the existing Vd value (MLA/MLS). */
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6282
sewardjecde6972014-02-05 11:01:19 +00006283 /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
6284 /* 31 28 23 21 20 15 9 4
6285 0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T
6286 0q1 01110 size 1 m 011011 n d UMIN Vd.T, Vn.T, Vm.T
6287 0q0 01110 size 1 m 011001 n d SMAX Vd.T, Vn.T, Vm.T
6288 0q1 01110 size 1 m 011001 n d UMAX Vd.T, Vn.T, Vm.T
6289 */
6290 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
6291 && INSN(21,21) == 1
6292 && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
6293 Bool isQ = INSN(30,30) == 1;
6294 Bool isU = INSN(29,29) == 1;
6295 UInt szBlg2 = INSN(23,22);
sewardj5860ec72014-03-01 11:19:45 +00006296 Bool isMAX = INSN(11,11) == 0;
sewardjecde6972014-02-05 11:01:19 +00006297 UInt mm = INSN(20,16);
6298 UInt nn = INSN(9,5);
6299 UInt dd = INSN(4,0);
6300 Bool zeroHI = False;
6301 const HChar* arrSpec = "";
6302 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6303 if (ok) {
6304 const IROp opMINS[4]
6305 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
6306 const IROp opMINU[4]
6307 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
6308 const IROp opMAXS[4]
6309 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
6310 const IROp opMAXU[4]
6311 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
6312 vassert(szBlg2 < 4);
6313 IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
6314 : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
6315 IRTemp t = newTemp(Ity_V128);
6316 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
6317 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
6318 : mkexpr(t));
6319 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
6320 : (isU ? "umin" : "smin");
6321 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6322 nameQReg128(dd), arrSpec,
6323 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6324 return True;
6325 }
6326 /* else fall through */
6327 }
6328
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31 28 23 21 16 15 9 4
      0q0 01110 size 11000 1 101010 n d SMINV Vd, Vn.T
      0q1 01110 size 11000 1 101010 n d UMINV Vd, Vn.T
      0q0 01110 size 11000 0 101010 n d SMAXV Vd, Vn.T
      0q1 01110 size 11000 0 101010 n d UMAXV Vd, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ = INSN(30,30) == 1;
      Bool isU = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX = INSN(16,16) == 0;
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* Reject the lane arrangements this across-lanes op doesn't
            have: no D lanes at all, and S lanes only with Q == 1. */
         if (szBlg2 == 3) ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value. In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         /* math_MINMAXV folds all lanes down to one with 'op'. */
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6387
sewardjfab09142014-02-10 10:28:13 +00006388 /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
6389 /* 31 28 23 20 15 9 4
6390 0q0 01110 001 m 000111 n d AND Vd.T, Vn.T, Vm.T
6391 0q0 01110 011 m 000111 n d BIC Vd.T, Vn.T, Vm.T
6392 0q0 01110 101 m 000111 n d ORR Vd.T, Vn.T, Vm.T
6393 0q0 01110 111 m 000111 n d ORN Vd.T, Vn.T, Vm.T
6394 T is 16b when q==1, 8b when q==0
6395 */
6396 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6397 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6398 Bool isQ = INSN(30,30) == 1;
6399 Bool isORR = INSN(23,23) == 1;
6400 Bool invert = INSN(22,22) == 1;
6401 UInt mm = INSN(20,16);
6402 UInt nn = INSN(9,5);
6403 UInt dd = INSN(4,0);
6404 IRTemp res = newTemp(Ity_V128);
6405 assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
6406 getQReg128(nn),
6407 invert ? unop(Iop_NotV128, getQReg128(mm))
6408 : getQReg128(mm)));
6409 putQReg128(dd, isQ ? mkexpr(res)
6410 : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
6411 const HChar* names[4] = { "and", "bic", "orr", "orn" };
6412 const HChar* ar = isQ ? "16b" : "8b";
6413 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
6414 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
6415 return True;
6416 }
6417
   /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
   /* 31 28 23 21 15 9 4 ix
      0q1 01110 size 1 m 100011 n d CMEQ Vd.T, Vn.T, Vm.T (1) ==
      0q0 01110 size 1 m 100011 n d CMTST Vd.T, Vn.T, Vm.T (2) &, != 0

      0q1 01110 size 1 m 001101 n d CMHI Vd.T, Vn.T, Vm.T (3) >u
      0q0 01110 size 1 m 001101 n d CMGT Vd.T, Vn.T, Vm.T (4) >s

      0q1 01110 size 1 m 001111 n d CMHS Vd.T, Vn.T, Vm.T (5) >=u
      0q0 01110 size 1 m 001111 n d CMGE Vd.T, Vn.T, Vm.T (6) >=s

      0q1 01110 size 100000 100010 n d CMGE Vd.T, Vn.T, #0 (7) >=s 0
      0q0 01110 size 100000 100010 n d CMGT Vd.T, Vn.T, #0 (8) >s 0

      0q1 01110 size 100000 100110 n d CMLE Vd.T, Vn.T, #0 (9) <=s 0
      0q0 01110 size 100000 100110 n d CMEQ Vd.T, Vn.T, #0 (10) == 0

      0q0 01110 size 100000 101010 n d CMLT Vd.T, Vn.T, #0 (11) <s 0
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ = INSN(30,30) == 1;
      UInt bit29 = INSN(29,29);
      UInt szBlg2 = INSN(23,22);
      UInt mm = INSN(20,16);
      UInt b1510 = INSN(15,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      const IROp opsEQ[4]
         = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
      const IROp opsGTS[4]
         = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
      const IROp opsGTU[4]
         = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
      Bool zeroHI = False;
      const HChar* arrSpec = "??";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      UInt ix = 0;
      /* Map bits 15:10 (and, for the #0 forms, the requirement that
         the m field is zero) onto case number 1..11 per the table
         above.  ix == 0 means "not one of ours". */
      if (ok) {
         switch (b1510) {
            case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
            case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
            case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
            case BITS6(1,0,0,0,1,0):
               if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
            case BITS6(1,0,0,1,1,0):
               if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
            case BITS6(1,0,1,0,1,0):
               if (mm == 0 && bit29 == 0) { ix = 11; }; break;
            default: break;
         }
      }
      if (ix != 0) {
         vassert(ok && szBlg2 < 4);
         IRExpr* argL = getQReg128(nn);
         /* The #0 forms (cases 7..11) compare against a zero vector. */
         IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
         IRExpr* res = NULL;
         /* Some useful identities:
            x > y can be expressed directly
            x < y == y > x
            x <= y == not (x > y)
            x >= y == not (y > x)
         */
         switch (ix) {
            case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
            /* CMTST: lanes where (argL & argR) != 0, hence NOT(== 0). */
            case 2: res = unop(Iop_NotV128, binop(opsEQ[szBlg2],
                              binop(Iop_AndV128, argL, argR),
                              mkV128(0x0000)));
                    break;
            case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
            case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
                    break;
            case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
                    break;
            case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
            case 9: res = unop(Iop_NotV128,
                              binop(opsGTS[szBlg2], argL, argR));
                    break;
            case 10: res = binop(opsEQ[szBlg2], argL, argR); break;
            case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
            default: vassert(0);
         }
         vassert(res);
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
         const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
                                  "ge", "gt", "le", "eq", "lt" };
         if (ix <= 6) {
            DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
                nameQReg128(dd), arrSpec,
                nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         } else {
            DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
                nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         }
         return True;
      }
      /* else fall through */
   }
6519
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31 28 23 20 15 9 4
      0q1 01110 00 1 m 000111 n d EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1 m 000111 n d BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1 m 000111 n d BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1 m 000111 n d BIF Vd.T, Vm.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt op = INSN(23,22);
      UInt mm = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      /* BSL/BIT/BIF read Vd as well as Vn/Vm, so capture all three
         before any write. */
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      /* The three select forms below are instances of the bitwise-mux
         identity  mux(c,x,y) = y ^ ((y ^ x) & c), with the condition
         vector being Vd (BSL), Vm (BIT) or ~Vm (BIF). */
      IRExpr* res = NULL;
      switch (op) {
         case BITS2(0,0): /* EOR */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL */
            res = binop(opXOR, mkexpr(argM),
                        binop(opAND,
                              binop(opXOR, mkexpr(argM), mkexpr(argN)),
                              mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF */
            res = binop(opXOR, mkexpr(argD),
                        binop(opAND,
                              binop(opXOR, mkexpr(argD), mkexpr(argN)),
                              unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      /* 8b form zeroes the upper half of Vd. */
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6578
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31 28 22 18 15 9 4
      0q1 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                      0001:xxx -> B, SHR:8-xxx, SHL:xxx
                      001x:xxx -> H, SHR:16-xxxx SHL:xxxx
                      01xx:xxx -> S, SHR:32-xxxxx SHL:xxxxx
                      1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                      other -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn = INSN(9,5);
         UInt dd = INSN(4,0);
         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift = 0;
         Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case. Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         /* NOTE(review): this guard rejects shift == 0 and
            shift == lane-size, so SHL #0 and {U,S}SHR #lane-size fall
            through undecoded -- presumably deliberate scope limiting;
            confirm against the intended decode coverage. */
         if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = "shl"; break;
               default: vassert(0);
            }
            IRExpr* src = getQReg128(nn);
            IRExpr* res = binop(op, src, mkU8(shift));
            putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
            HChar laneCh = "bhsd"[szBlg2];
            UInt nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6641
   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31 28 22 18 15 9 4
      0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
                       0000 -> AdvSIMD modified immediate (???)
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
      Bool isQ = INSN(30,30) == 1;
      Bool isU = INSN(29,29) == 1;
      UInt immh = INSN(22,19);
      UInt immb = INSN(18,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      UInt immhb = (immh << 3) | immb;
      IRTemp src = newTemp(Ity_V128);
      IRTemp zero = newTemp(Ity_V128);
      IRExpr* res = NULL;
      UInt sh = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      /* Strategy: widen by interleaving the source half (low lanes for
         the base form, high lanes for the "2" form) with a zero vector,
         then shift each double-width lane right by laneWidth-sh --
         arithmetically for SSHLL, logically for USHLL.  The right shift
         supplies the sign/zero extension and nets out to a left shift
         of sh on the widened value. */
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* res == NULL means the immh pattern was invalid. */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      /* else fall through */
   }
6713
sewardj606c4ba2014-01-26 19:11:14 +00006714 /* -------------------- XTN{,2} -------------------- */
sewardjecde6972014-02-05 11:01:19 +00006715 /* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta
sewardj606c4ba2014-01-26 19:11:14 +00006716 0q0 01110 size 100001 001010 n d
6717 */
6718 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
6719 && INSN(21,16) == BITS6(1,0,0,0,0,1)
6720 && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
6721 Bool isQ = INSN(30,30) == 1;
6722 UInt size = INSN(23,22);
6723 UInt nn = INSN(9,5);
6724 UInt dd = INSN(4,0);
6725 IROp op = Iop_INVALID;
6726 const HChar* tb = NULL;
6727 const HChar* ta = NULL;
6728 switch ((size << 1) | (isQ ? 1 : 0)) {
6729 case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
6730 case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
6731 case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
6732 case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
6733 case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
6734 case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
6735 case 6: break;
6736 case 7: break;
6737 default: vassert(0);
6738 }
6739 if (op != Iop_INVALID) {
6740 if (!isQ) {
6741 putQRegLane(dd, 1, mkU64(0));
6742 }
6743 putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
6744 DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
6745 nameQReg128(dd), tb, nameQReg128(nn), ta);
6746 return True;
6747 }
6748 /* else fall through */
6749 }
6750
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arT = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 gives the lane size; the bits above
         it give the lane index.  w0 receives the selected source lane,
         zero-extended to 64 bits. */
      if (imm5 & 1) {
         arT = isQ ? "16b" : "8b";
         arTs = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT = isQ ? "8h" : "4h";
         arTs = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT = isQ ? "4s" : "2s";
         arTs = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         arT = "2d";
         arTs = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* laneTy != Ity_INVALID marks a successful decode. */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane across 64 bits, then across the whole
            register (Q==1) or zero the upper half (Q==0). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
6809
   /* ---------------- DUP (general, vector) ---------------- */
   /* 31 28 23 20 15 9 4
      0q0 01110 000 imm5 000011 n d DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
            xxx10 4H(q=0) or 8H(q=1), R=W
            xx100 2S(q=0) or 4S(q=1), R=W
            x1000 Invalid(q=0) or 2D(q=1), R=X
            x0000 Invalid(q=0) or Invalid(q=1)
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      /* Lowest set bit of imm5 gives the lane size; w0 receives the
         source GPR truncated to that size and zero-extended to 64
         bits. */
      if (imm5 & 1) {
         arT = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* laneTy != Ity_INVALID marks a successful decode. */
      if (laneTy != Ity_INVALID) {
         /* Replicate across 64 bits, then across the full register
            (Q==1) or zero the upper half (Q==0). */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* else fall through */
   }
6863
sewardjf5b08912014-02-06 12:57:58 +00006864 /* ---------------------- {S,U}MOV ---------------------- */
6865 /* 31 28 20 15 9 4
6866 0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
6867 0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
6868 dest is Xd when q==1, Wd when q==0
6869 UMOV:
6870 Ts,index,ops = case q:imm5 of
6871 0:xxxx1 -> B, xxxx, 8Uto64
6872 1:xxxx1 -> invalid
6873 0:xxx10 -> H, xxx, 16Uto64
6874 1:xxx10 -> invalid
6875 0:xx100 -> S, xx, 32Uto64
6876 1:xx100 -> invalid
6877 1:x1000 -> D, x, copy64
6878 other -> invalid
6879 SMOV:
6880 Ts,index,ops = case q:imm5 of
6881 0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
6882 1:xxxx1 -> B, xxxx, 8Sto64
6883 0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
6884 1:xxx10 -> H, xxx, 16Sto64
6885 0:xx100 -> invalid
6886 1:xx100 -> S, xx, 32Sto64
6887 1:x1000 -> invalid
6888 other -> invalid
6889 */
6890 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
6891 && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
6892 UInt bitQ = INSN(30,30) == 1;
6893 UInt imm5 = INSN(20,16);
6894 UInt nn = INSN(9,5);
6895 UInt dd = INSN(4,0);
6896 Bool isU = INSN(12,12) == 1;
6897 const HChar* arTs = "??";
6898 UInt laneNo = 16; /* invalid */
6899 // Setting 'res' to non-NULL determines valid/invalid
6900 IRExpr* res = NULL;
6901 if (!bitQ && (imm5 & 1)) { // 0:xxxx1
6902 laneNo = (imm5 >> 1) & 15;
6903 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6904 res = isU ? unop(Iop_8Uto64, lane)
6905 : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
6906 arTs = "b";
6907 }
6908 else if (bitQ && (imm5 & 1)) { // 1:xxxx1
6909 laneNo = (imm5 >> 1) & 15;
6910 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
6911 res = isU ? NULL
6912 : unop(Iop_8Sto64, lane);
6913 arTs = "b";
6914 }
6915 else if (!bitQ && (imm5 & 2)) { // 0:xxx10
6916 laneNo = (imm5 >> 2) & 7;
6917 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6918 res = isU ? unop(Iop_16Uto64, lane)
6919 : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
6920 arTs = "h";
6921 }
6922 else if (bitQ && (imm5 & 2)) { // 1:xxx10
6923 laneNo = (imm5 >> 2) & 7;
6924 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
6925 res = isU ? NULL
6926 : unop(Iop_16Sto64, lane);
6927 arTs = "h";
6928 }
6929 else if (!bitQ && (imm5 & 4)) { // 0:xx100
6930 laneNo = (imm5 >> 3) & 3;
6931 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6932 res = isU ? unop(Iop_32Uto64, lane)
6933 : NULL;
6934 arTs = "s";
6935 }
6936 else if (bitQ && (imm5 & 4)) { // 1:xxx10
6937 laneNo = (imm5 >> 3) & 3;
6938 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
6939 res = isU ? NULL
6940 : unop(Iop_32Sto64, lane);
6941 arTs = "s";
6942 }
6943 else if (bitQ && (imm5 & 8)) { // 1:x1000
6944 laneNo = (imm5 >> 4) & 1;
6945 IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
6946 res = isU ? lane
6947 : NULL;
6948 arTs = "d";
6949 }
6950 /* */
6951 if (res) {
6952 vassert(laneNo < 16);
6953 putIReg64orZR(dd, res);
6954 DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
6955 nameIRegOrZR(bitQ == 1, dd),
6956 nameQReg128(nn), arTs, laneNo);
6957 return True;
6958 }
6959 /* else fall through */
6960 }
6961
sewardje520bb32014-02-17 11:00:53 +00006962 /* -------------------- INS (general) -------------------- */
6963 /* 31 28 20 15 9 4
6964 010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
6965 where Ts,ix = case imm5 of xxxx1 -> B, xxxx
6966 xxx10 -> H, xxx
6967 xx100 -> S, xx
6968 x1000 -> D, x
6969 */
6970 if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
6971 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
6972 UInt imm5 = INSN(20,16);
6973 UInt nn = INSN(9,5);
6974 UInt dd = INSN(4,0);
6975 HChar ts = '?';
6976 UInt laneNo = 16;
6977 IRExpr* src = NULL;
6978 if (imm5 & 1) {
6979 src = unop(Iop_64to8, getIReg64orZR(nn));
6980 laneNo = (imm5 >> 1) & 15;
6981 ts = 'b';
6982 }
6983 else if (imm5 & 2) {
6984 src = unop(Iop_64to16, getIReg64orZR(nn));
6985 laneNo = (imm5 >> 2) & 7;
6986 ts = 'h';
6987 }
6988 else if (imm5 & 4) {
6989 src = unop(Iop_64to32, getIReg64orZR(nn));
6990 laneNo = (imm5 >> 3) & 3;
6991 ts = 's';
6992 }
6993 else if (imm5 & 8) {
6994 src = getIReg64orZR(nn);
6995 laneNo = (imm5 >> 4) & 1;
6996 ts = 'd';
6997 }
6998 /* */
6999 if (src) {
7000 vassert(laneNo < 16);
7001 putQRegLane(dd, laneNo, src);
7002 DIP("ins %s.%c[%u], %s\n",
7003 nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
7004 return True;
7005 }
7006 /* else invalid; fall through */
7007 }
7008
sewardj32d86752014-03-02 12:47:18 +00007009 /* -------------------- NEG (vector) -------------------- */
7010 /* 31 28 23 21 16 9 4
7011 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn
7012 sz is laneSz, q:sz == 011 is disallowed, as usual
7013 */
7014 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
7015 && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
7016 Bool isQ = INSN(30,30) == 1;
7017 UInt szBlg2 = INSN(23,22);
7018 UInt nn = INSN(9,5);
7019 UInt dd = INSN(4,0);
7020 Bool zeroHI = False;
7021 const HChar* arrSpec = "";
7022 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
7023 if (ok) {
7024 const IROp opSUB[4]
7025 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
7026 IRTemp res = newTemp(Ity_V128);
7027 vassert(szBlg2 < 4);
7028 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
7029 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
7030 : mkexpr(res));
7031 DIP("neg %s.%s, %s.%s\n",
7032 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
7033 return True;
7034 }
7035 /* else fall through */
7036 }
7037
sewardj92d0ae32014-04-03 13:48:54 +00007038 /* -------------------- TBL, TBX -------------------- */
7039 /* 31 28 20 15 14 12 9 4
7040 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7041 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
7042 where Ta = 16b(q=1) or 8b(q=0)
7043 */
7044 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
7045 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
7046 Bool isQ = INSN(30,30) == 1;
7047 Bool isTBX = INSN(12,12) == 1;
7048 UInt mm = INSN(20,16);
7049 UInt len = INSN(14,13);
7050 UInt nn = INSN(9,5);
7051 UInt dd = INSN(4,0);
7052 /* The out-of-range values to use. */
7053 IRTemp oor_values = newTemp(Ity_V128);
7054 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
7055 /* src value */
7056 IRTemp src = newTemp(Ity_V128);
7057 assign(src, getQReg128(mm));
7058 /* The table values */
7059 IRTemp tab[4];
7060 UInt i;
7061 for (i = 0; i <= len; i++) {
7062 vassert(i < 4);
7063 tab[i] = newTemp(Ity_V128);
7064 assign(tab[i], getQReg128((nn + i) % 32));
7065 }
7066 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
7067 putQReg128(dd, isQ ? mkexpr(res)
7068 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
7069 const HChar* Ta = isQ ? "16b" : "8b";
7070 const HChar* nm = isTBX ? "tbx" : "tbl";
7071 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
7072 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
7073 return True;
7074 }
sewardjbbcf1882014-01-12 12:49:10 +00007075 /* FIXME Temporary hacks to get through ld.so FIXME */
7076
7077 /* ------------------ movi vD.4s, #0x0 ------------------ */
7078 /* 0x4F 0x00 0x04 000 vD */
7079 if ((insn & 0xFFFFFFE0) == 0x4F000400) {
7080 UInt vD = INSN(4,0);
7081 putQReg128(vD, mkV128(0x0000));
7082 DIP("movi v%u.4s, #0x0\n", vD);
7083 return True;
7084 }
7085
sewardjbbcf1882014-01-12 12:49:10 +00007086 /* ---------------- MOV vD.16b, vN.16b ---------------- */
7087 /* 31 23 20 15 9 4
7088 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
7089 This only handles the N == M case.
7090 */
7091 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
7092 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7093 UInt mm = INSN(20,16);
7094 UInt nn = INSN(9,5);
7095 UInt dd = INSN(4,0);
7096 if (mm == nn) {
7097 putQReg128(dd, getQReg128(nn));
7098 DIP("mov v%u.16b, v%u.16b\n", dd, nn);
7099 return True;
7100 }
7101 /* else it's really an ORR; fall through. */
7102 }
7103
7104 vex_printf("ARM64 front end: simd_and_fp\n");
7105 return False;
7106# undef INSN
7107}
7108
7109
7110/*------------------------------------------------------------*/
7111/*--- Disassemble a single ARM64 instruction ---*/
7112/*------------------------------------------------------------*/
7113
7114/* Disassemble a single ARM64 instruction into IR. The instruction
   is located at |guest_instr| and has guest IP of
7116 |guest_PC_curr_instr|, which will have been set before the call
7117 here. Returns True iff the instruction was decoded, in which case
7118 *dres will be set accordingly, or False, in which case *dres should
7119 be ignored by the caller. */
7120
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   /* Decode one 4-byte insn at |guest_instr| into IR, filling in *dres.
      Returns False if the insn is not recognised.  Note: resteerOkFn,
      resteerCisOk, callback_opaque and abiinfo are not referenced in
      this function's body (passed through for interface uniformity). */

   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults: "insn OK, 4 bytes long, keep decoding".
      The sub-decoders rely on these being in place on entry. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file).
      Each special sequence is the 16-byte preamble below followed by
      one 4-byte marker insn -- 20 bytes total; hence the "+ 20"s and
      "len = 20" in the cases that follow. */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            /* whatNext stays Dis_Continue; only the length changes, so
               the caller advances the PC past the whole 20-byte
               sequence. */
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            /* Link register (x30) gets the address just past the
               20-byte sequence; then jump to the target in x8. */
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7293
7294
7295/*------------------------------------------------------------*/
7296/*--- Top-level fn ---*/
7297/*------------------------------------------------------------*/
7298
7299/* Disassemble a single instruction into IR. The instruction
7300 is located in host memory at &guest_code[delta]. */
7301
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN,
                           Bool         sigill_diag_IN )
{
   /* Top-level entry point: disassemble the single instruction at
      &guest_code_IN[delta_IN] into IR appended to |irsb_IN|, returning
      a DisResult describing what was decoded. */
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_is_bigendian   = host_bigendian_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      /* len == 4 for a normal insn; 20 for one of the "special"
         marker sequences handled in disInstr_ARM64_WRK. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         /* Print the undecodable insn in hex and as a binary string,
            grouped as 8 bits + space, with ' between nibbles. */
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.whatNext    = Dis_StopHere;
      dres.len         = 0;
      dres.continueAt  = 0;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}
7384
sewardjecde6972014-02-05 11:01:19 +00007385////////////////////////////////////////////////////////////////////////
7386////////////////////////////////////////////////////////////////////////
7387
7388/* Spare code for doing reference implementations of various 128-bit
7389 SIMD interleaves/deinterleaves/concatenation ops. For 64-bit
7390 equivalents see the end of guest_arm_toIR.c. */
7391
7392////////////////////////////////////////////////////////////////
7393// 64x2 operations
7394//
7395static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7396{
7397 // returns a0 b0
7398 return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7399 unop(Iop_V128to64, mkexpr(b10)));
7400}
7401
7402static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7403{
7404 // returns a1 b1
7405 return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7406 unop(Iop_V128HIto64, mkexpr(b10)));
7407}
7408
7409
7410////////////////////////////////////////////////////////////////
7411// 32x4 operations
7412//
7413
7414// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7415// the top halves guaranteed to be zero.
7416static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7417 IRTemp* out0, IRTemp v128 )
7418{
7419 if (out3) *out3 = newTemp(Ity_I64);
7420 if (out2) *out2 = newTemp(Ity_I64);
7421 if (out1) *out1 = newTemp(Ity_I64);
7422 if (out0) *out0 = newTemp(Ity_I64);
7423 IRTemp hi64 = newTemp(Ity_I64);
7424 IRTemp lo64 = newTemp(Ity_I64);
7425 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7426 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7427 if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7428 if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7429 if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7430 if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7431}
7432
7433// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7434// IRTemp.
7435static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7436{
7437 IRTemp hi64 = newTemp(Ity_I64);
7438 IRTemp lo64 = newTemp(Ity_I64);
7439 assign(hi64,
7440 binop(Iop_Or64,
7441 binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7442 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7443 assign(lo64,
7444 binop(Iop_Or64,
7445 binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7446 binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7447 IRTemp res = newTemp(Ity_V128);
7448 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7449 return res;
7450}
7451
7452static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7453{
7454 // returns a2 a0 b2 b0
7455 IRTemp a2, a0, b2, b0;
7456 breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7457 breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7458 return mkexpr(mkV128from32s(a2, a0, b2, b0));
7459}
7460
7461static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7462{
7463 // returns a3 a1 b3 b1
7464 IRTemp a3, a1, b3, b1;
7465 breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7466 breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7467 return mkexpr(mkV128from32s(a3, a1, b3, b1));
7468}
7469
sewardje520bb32014-02-17 11:00:53 +00007470static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7471{
7472 // returns a1 b1 a0 b0
7473 IRTemp a1, a0, b1, b0;
7474 breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7475 breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7476 return mkexpr(mkV128from32s(a1, b1, a0, b0));
7477}
7478
7479static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7480{
7481 // returns a3 b3 a2 b2
7482 IRTemp a3, a2, b3, b2;
7483 breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7484 breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7485 return mkexpr(mkV128from32s(a3, b3, a2, b2));
7486}
sewardjecde6972014-02-05 11:01:19 +00007487
7488////////////////////////////////////////////////////////////////
7489// 16x8 operations
7490//
7491
7492static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7493 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7494 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7495{
7496 if (out7) *out7 = newTemp(Ity_I64);
7497 if (out6) *out6 = newTemp(Ity_I64);
7498 if (out5) *out5 = newTemp(Ity_I64);
7499 if (out4) *out4 = newTemp(Ity_I64);
7500 if (out3) *out3 = newTemp(Ity_I64);
7501 if (out2) *out2 = newTemp(Ity_I64);
7502 if (out1) *out1 = newTemp(Ity_I64);
7503 if (out0) *out0 = newTemp(Ity_I64);
7504 IRTemp hi64 = newTemp(Ity_I64);
7505 IRTemp lo64 = newTemp(Ity_I64);
7506 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7507 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7508 if (out7)
7509 assign(*out7, binop(Iop_And64,
7510 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7511 mkU64(0xFFFF)));
7512 if (out6)
7513 assign(*out6, binop(Iop_And64,
7514 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7515 mkU64(0xFFFF)));
7516 if (out5)
7517 assign(*out5, binop(Iop_And64,
7518 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7519 mkU64(0xFFFF)));
7520 if (out4)
7521 assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
7522 if (out3)
7523 assign(*out3, binop(Iop_And64,
7524 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7525 mkU64(0xFFFF)));
7526 if (out2)
7527 assign(*out2, binop(Iop_And64,
7528 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7529 mkU64(0xFFFF)));
7530 if (out1)
7531 assign(*out1, binop(Iop_And64,
7532 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7533 mkU64(0xFFFF)));
7534 if (out0)
7535 assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
7536}
7537
7538static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7539 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7540{
7541 IRTemp hi64 = newTemp(Ity_I64);
7542 IRTemp lo64 = newTemp(Ity_I64);
7543 assign(hi64,
7544 binop(Iop_Or64,
7545 binop(Iop_Or64,
7546 binop(Iop_Shl64,
7547 binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
7548 mkU8(48)),
7549 binop(Iop_Shl64,
7550 binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
7551 mkU8(32))),
7552 binop(Iop_Or64,
7553 binop(Iop_Shl64,
7554 binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
7555 mkU8(16)),
7556 binop(Iop_And64,
7557 mkexpr(in4), mkU64(0xFFFF)))));
7558 assign(lo64,
7559 binop(Iop_Or64,
7560 binop(Iop_Or64,
7561 binop(Iop_Shl64,
7562 binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
7563 mkU8(48)),
7564 binop(Iop_Shl64,
7565 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
7566 mkU8(32))),
7567 binop(Iop_Or64,
7568 binop(Iop_Shl64,
7569 binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
7570 mkU8(16)),
7571 binop(Iop_And64,
7572 mkexpr(in0), mkU64(0xFFFF)))));
7573 IRTemp res = newTemp(Ity_V128);
7574 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7575 return res;
7576}
7577
7578static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7579{
7580 // returns a6 a4 a2 a0 b6 b4 b2 b0
7581 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
7582 breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
7583 breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
7584 return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
7585}
7586
7587static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7588{
7589 // returns a7 a5 a3 a1 b7 b5 b3 b1
7590 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
7591 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
7592 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
7593 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
7594}
7595
sewardje520bb32014-02-17 11:00:53 +00007596static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
7597{
7598 // returns a3 b3 a2 b2 a1 b1 a0 b0
7599 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
7600 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
7601 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
7602 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
7603}
7604
7605static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
7606{
7607 // returns a7 b7 a6 b6 a5 b5 a4 b4
7608 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
7609 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
7610 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
7611 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
7612}
7613
sewardjfab09142014-02-10 10:28:13 +00007614////////////////////////////////////////////////////////////////
7615// 8x16 operations
7616//
7617
7618static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
7619 IRTemp* outC, IRTemp* outB, IRTemp* outA,
7620 IRTemp* out9, IRTemp* out8,
7621 IRTemp* out7, IRTemp* out6, IRTemp* out5,
7622 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7623 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7624{
7625 if (outF) *outF = newTemp(Ity_I64);
7626 if (outE) *outE = newTemp(Ity_I64);
7627 if (outD) *outD = newTemp(Ity_I64);
7628 if (outC) *outC = newTemp(Ity_I64);
7629 if (outB) *outB = newTemp(Ity_I64);
7630 if (outA) *outA = newTemp(Ity_I64);
7631 if (out9) *out9 = newTemp(Ity_I64);
7632 if (out8) *out8 = newTemp(Ity_I64);
7633 if (out7) *out7 = newTemp(Ity_I64);
7634 if (out6) *out6 = newTemp(Ity_I64);
7635 if (out5) *out5 = newTemp(Ity_I64);
7636 if (out4) *out4 = newTemp(Ity_I64);
7637 if (out3) *out3 = newTemp(Ity_I64);
7638 if (out2) *out2 = newTemp(Ity_I64);
7639 if (out1) *out1 = newTemp(Ity_I64);
7640 if (out0) *out0 = newTemp(Ity_I64);
7641 IRTemp hi64 = newTemp(Ity_I64);
7642 IRTemp lo64 = newTemp(Ity_I64);
7643 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7644 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7645 if (outF)
7646 assign(*outF, binop(Iop_And64,
7647 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
7648 mkU64(0xFF)));
7649 if (outE)
7650 assign(*outE, binop(Iop_And64,
7651 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7652 mkU64(0xFF)));
7653 if (outD)
7654 assign(*outD, binop(Iop_And64,
7655 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
7656 mkU64(0xFF)));
7657 if (outC)
7658 assign(*outC, binop(Iop_And64,
7659 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7660 mkU64(0xFF)));
7661 if (outB)
7662 assign(*outB, binop(Iop_And64,
7663 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
7664 mkU64(0xFF)));
7665 if (outA)
7666 assign(*outA, binop(Iop_And64,
7667 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7668 mkU64(0xFF)));
7669 if (out9)
7670 assign(*out9, binop(Iop_And64,
7671 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
7672 mkU64(0xFF)));
7673 if (out8)
7674 assign(*out8, binop(Iop_And64,
7675 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
7676 mkU64(0xFF)));
7677 if (out7)
7678 assign(*out7, binop(Iop_And64,
7679 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
7680 mkU64(0xFF)));
7681 if (out6)
7682 assign(*out6, binop(Iop_And64,
7683 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7684 mkU64(0xFF)));
7685 if (out5)
7686 assign(*out5, binop(Iop_And64,
7687 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
7688 mkU64(0xFF)));
7689 if (out4)
7690 assign(*out4, binop(Iop_And64,
7691 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7692 mkU64(0xFF)));
7693 if (out3)
7694 assign(*out3, binop(Iop_And64,
7695 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
7696 mkU64(0xFF)));
7697 if (out2)
7698 assign(*out2, binop(Iop_And64,
7699 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7700 mkU64(0xFF)));
7701 if (out1)
7702 assign(*out1, binop(Iop_And64,
7703 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
7704 mkU64(0xFF)));
7705 if (out0)
7706 assign(*out0, binop(Iop_And64,
7707 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
7708 mkU64(0xFF)));
7709}
7710
7711static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
7712 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
7713 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7714 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7715{
7716 IRTemp vFE = newTemp(Ity_I64);
7717 IRTemp vDC = newTemp(Ity_I64);
7718 IRTemp vBA = newTemp(Ity_I64);
7719 IRTemp v98 = newTemp(Ity_I64);
7720 IRTemp v76 = newTemp(Ity_I64);
7721 IRTemp v54 = newTemp(Ity_I64);
7722 IRTemp v32 = newTemp(Ity_I64);
7723 IRTemp v10 = newTemp(Ity_I64);
7724 assign(vFE, binop(Iop_Or64,
7725 binop(Iop_Shl64,
7726 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
7727 binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
7728 assign(vDC, binop(Iop_Or64,
7729 binop(Iop_Shl64,
7730 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
7731 binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
7732 assign(vBA, binop(Iop_Or64,
7733 binop(Iop_Shl64,
7734 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
7735 binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
7736 assign(v98, binop(Iop_Or64,
7737 binop(Iop_Shl64,
7738 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
7739 binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
7740 assign(v76, binop(Iop_Or64,
7741 binop(Iop_Shl64,
7742 binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
7743 binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
7744 assign(v54, binop(Iop_Or64,
7745 binop(Iop_Shl64,
7746 binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
7747 binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
7748 assign(v32, binop(Iop_Or64,
7749 binop(Iop_Shl64,
7750 binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
7751 binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
7752 assign(v10, binop(Iop_Or64,
7753 binop(Iop_Shl64,
7754 binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
7755 binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
7756 return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
7757}
7758
7759static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7760 IRTemp bFEDCBA9876543210 )
7761{
7762 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7763 IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
7764 breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
7765 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
7766 aFEDCBA9876543210);
7767 breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
7768 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
7769 bFEDCBA9876543210);
7770 return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
7771 bE, bC, bA, b8, b6, b4, b2, b0));
7772}
7773
7774static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7775 IRTemp bFEDCBA9876543210 )
7776{
7777 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7778 IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
7779 breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
7780 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
7781 aFEDCBA9876543210);
7782
7783 breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
7784 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
7785 aFEDCBA9876543210);
7786
7787 return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
7788 bF, bD, bB, b9, b7, b5, b3, b1));
7789}
7790
sewardje520bb32014-02-17 11:00:53 +00007791static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7792 IRTemp bFEDCBA9876543210 )
7793{
7794 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7795 IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
7796 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7797 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
7798 aFEDCBA9876543210);
7799 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7800 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
7801 bFEDCBA9876543210);
7802 return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
7803 a3, b3, a2, b2, a1, b1, a0, b0));
7804}
7805
7806static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7807 IRTemp bFEDCBA9876543210 )
7808{
7809 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7810 IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
7811 breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
7812 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7813 aFEDCBA9876543210);
7814 breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
7815 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7816 bFEDCBA9876543210);
7817 return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
7818 aB, bB, aA, bA, a9, b9, a8, b8));
7819}
sewardjecde6972014-02-05 11:01:19 +00007820
sewardjbbcf1882014-01-12 12:49:10 +00007821/*--------------------------------------------------------------------*/
7822/*--- end guest_arm64_toIR.c ---*/
7823/*--------------------------------------------------------------------*/