blob: 2eb41d6512711859a5362e62fdf7743db73f8bcd [file] [log] [blame]
sewardjbbcf1882014-01-12 12:49:10 +00001/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
   Following that, one of the following 4 are allowed
87 (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
122/* CONST: is the host bigendian? We need to know this in order to do
123 sub-register accesses to the SIMD/FP registers correctly. */
124static Bool host_is_bigendian;
125
126/* CONST: The guest address for the instruction currently being
127 translated. */
128static Addr64 guest_PC_curr_instr;
129
130/* MOD: The IRSB* into which we're generating code. */
131static IRSB* irsb;
132
133
134/*------------------------------------------------------------*/
135/*--- Debugging output ---*/
136/*------------------------------------------------------------*/
137
/* Front-end trace output: print only when VEX_TRACE_FE is set.
   Wrapped in do { } while (0) so each macro behaves as a single
   statement; the bare 'if' form risked dangling-else bugs when used
   inside an unbraced if/else at a call site. */
#define DIP(format, args...) \
   do { if (vex_traceflags & VEX_TRACE_FE) \
           vex_printf(format, ## args); } while (0)

#define DIS(buf, format, args...) \
   do { if (vex_traceflags & VEX_TRACE_FE) \
           vex_sprintf(buf, format, ## args); } while (0)
145
146
147/*------------------------------------------------------------*/
148/*--- Helper bits and pieces for deconstructing the ---*/
149/*--- arm insn stream. ---*/
150/*------------------------------------------------------------*/
151
152/* Do a little-endian load of a 32-bit word, regardless of the
153 endianness of the underlying host. */
154static inline UInt getUIntLittleEndianly ( UChar* p )
155{
156 UInt w = 0;
157 w = (w << 8) | p[3];
158 w = (w << 8) | p[2];
159 w = (w << 8) | p[1];
160 w = (w << 8) | p[0];
161 return w;
162}
163
164/* Sign extend a N-bit value up to 64 bits, by copying
165 bit N-1 into all higher positions. */
166static ULong sx_to_64 ( ULong x, UInt n )
167{
168 vassert(n > 1 && n < 64);
169 Long r = (Long)x;
170 r = (r << (64-n)) >> (64-n);
171 return (ULong)r;
172}
173
174//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
175//ZZ endianness of the underlying host. */
176//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
177//ZZ {
178//ZZ UShort w = 0;
179//ZZ w = (w << 8) | p[1];
180//ZZ w = (w << 8) | p[0];
181//ZZ return w;
182//ZZ }
183//ZZ
184//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
185//ZZ vassert(sh >= 0 && sh < 32);
186//ZZ if (sh == 0)
187//ZZ return x;
188//ZZ else
189//ZZ return (x << (32-sh)) | (x >> sh);
190//ZZ }
191//ZZ
192//ZZ static Int popcount32 ( UInt x )
193//ZZ {
194//ZZ Int res = 0, i;
195//ZZ for (i = 0; i < 32; i++) {
196//ZZ res += (x & 1);
197//ZZ x >>= 1;
198//ZZ }
199//ZZ return res;
200//ZZ }
201//ZZ
202//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
203//ZZ {
204//ZZ UInt mask = 1 << ix;
205//ZZ x &= ~mask;
206//ZZ x |= ((b << ix) & mask);
207//ZZ return x;
208//ZZ }
209
/* Bit-pattern constructor macros.  BITSn(b_{n-1},...,b_0) assembles
   an n-bit constant from individual bit arguments, given most
   significant bit first.  Used to write instruction-encoding
   patterns legibly in the decoder. */

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4)  \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* BITS5/6/7 are expressed via BITS8 with the top bits zeroed. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin)  \
   (( ((UInt)(_uint)) >> (_bMin))  \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
250
251
252/*------------------------------------------------------------*/
253/*--- Helper bits and pieces for creating IR fragments. ---*/
254/*------------------------------------------------------------*/
255
/* Make a 128-bit vector constant from its 16-bit encoded form,
   as accepted by IRConst_V128. */
static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

/* Make a 64-bit integer constant expression. */
static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

/* Make a 32-bit integer constant expression. */
static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

/* Make an 8-bit integer constant expression; 'i' must fit in 8 bits. */
static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

/* Read the value of an IR temporary. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

/* Build a unary-op expression. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

/* Build a binary-op expression. */
static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

/* Build a ternary-op expression. */
static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

/* Build a little-endian load of type 'ty' from address 'addr'. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Emit a write of expression 'e' into temporary 'dst'. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Emit a little-endian store of 'data' at 'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
317
318//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
319//ZZ {
320//ZZ if (guardT == IRTemp_INVALID) {
321//ZZ /* unconditional */
322//ZZ storeLE(addr, data);
323//ZZ } else {
324//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
325//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
326//ZZ }
327//ZZ }
328//ZZ
329//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
330//ZZ IRExpr* addr, IRExpr* alt,
331//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
332//ZZ {
333//ZZ if (guardT == IRTemp_INVALID) {
334//ZZ /* unconditional */
335//ZZ IRExpr* loaded = NULL;
336//ZZ switch (cvt) {
337//ZZ case ILGop_Ident32:
338//ZZ loaded = loadLE(Ity_I32, addr); break;
339//ZZ case ILGop_8Uto32:
340//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
341//ZZ case ILGop_8Sto32:
342//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
343//ZZ case ILGop_16Uto32:
344//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
345//ZZ case ILGop_16Sto32:
346//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
347//ZZ default:
348//ZZ vassert(0);
349//ZZ }
350//ZZ vassert(loaded != NULL);
351//ZZ assign(dst, loaded);
352//ZZ } else {
353//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
354//ZZ loaded data before putting the data in 'dst'. If the load
355//ZZ does not take place, 'alt' is placed directly in 'dst'. */
356//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
357//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
358//ZZ }
359//ZZ }
360
/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
367
368//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
369//ZZ IRRoundingMode. */
370//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
371//ZZ {
372//ZZ return mkU32(Irrm_NEAREST);
373//ZZ }
374//ZZ
375//ZZ /* Generate an expression for SRC rotated right by ROT. */
376//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
377//ZZ {
378//ZZ vassert(rot >= 0 && rot < 32);
379//ZZ if (rot == 0)
380//ZZ return mkexpr(src);
381//ZZ return
382//ZZ binop(Iop_Or32,
383//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
384//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
385//ZZ }
386//ZZ
387//ZZ static IRExpr* mkU128 ( ULong i )
388//ZZ {
389//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
390//ZZ }
391//ZZ
392//ZZ /* Generate a 4-aligned version of the given expression if
393//ZZ the given condition is true. Else return it unchanged. */
394//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
395//ZZ {
396//ZZ if (b)
397//ZZ return binop(Iop_And32, e, mkU32(~3));
398//ZZ else
399//ZZ return e;
400//ZZ }
401
/* Other IR construction helpers.  Each maps a 32/64-bit integer
   IRType to the corresponding IROp; any other type panics. */

/* Bitwise AND at the given integer width. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

/* Bitwise OR at the given integer width. */
static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

/* Bitwise XOR at the given integer width. */
static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

/* Shift left at the given integer width. */
static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

/* Logical (unsigned) shift right at the given integer width. */
static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

/* Arithmetic (signed) shift right at the given integer width. */
static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

/* Bitwise NOT at the given integer width. */
static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

/* Integer add at the given width. */
static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

/* Integer subtract at the given width. */
static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}
474
/* Floating-point add for an F32/F64 type; other types panic. */
static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

/* Floating-point subtract for an F32/F64 type. */
static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

/* Floating-point multiply for an F32/F64 type. */
static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}
498
499static IROp mkDIVF ( IRType ty ) {
500 switch (ty) {
501 case Ity_F32: return Iop_DivF32;
502 case Ity_F64: return Iop_DivF64;
503 default: vpanic("mkMULF");
504 }
505}
506
/* Floating-point negate for an F32/F64 type; other types panic. */
static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}
514
515static IROp mkABSF ( IRType ty ) {
516 switch (ty) {
517 case Ity_F32: return Iop_AbsF32;
518 case Ity_F64: return Iop_AbsF64;
519 default: vpanic("mkNEGF");
520 }
521}
522
523static IROp mkSQRTF ( IRType ty ) {
524 switch (ty) {
525 case Ity_F32: return Iop_SqrtF32;
526 case Ity_F64: return Iop_SqrtF64;
527 default: vpanic("mkNEGF");
528 }
529}
530
531static IRExpr* mkU ( IRType ty, ULong imm ) {
532 switch (ty) {
533 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
534 case Ity_I64: return mkU64(imm);
535 default: vpanic("mkU");
536 }
537}
538
539/* Generate IR to create 'arg rotated right by imm', for sane values
540 of 'ty' and 'imm'. */
541static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
542{
543 UInt w = 0;
544 if (ty == Ity_I64) {
545 w = 64;
546 } else {
547 vassert(ty == Ity_I32);
548 w = 32;
549 }
550 vassert(w != 0);
551 vassert(imm < w);
552 if (imm == 0) {
553 return arg;
554 }
555 IRTemp res = newTemp(ty);
556 assign(res, binop(mkOR(ty),
557 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
558 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
559 return res;
560}
561
562/* Generate IR to set the returned temp to either all-zeroes or
563 all ones, as a copy of arg<imm>. */
564static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
565{
566 UInt w = 0;
567 if (ty == Ity_I64) {
568 w = 64;
569 } else {
570 vassert(ty == Ity_I32);
571 w = 32;
572 }
573 vassert(w != 0);
574 vassert(imm < w);
575 IRTemp res = newTemp(ty);
576 assign(res, binop(mkSAR(ty),
577 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
578 mkU8(w - 1)));
579 return res;
580}
581
/* U-widen 8/16/32/64 bit int expr to 64.  A 64-bit input is passed
   through unchanged; any other type panics. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8: return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}
593
/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense.  A 64-bit target is a no-op;
   any other type panics. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8: return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}
606
sewardjbbcf1882014-01-12 12:49:10 +0000607
608/*------------------------------------------------------------*/
609/*--- Helpers for accessing guest registers. ---*/
610/*------------------------------------------------------------*/
611
612#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
613#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
614#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
615#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
616#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
617#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
618#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
619#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
620#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
621#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
622#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
623#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
624#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
625#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
626#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
627#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
628#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
629#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
630#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
631#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
632#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
633#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
634#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
635#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
636#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
637#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
638#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
639#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
640#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
641#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
642#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
643
sewardj60687882014-01-15 10:25:21 +0000644#define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
sewardjbbcf1882014-01-12 12:49:10 +0000645#define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
646
647#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
648#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
649#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
650#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
651
652#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
653#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
654
655#define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
656#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
657#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
658#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
659#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
660#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
661#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
662#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
663#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
664#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
665#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
666#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
667#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
668#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
669#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
670#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
671#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
672#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
673#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
674#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
675#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
676#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
677#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
678#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
679#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
680#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
681#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
682#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
683#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
684#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
685#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
686#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
687
688#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
689#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR)
690//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
691//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
692//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
693//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
694//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
695//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
696//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
697
698#define OFFB_TISTART offsetof(VexGuestARM64State,guest_TISTART)
699#define OFFB_TILEN offsetof(VexGuestARM64State,guest_TILEN)
700
701
702/* ---------------- Integer registers ---------------- */
703
704static Int offsetIReg64 ( UInt iregNo )
705{
706 /* Do we care about endianness here? We do if sub-parts of integer
707 registers are accessed. */
708 switch (iregNo) {
709 case 0: return OFFB_X0;
710 case 1: return OFFB_X1;
711 case 2: return OFFB_X2;
712 case 3: return OFFB_X3;
713 case 4: return OFFB_X4;
714 case 5: return OFFB_X5;
715 case 6: return OFFB_X6;
716 case 7: return OFFB_X7;
717 case 8: return OFFB_X8;
718 case 9: return OFFB_X9;
719 case 10: return OFFB_X10;
720 case 11: return OFFB_X11;
721 case 12: return OFFB_X12;
722 case 13: return OFFB_X13;
723 case 14: return OFFB_X14;
724 case 15: return OFFB_X15;
725 case 16: return OFFB_X16;
726 case 17: return OFFB_X17;
727 case 18: return OFFB_X18;
728 case 19: return OFFB_X19;
729 case 20: return OFFB_X20;
730 case 21: return OFFB_X21;
731 case 22: return OFFB_X22;
732 case 23: return OFFB_X23;
733 case 24: return OFFB_X24;
734 case 25: return OFFB_X25;
735 case 26: return OFFB_X26;
736 case 27: return OFFB_X27;
737 case 28: return OFFB_X28;
738 case 29: return OFFB_X29;
739 case 30: return OFFB_X30;
740 /* but not 31 */
741 default: vassert(0);
742 }
743}
744
/* Guest-state offset of X<iregNo>, with register number 31 denoting
   the stack pointer. */
static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
}
749
/* Name of a 64-bit integer register, with number 31 read as the
   zero register "xzr". */
static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
          "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

/* Name of a 64-bit integer register, with number 31 read as the
   stack pointer "sp". */
static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}
769
/* Read a 64-bit integer register, with number 31 meaning SP. */
static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

/* Read a 64-bit integer register, with number 31 meaning XZR,
   which always reads as zero. */
static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

/* Write a 64-bit expression to an integer register, with number 31
   meaning SP. */
static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

/* Write a 64-bit expression to an integer register, with number 31
   meaning XZR: such writes are simply discarded. */
static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}
800
/* Name of a 32-bit integer register, with number 31 read as the
   zero register "wzr". */
static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
          "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

/* Name of a 32-bit integer register, with number 31 read as the
   stack pointer "wsp". */
static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

/* Read the low 32 bits of an integer register, number 31 meaning SP. */
static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

/* Read the low 32 bits of an integer register, number 31 meaning
   WZR (reads as zero). */
static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

/* Write a 32-bit expression to an integer register, number 31
   meaning SP.  The value is zero-extended into the 64-bit slot. */
static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

/* Write a 32-bit expression to an integer register, number 31
   meaning WZR (writes discarded).  Zero-extends into the 64-bit
   slot otherwise. */
static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}
853
/* Width-polymorphic wrappers: dispatch to the 64- or 32-bit variant
   according to 'is64'. */

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

/* Write the guest PC; 'e' must be a 64-bit expression. */
static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
883
884
885/* ---------------- Vector (Q) registers ---------------- */
886
887static Int offsetQReg128 ( UInt qregNo )
888{
889 /* We don't care about endianness at this point. It only becomes
890 relevant when dealing with sections of these registers.*/
891 switch (qregNo) {
892 case 0: return OFFB_Q0;
893 case 1: return OFFB_Q1;
894 case 2: return OFFB_Q2;
895 case 3: return OFFB_Q3;
896 case 4: return OFFB_Q4;
897 case 5: return OFFB_Q5;
898 case 6: return OFFB_Q6;
899 case 7: return OFFB_Q7;
900 case 8: return OFFB_Q8;
901 case 9: return OFFB_Q9;
902 case 10: return OFFB_Q10;
903 case 11: return OFFB_Q11;
904 case 12: return OFFB_Q12;
905 case 13: return OFFB_Q13;
906 case 14: return OFFB_Q14;
907 case 15: return OFFB_Q15;
908 case 16: return OFFB_Q16;
909 case 17: return OFFB_Q17;
910 case 18: return OFFB_Q18;
911 case 19: return OFFB_Q19;
912 case 20: return OFFB_Q20;
913 case 21: return OFFB_Q21;
914 case 22: return OFFB_Q22;
915 case 23: return OFFB_Q23;
916 case 24: return OFFB_Q24;
917 case 25: return OFFB_Q25;
918 case 26: return OFFB_Q26;
919 case 27: return OFFB_Q27;
920 case 28: return OFFB_Q28;
921 case 29: return OFFB_Q29;
922 case 30: return OFFB_Q30;
923 case 31: return OFFB_Q31;
924 default: vassert(0);
925 }
926}
927
/* Write to a complete Qreg.  'e' must be V128-typed. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg, as a V128 expression. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}
942
/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      /* NOTE(review): the 4-byte case currently returns Ity_I32
         although the commented-out Ity_F32 (and the 8-byte case
         returning F64) suggest float was once intended — confirm
         before relying on the I32 choice. */
      case 4: return Ity_I32; //Ity_F32;
      case 8: return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}
959
/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(!host_is_bigendian);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8: laneSzB = 1; break;
      case Ity_I16: laneSzB = 2; break;
      case Ity_F32: case Ity_I32: laneSzB = 4; break;
      case Ity_F64: case Ity_I64: laneSzB = 8; break;
      case Ity_V128: laneSzB = 16; break;
      default: break;
   }
   /* An unhandled lane type leaves laneSzB at zero and fails here. */
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   /* The lane must lie entirely within the 16-byte register. */
   vassert(maxOff < 16);
   return base + minOff;
}
986
/* Put to the least significant lane of a Qreg.  The lane size is
   taken from the type of 'e'. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty = typeOfIRExpr(irsb->tyenv, e);
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg.  Note: accepts a
   narrower set of types than putQRegLO (no I8/I16 as yet). */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}
1015
sewardj606c4ba2014-01-26 19:11:14 +00001016static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
sewardjbbcf1882014-01-12 12:49:10 +00001017{
1018 static const HChar* namesQ[32]
1019 = { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1020 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
1021 "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
1022 "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
1023 static const HChar* namesD[32]
1024 = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
1025 "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
1026 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
1027 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
1028 static const HChar* namesS[32]
1029 = { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
1030 "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
1031 "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
1032 "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
1033 static const HChar* namesH[32]
1034 = { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
1035 "h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
1036 "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
1037 "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
1038 static const HChar* namesB[32]
1039 = { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
1040 "b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
1041 "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
1042 "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
1043 vassert(qregNo < 32);
sewardj606c4ba2014-01-26 19:11:14 +00001044 switch (sizeofIRType(laneTy)) {
sewardjbbcf1882014-01-12 12:49:10 +00001045 case 1: return namesB[qregNo];
1046 case 2: return namesH[qregNo];
1047 case 4: return namesS[qregNo];
1048 case 8: return namesD[qregNo];
1049 case 16: return namesQ[qregNo];
1050 default: vassert(0);
1051 }
1052 /*NOTREACHED*/
1053}
1054
sewardj606c4ba2014-01-26 19:11:14 +00001055static const HChar* nameQReg128 ( UInt qregNo )
1056{
1057 return nameQRegLO(qregNo, Ity_V128);
1058}
1059
sewardjbbcf1882014-01-12 12:49:10 +00001060/* Find the offset of the most significant half (8 bytes) of the given
1061 Qreg. This requires knowing the endianness of the host. */
sewardj606c4ba2014-01-26 19:11:14 +00001062static Int offsetQRegHI64 ( UInt qregNo )
sewardjbbcf1882014-01-12 12:49:10 +00001063{
sewardj606c4ba2014-01-26 19:11:14 +00001064 return offsetQRegLane(qregNo, Ity_I64, 1);
sewardjbbcf1882014-01-12 12:49:10 +00001065}
1066
sewardj606c4ba2014-01-26 19:11:14 +00001067static IRExpr* getQRegHI64 ( UInt qregNo )
sewardjbbcf1882014-01-12 12:49:10 +00001068{
sewardj606c4ba2014-01-26 19:11:14 +00001069 return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
sewardjbbcf1882014-01-12 12:49:10 +00001070}
1071
sewardj606c4ba2014-01-26 19:11:14 +00001072static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001073{
1074 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001075 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001076 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001077 case Ity_I64: case Ity_F64:
1078 break;
1079 default:
1080 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001081 }
1082 stmt(IRStmt_Put(off, e));
1083}
1084
sewardj606c4ba2014-01-26 19:11:14 +00001085/* Put to a specified lane of a Qreg. */
1086static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1087{
1088 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1089 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1090 switch (laneTy) {
1091 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001092 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001093 case Ity_I16:
1094 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001095 break;
1096 default:
1097 vassert(0); // Other cases are ATC
1098 }
1099 stmt(IRStmt_Put(off, e));
1100}
1101
sewardj32d86752014-03-02 12:47:18 +00001102/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001103static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1104{
1105 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1106 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001107 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
1108 case Ity_F64:
sewardj606c4ba2014-01-26 19:11:14 +00001109 break;
1110 default:
1111 vassert(0); // Other cases are ATC
1112 }
1113 return IRExpr_Get(off, laneTy);
1114}
1115
1116
sewardjbbcf1882014-01-12 12:49:10 +00001117//ZZ /* ---------------- Misc registers ---------------- */
1118//ZZ
1119//ZZ static void putMiscReg32 ( UInt gsoffset,
1120//ZZ IRExpr* e, /* :: Ity_I32 */
1121//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1122//ZZ {
1123//ZZ switch (gsoffset) {
1124//ZZ case OFFB_FPSCR: break;
1125//ZZ case OFFB_QFLAG32: break;
1126//ZZ case OFFB_GEFLAG0: break;
1127//ZZ case OFFB_GEFLAG1: break;
1128//ZZ case OFFB_GEFLAG2: break;
1129//ZZ case OFFB_GEFLAG3: break;
1130//ZZ default: vassert(0); /* awaiting more cases */
1131//ZZ }
1132//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1133//ZZ
1134//ZZ if (guardT == IRTemp_INVALID) {
1135//ZZ /* unconditional write */
1136//ZZ stmt(IRStmt_Put(gsoffset, e));
1137//ZZ } else {
1138//ZZ stmt(IRStmt_Put(
1139//ZZ gsoffset,
1140//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1141//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1142//ZZ ));
1143//ZZ }
1144//ZZ }
1145//ZZ
1146//ZZ static IRTemp get_ITSTATE ( void )
1147//ZZ {
1148//ZZ ASSERT_IS_THUMB;
1149//ZZ IRTemp t = newTemp(Ity_I32);
1150//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1151//ZZ return t;
1152//ZZ }
1153//ZZ
1154//ZZ static void put_ITSTATE ( IRTemp t )
1155//ZZ {
1156//ZZ ASSERT_IS_THUMB;
1157//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1158//ZZ }
1159//ZZ
1160//ZZ static IRTemp get_QFLAG32 ( void )
1161//ZZ {
1162//ZZ IRTemp t = newTemp(Ity_I32);
1163//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1164//ZZ return t;
1165//ZZ }
1166//ZZ
1167//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1168//ZZ {
1169//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1170//ZZ }
1171//ZZ
1172//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1173//ZZ Status Register) to indicate that overflow or saturation occurred.
1174//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1175//ZZ value to indicate saturation. */
1176//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1177//ZZ {
1178//ZZ IRTemp old = get_QFLAG32();
1179//ZZ IRTemp nyu = newTemp(Ity_I32);
1180//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1181//ZZ put_QFLAG32(nyu, condT);
1182//ZZ }
1183
1184
1185/* ---------------- FPCR stuff ---------------- */
1186
1187/* Generate IR to get hold of the rounding mode bits in FPCR, and
1188 convert them to IR format. Bind the final result to the
1189 returned temp. */
1190static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1191{
1192 /* The ARMvfp encoding for rounding mode bits is:
1193 00 to nearest
1194 01 to +infinity
1195 10 to -infinity
1196 11 to zero
1197 We need to convert that to the IR encoding:
1198 00 to nearest (the default)
1199 10 to +infinity
1200 01 to -infinity
1201 11 to zero
1202 Which can be done by swapping bits 0 and 1.
1203 The rmode bits are at 23:22 in FPSCR.
1204 */
1205 IRTemp armEncd = newTemp(Ity_I32);
1206 IRTemp swapped = newTemp(Ity_I32);
1207 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1208 we don't zero out bits 24 and above, since the assignment to
1209 'swapped' will mask them out anyway. */
1210 assign(armEncd,
1211 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1212 /* Now swap them. */
1213 assign(swapped,
1214 binop(Iop_Or32,
1215 binop(Iop_And32,
1216 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1217 mkU32(2)),
1218 binop(Iop_And32,
1219 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1220 mkU32(1))
1221 ));
1222 return swapped;
1223}
1224
1225
1226/*------------------------------------------------------------*/
1227/*--- Helpers for flag handling and conditional insns ---*/
1228/*------------------------------------------------------------*/
1229
/* Return the standard assembler mnemonic for the given ARM64
   condition code. */
static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs"; // or 'hs'
      case ARM64CondCC: return "cc"; // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}
1252
1253/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   /* Shorthand for nameARM64Condcode. */
   return nameARM64Condcode(cond);
}
1257
1258
1259/* Build IR to calculate some particular condition from stored
1260 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1261 Ity_I64, suitable for narrowing. Although the return type is
1262 Ity_I64, the returned value is either 0 or 1. 'cond' must be
1263 :: Ity_I64 and must denote the condition to compute in
1264 bits 7:4, and be zero everywhere else.
1265*/
1266static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
1267{
1268 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
1269 /* And 'cond' had better produce a value in which only bits 7:4 are
1270 nonzero. However, obviously we can't assert for that. */
1271
1272 /* So what we're constructing for the first argument is
1273 "(cond << 4) | stored-operation".
1274 However, as per comments above, 'cond' must be supplied
1275 pre-shifted to this function.
1276
1277 This pairing scheme requires that the ARM64_CC_OP_ values all fit
1278 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1279 8 bits of the first argument. */
1280 IRExpr** args
1281 = mkIRExprVec_4(
1282 binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
1283 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1284 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1285 IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
1286 );
1287 IRExpr* call
1288 = mkIRExprCCall(
1289 Ity_I64,
1290 0/*regparm*/,
1291 "arm64g_calculate_condition", &arm64g_calculate_condition,
1292 args
1293 );
1294
1295 /* Exclude the requested condition, OP and NDEP from definedness
1296 checking. We're only interested in DEP1 and DEP2. */
1297 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1298 return call;
1299}
1300
1301
1302/* Build IR to calculate some particular condition from stored
1303 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1304 Ity_I64, suitable for narrowing. Although the return type is
1305 Ity_I64, the returned value is either 0 or 1.
1306*/
1307static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
1308{
1309 /* First arg is "(cond << 4) | condition". This requires that the
1310 ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
1311 (COND, OP) pair in the lowest 8 bits of the first argument. */
1312 vassert(cond >= 0 && cond <= 15);
1313 return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
1314}
1315
1316
1317//ZZ /* Build IR to calculate just the carry flag from stored
1318//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1319//ZZ Ity_I32. */
1320//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
1321//ZZ {
1322//ZZ IRExpr** args
1323//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1324//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1325//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1326//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1327//ZZ IRExpr* call
1328//ZZ = mkIRExprCCall(
1329//ZZ Ity_I32,
1330//ZZ 0/*regparm*/,
1331//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
1332//ZZ args
1333//ZZ );
1334//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1335//ZZ interested in DEP1 and DEP2. */
1336//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1337//ZZ return call;
1338//ZZ }
1339//ZZ
1340//ZZ
1341//ZZ /* Build IR to calculate just the overflow flag from stored
1342//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1343//ZZ Ity_I32. */
1344//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1345//ZZ {
1346//ZZ IRExpr** args
1347//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1348//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1349//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1350//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1351//ZZ IRExpr* call
1352//ZZ = mkIRExprCCall(
1353//ZZ Ity_I32,
1354//ZZ 0/*regparm*/,
1355//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1356//ZZ args
1357//ZZ );
1358//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1359//ZZ interested in DEP1 and DEP2. */
1360//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1361//ZZ return call;
1362//ZZ }
1363
1364
1365/* Build IR to calculate N Z C V in bits 31:28 of the
1366 returned word. */
1367static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
1368{
1369 IRExpr** args
1370 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
1371 IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
1372 IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
1373 IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
1374 IRExpr* call
1375 = mkIRExprCCall(
1376 Ity_I64,
1377 0/*regparm*/,
1378 "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
1379 args
1380 );
1381 /* Exclude OP and NDEP from definedness checking. We're only
1382 interested in DEP1 and DEP2. */
1383 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1384 return call;
1385}
1386
1387
1388/* Build IR to set the flags thunk, in the most general case. */
1389static
1390void setFlags_D1_D2_ND ( UInt cc_op,
1391 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1392{
1393 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1394 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1395 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1396 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1397 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1398 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1399 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1400 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1401}
1402
1403/* Build IR to set the flags thunk after ADD or SUB. */
1404static
1405void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1406{
1407 IRTemp argL64 = IRTemp_INVALID;
1408 IRTemp argR64 = IRTemp_INVALID;
1409 IRTemp z64 = newTemp(Ity_I64);
1410 if (is64) {
1411 argL64 = argL;
1412 argR64 = argR;
1413 } else {
1414 argL64 = newTemp(Ity_I64);
1415 argR64 = newTemp(Ity_I64);
1416 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1417 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1418 }
1419 assign(z64, mkU64(0));
1420 UInt cc_op = ARM64G_CC_OP_NUMBER;
1421 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1422 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1423 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1424 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1425 else { vassert(0); }
1426 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1427}
1428
1429/* Build IR to set the flags thunk after ADD or SUB, if the given
1430 condition evaluates to True at run time. If not, the flags are set
1431 to the specified NZCV value. */
1432static
1433void setFlags_ADD_SUB_conditionally (
1434 Bool is64, Bool isSUB,
1435 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1436 )
1437{
1438 /* Generate IR as follows:
1439 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1440 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1441 CC_DEP2 = ITE(cond, argR64, 0)
1442 CC_NDEP = 0
1443 */
1444
1445 IRTemp z64 = newTemp(Ity_I64);
1446 assign(z64, mkU64(0));
1447
1448 /* Establish the operation and operands for the True case. */
1449 IRTemp t_dep1 = IRTemp_INVALID;
1450 IRTemp t_dep2 = IRTemp_INVALID;
1451 UInt t_op = ARM64G_CC_OP_NUMBER;
1452 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1453 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1454 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1455 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1456 else { vassert(0); }
1457 /* */
1458 if (is64) {
1459 t_dep1 = argL;
1460 t_dep2 = argR;
1461 } else {
1462 t_dep1 = newTemp(Ity_I64);
1463 t_dep2 = newTemp(Ity_I64);
1464 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1465 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1466 }
1467
1468 /* Establish the operation and operands for the False case. */
1469 IRTemp f_dep1 = newTemp(Ity_I64);
1470 IRTemp f_dep2 = z64;
1471 UInt f_op = ARM64G_CC_OP_COPY;
1472 assign(f_dep1, mkU64(nzcv << 28));
1473
1474 /* Final thunk values */
1475 IRTemp dep1 = newTemp(Ity_I64);
1476 IRTemp dep2 = newTemp(Ity_I64);
1477 IRTemp op = newTemp(Ity_I64);
1478
1479 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1480 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1481 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1482
1483 /* finally .. */
1484 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1485 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1486 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1487 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1488}
1489
1490/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1491static
1492void setFlags_LOGIC ( Bool is64, IRTemp res )
1493{
1494 IRTemp res64 = IRTemp_INVALID;
1495 IRTemp z64 = newTemp(Ity_I64);
1496 UInt cc_op = ARM64G_CC_OP_NUMBER;
1497 if (is64) {
1498 res64 = res;
1499 cc_op = ARM64G_CC_OP_LOGIC64;
1500 } else {
1501 res64 = newTemp(Ity_I64);
1502 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1503 cc_op = ARM64G_CC_OP_LOGIC32;
1504 }
1505 assign(z64, mkU64(0));
1506 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1507}
1508
1509/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1510 located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   /* DEP1 carries the NZCV bits; DEP2 and NDEP are unused. */
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}
1518
1519
1520//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1521//ZZ sets it at all) */
1522//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1523//ZZ IRTemp t_dep2,
1524//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1525//ZZ {
1526//ZZ IRTemp z32 = newTemp(Ity_I32);
1527//ZZ assign( z32, mkU32(0) );
1528//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1529//ZZ }
1530//ZZ
1531//ZZ
1532//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
1533//ZZ sets it at all) */
1534//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1535//ZZ IRTemp t_ndep,
1536//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1537//ZZ {
1538//ZZ IRTemp z32 = newTemp(Ity_I32);
1539//ZZ assign( z32, mkU32(0) );
1540//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1541//ZZ }
1542//ZZ
1543//ZZ
1544//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1545//ZZ sets them at all) */
1546//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1547//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1548//ZZ {
1549//ZZ IRTemp z32 = newTemp(Ity_I32);
1550//ZZ assign( z32, mkU32(0) );
1551//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1552//ZZ }
1553
1554
1555/*------------------------------------------------------------*/
1556/*--- Misc math helpers ---*/
1557/*------------------------------------------------------------*/
1558
sewardj32d86752014-03-02 12:47:18 +00001559/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
1560static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00001561{
sewardj32d86752014-03-02 12:47:18 +00001562 IRTemp maskT = newTemp(Ity_I64);
1563 IRTemp res = newTemp(Ity_I64);
1564 vassert(sh >= 1 && sh <= 63);
1565 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00001566 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00001567 binop(Iop_Or64,
1568 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00001569 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
1570 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00001571 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00001572 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
1573 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00001574 )
1575 );
sewardjdc9259c2014-02-27 11:10:19 +00001576 return res;
1577}
1578
sewardj32d86752014-03-02 12:47:18 +00001579/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   /* Swap adjacent bytes, then adjacent 16-bit units: this reverses
      the byte order within each 32-bit lane of the 64-bit value. */
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}
1587
1588/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   /* A single adjacent-byte swap reverses the byte order within each
      16-bit lane of the 64-bit value. */
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}
1595
1596/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   /* Swap adjacent bytes, then 16-bit units, then 32-bit halves:
      together these reverse the byte order of the whole 64 bits. */
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}
sewardjdc9259c2014-02-27 11:10:19 +00001605
1606/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   /* Swap adjacent bits, then bit-pairs, then nibbles -- reversing
      the bits within each byte -- and finally reverse the bytes,
      yielding a full 64-bit bit reversal. */
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
1615
sewardj606c4ba2014-01-26 19:11:14 +00001616/* Duplicates the bits at the bottom of the given word to fill the
1617 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
1618 except for the bottom bits. */
1619static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
1620{
1621 if (srcTy == Ity_I8) {
1622 IRTemp t16 = newTemp(Ity_I64);
1623 assign(t16, binop(Iop_Or64, mkexpr(src),
1624 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
1625 IRTemp t32 = newTemp(Ity_I64);
1626 assign(t32, binop(Iop_Or64, mkexpr(t16),
1627 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
1628 IRTemp t64 = newTemp(Ity_I64);
1629 assign(t64, binop(Iop_Or64, mkexpr(t32),
1630 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1631 return t64;
1632 }
1633 if (srcTy == Ity_I16) {
1634 IRTemp t32 = newTemp(Ity_I64);
1635 assign(t32, binop(Iop_Or64, mkexpr(src),
1636 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
1637 IRTemp t64 = newTemp(Ity_I64);
1638 assign(t64, binop(Iop_Or64, mkexpr(t32),
1639 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
1640 return t64;
1641 }
1642 if (srcTy == Ity_I32) {
1643 IRTemp t64 = newTemp(Ity_I64);
1644 assign(t64, binop(Iop_Or64, mkexpr(src),
1645 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
1646 return t64;
1647 }
1648 if (srcTy == Ity_I64) {
1649 return src;
1650 }
1651 vassert(0);
1652}
1653
1654
sewardjbbcf1882014-01-12 12:49:10 +00001655/*------------------------------------------------------------*/
1656/*--- FP comparison helpers ---*/
1657/*------------------------------------------------------------*/
1658
1659/* irRes :: Ity_I32 holds a floating point comparison result encoded
1660 as an IRCmpF64Result. Generate code to convert it to an
1661 ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
1662 Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix = newTemp(Ity_I64);
   IRTemp termL = newTemp(Ity_I64);
   IRTemp termR = newTemp(Ity_I64);
   IRTemp nzcv = newTemp(Ity_I64);
   IRTemp irRes = newTemp(Ity_I64);

   /* This is where the fun starts. We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group. The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR | ARM(nzcv)
      --------------------------------
      UN 0x45 0011
      LT 0x01 1000
      GT 0x00 0010
      EQ 0x40 0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt. It converts ix
      into an almost correct value NZCV value (incredibly), except
      for the case of UN, where it produces 0100 instead of the
      required 0011.

      termR is therefore a correction term, also computed from ix. It
      is 1 in the UN case and 0 for LT, GT and EQ. Hence, to get
      the final correct value, we subtract termR from termL.

      Don't take my word for it. There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Widen the 32-bit comparison result to 64 bits for the bit
      gymnastics below. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
1735
1736
1737/*------------------------------------------------------------*/
1738/*--- Data processing (immediate) ---*/
1739/*------------------------------------------------------------*/
1740
1741/* Helper functions for supporting "DecodeBitMasks" */
1742
1743static ULong dbm_ROR ( Int width, ULong x, Int rot )
1744{
1745 vassert(width > 0 && width <= 64);
1746 vassert(rot >= 0 && rot < width);
1747 if (rot == 0) return x;
1748 ULong res = x >> rot;
1749 res |= (x << (width - rot));
1750 if (width < 64)
1751 res &= ((1ULL << width) - 1);
1752 return res;
1753}
1754
1755static ULong dbm_RepTo64( Int esize, ULong x )
1756{
1757 switch (esize) {
1758 case 64:
1759 return x;
1760 case 32:
1761 x &= 0xFFFFFFFF; x |= (x << 32);
1762 return x;
1763 case 16:
1764 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
1765 return x;
1766 case 8:
1767 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
1768 return x;
1769 case 4:
1770 x &= 0xF; x |= (x << 4); x |= (x << 8);
1771 x |= (x << 16); x |= (x << 32);
1772 return x;
1773 case 2:
1774 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
1775 x |= (x << 16); x |= (x << 32);
1776 return x;
1777 default:
1778 break;
1779 }
1780 vpanic("dbm_RepTo64");
1781 /*NOTREACHED*/
1782 return 0;
1783}
1784
1785static Int dbm_highestSetBit ( ULong x )
1786{
1787 Int i;
1788 for (i = 63; i >= 0; i--) {
1789 if (x & (1ULL << i))
1790 return i;
1791 }
1792 vassert(x == 0);
1793 return -1;
1794}
1795
/* Implementation of the ARMv8 "DecodeBitMasks" pseudocode function.
   Decodes the (immN, imms, immr) triple into the wmask and tmask
   bitmasks used by the logical-immediate and bitfield instruction
   groups.  Returns False for a reserved (invalid) encoding.  Either
   output pointer may be NULL if that mask is not wanted. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* The element size (2^len) is encoded by the position of the
      highest set bit of immN:NOT(imms). */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates, an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d. S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);  /* S is unsigned, so >= 0 is vacuous */
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask is the rotated element replicated to 64 bits; tmask is
      the unrotated d-element replicated to 64 bits. */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
1855
1856
/* Decode and translate to IR a single instruction from the ARM64
   "data processing (immediate)" top-level group.  Returns True iff the
   instruction was recognised and IR was emitted; otherwise prints a
   diagnostic and returns False so the caller can signal an undecoded
   instruction. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt sh = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is shifted left by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                              mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting form writes Rd|ZR, not Rd|SP. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
                              mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt bP = INSN(31,31);
      UInt immLo = INSN(30,29);
      UInt immHi = INSN(23,5);
      UInt rD = INSN(4,0);
      /* immHi:immLo forms a 21-bit signed offset (sign-extended below);
         for ADRP (bP==1) the offset is in 4KB pages. */
      ULong uimm = (immHi << 2) | immLo;
      ULong simm = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28 22 21 15 9 4
         sf op 100100 N immr imms Rn Rd
         op=00: AND Rd|SP, Rn, #imm
         op=01: ORR Rd|SP, Rn, #imm
         op=10: EOR Rd|SP, Rn, #imm
         op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool is64 = INSN(31,31) == 1;
      UInt op = INSN(30,29);
      UInt N = INSN(22,22);
      UInt immR = INSN(21,16);
      UInt immS = INSN(15,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      ULong imm = 0;
      Bool ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp res = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR write Rd|SP ... */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ... but ANDS writes Rd|ZR and sets the flags. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp res = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28 22 20 4
         |  |  |  |  |  |
         sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64 = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw = INSN(22,21);
      UInt imm16 = INSN(20,5);
      UInt dd = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex. We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /* 30 28 22 21 15 9 4

      sf 10 100110 N immr imms nn dd
         UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms when sf=1, N=1

      sf 00 100110 N immr imms nn dd
         SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms when sf=1, N=1

      sf 01 100110 N immr imms nn dd
         BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf = INSN(31,31);
      UInt opc = INSN(30,29);
      UInt N = INSN(22,22);
      UInt immR = INSN(21,16);
      UInt immS = INSN(15,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      Bool inZero = False;   /* start from zero (vs from dest register)? */
      Bool extend = False;   /* sign-extend above the copied field? */
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                      || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool is64 = sf == 1;
      IRType ty = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /* 30 28 22 20 15 9 4
      1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm = INSN(20,16);
      UInt imm6 = INSN(15,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;   /* sf must equal N */
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         /* Shifting by the full register width is not expressible below,
            so the lsb==0 case just selects srcLo whole. */
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
# undef INSN
}
2198
2199
2200/*------------------------------------------------------------*/
2201/*--- Data processing (register) instructions ---*/
2202/*------------------------------------------------------------*/
2203
2204static const HChar* nameSH ( UInt sh ) {
2205 switch (sh) {
2206 case 0: return "lsl";
2207 case 1: return "lsr";
2208 case 2: return "asr";
2209 case 3: return "ror";
2210 default: vassert(0);
2211 }
2212}
2213
2214/* Generate IR to get a register value, possibly shifted by an
2215 immediate. Returns either a 32- or 64-bit temporary holding the
2216 result. After the shift, the value can optionally be NOT-ed
2217 too.
2218
2219 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2220 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2221 isn't allowed, but it's the job of the caller to check that.
2222*/
2223static IRTemp getShiftedIRegOrZR ( Bool is64,
2224 UInt sh_how, UInt sh_amt, UInt regNo,
2225 Bool invert )
2226{
2227 vassert(sh_how < 4);
2228 vassert(sh_amt < (is64 ? 64 : 32));
2229 IRType ty = is64 ? Ity_I64 : Ity_I32;
2230 IRTemp t0 = newTemp(ty);
2231 assign(t0, getIRegOrZR(is64, regNo));
2232 IRTemp t1 = newTemp(ty);
2233 switch (sh_how) {
2234 case BITS2(0,0):
2235 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2236 break;
2237 case BITS2(0,1):
2238 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2239 break;
2240 case BITS2(1,0):
2241 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2242 break;
2243 case BITS2(1,1):
2244 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2245 break;
2246 default:
2247 vassert(0);
2248 }
2249 if (invert) {
2250 IRTemp t2 = newTemp(ty);
2251 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2252 return t2;
2253 } else {
2254 return t1;
2255 }
2256}
2257
2258
/* Decode and translate to IR a single instruction from the ARM64
   "data processing (register)" top-level group.  Returns True iff the
   instruction was recognised and IR was emitted; otherwise prints a
   diagnostic and returns False. */
static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28 23 21 20 15 9 4
      | | | | | | | | | |
      x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
      x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
      x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
      x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt bX = INSN(31,31);
      UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt bS = INSN(29, 29); /* set flags? */
      UInt sh = INSN(23,22);
      UInt rM = INSN(20,16);
      UInt imm6 = INSN(15,10);
      UInt rN = INSN(9,5);
      UInt rD = INSN(4,0);
      Bool isSUB = bOP == 1;
      Bool is64 = bX == 1;
      IRType ty = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         /* rD == 31 means the result is discarded (the zero register);
            flags may still be set below for the ADDS/SUBS (CMN/CMP) forms. */
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28 23 21 20 15 9 4
      | | | | | | | | |
      x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
      x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
      x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
      x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt bX = INSN(31,31);
      UInt sh = INSN(23,22);
      UInt bN = INSN(21,21);
      UInt rM = INSN(20,16);
      UInt imm6 = INSN(15,10);
      UInt rN = INSN(9,5);
      UInt rD = INSN(4,0);
      Bool is64 = bX == 1;
      IRType ty = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         /* bN==1 selects the inverted (BIC/ORN/EON/BICS) forms. */
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp op = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1): op = mkOR(ty); break;
            case BITS2(1,0): op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /* 31 23 22 20 15 9 4
      10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
      10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
   */
   /* NOTE: the INSN(23,23)==1 conjunct ("ATC" -- awaiting test case)
      means only UMULH is currently decoded; SMULH falls through as
      undecoded. */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)
       && INSN(23,23) == 1/*ATC*/) {
      Bool isU = INSN(23,23) == 1;
      UInt mm = INSN(20,16);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /* 31 30 20 15 14 9 4
      sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
      sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa = INSN(14,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /* 31 30 28 20 15 11 9 4
      sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
      sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
      sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
      sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool is64 = INSN(31,31) == 1;
      UInt b30 = INSN(30,30);
      UInt mm = INSN(20,16);
      UInt cond = INSN(15,12);
      UInt b10 = INSN(10,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType ty = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      /* Transform the else-arm according to the variant. */
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }

   /* -------------- ADD/SUB(extended reg) -------------- */
   /* 28 20 15 12 9 4
      000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
      101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld

      010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
      111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

      000 Xm & 0xFF UXTB
      001 Xm & 0xFFFF UXTH
      010 Xm & (2^32)-1 UXTW
      011 Xm UXTX

      100 Xm sx from bit 7 SXTB
      101 Xm sx from bit 15 SXTH
      110 Xm sx from bit 31 SXTW
      111 Xm SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64 = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm = INSN(20,16);
      UInt opt = INSN(15,13);
      UInt imm3 = INSN(12,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases:
         work at 64 bits throughout and narrow at the end if needed. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw = mkexpr(xM); /* "xM widened" */
      Int shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            /* sign-extend by shifting left then arithmetic-right by shSX */
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            /* Flag-setting forms write Rd|ZR, not Rd|SP. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31 29 20 15 11 9 3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond

      Operation is:
      (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
      (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64 = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt cond = INSN(15,12);
      UInt nn = INSN(9,5);
      UInt nzcv = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31 29 20 15 11 9 3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
      Operation is:
      (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
      (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64 = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm = INSN(20,16);
      UInt cond = INSN(15,12);
      UInt nn = INSN(9,5);
      UInt nzcv = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28 20 15 11 9 4

      1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
      0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn

      1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
      0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn

      1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
      0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn

      1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      /* Classify into cases (1)..(7) above; ix==0 means undecoded. */
      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt nn = INSN(9,5);
         UInt dd = INSN(4,0);
         IRTemp src = newTemp(Ity_I64);
         IRTemp dst = IRTemp_INVALID;
         /* All variants are implemented as a 64-bit swap helper. */
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64; break;
            case 3: case 4: math = math_BITSWAP64; break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7: math = math_UINTSWAP64; break;
            default: vassert(0);
         }
         const HChar* names[7]
            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            /* 32-bit case: pre-shift the value into the top half so the
               64-bit swap helper leaves the result in the low 32 bits. */
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /* 30 28 24 20 15 9 4
      sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool is64 = INSN(31,31) == 1;
      Bool isCLS = INSN(10,10) == 1;
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      IRTemp src = newTemp(Ity_I64);
      IRTemp dst = newTemp(Ity_I64);
      if (!isCLS) { // CLS not yet supported; falls through undecoded
         if (is64) {
            assign(src, getIReg64orZR(nn));
            /* Iop_Clz64 is undefined for a zero input, hence the ITE. */
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(64),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64,
                              unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
            assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
                                   mkU64(32),
                                   unop(Iop_Clz64, mkexpr(src))));
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("cl%c %s, %s\n",
             isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
         return True;
      }
   }

   /* -------------------- LSLV/LSRV/ASRV -------------------- */
   /* 30 28 20 15 11 9 4
      sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
      sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
      sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm = INSN(20,16);
      UInt op = INSN(11,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      IRType ty = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcL = newTemp(ty);
      IRTemp srcR = newTemp(Ity_I8);
      IRTemp res = newTemp(ty);
      IROp iop = Iop_INVALID;
      assign(srcL, getIRegOrZR(is64, nn));
      /* The shift amount is Rm masked to the register width. */
      assign(srcR,
             unop(Iop_64to8,
                  binop(Iop_And64,
                        getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
      switch (op) {
         case BITS2(0,0): iop = mkSHL(ty); break;
         case BITS2(0,1): iop = mkSHR(ty); break;
         case BITS2(1,0): iop = mkSAR(ty); break;
         default: vassert(0);
      }
      assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
      putIRegOrZR(is64, dd, mkexpr(res));
      vassert(op < 3);
      const HChar* names[3] = { "lslv", "lsrv", "asrv" };
      DIP("%s %s, %s, %s\n",
          names[op], nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
      return True;
   }

   /* -------------------- SDIV/UDIV -------------------- */
   /* 30 28 20 15 10 9 4
      sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
      sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
       && INSN(15,11) == BITS5(0,0,0,0,1)) {
      Bool is64 = INSN(31,31) == 1;
      UInt mm = INSN(20,16);
      Bool isS = INSN(10,10) == 1;
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (isS) {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      } else {
         putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
                                     getIRegOrZR(is64, nn),
                                     getIRegOrZR(is64, mm)));
      }
      DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
          nameIRegOrZR(is64, dd),
          nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
      return True;
   }

   /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
   /* 31 23 20 15 14 9 4
      1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
      1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
      1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
      1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
      with operation
      Xd = Xa +/- (Wn *u/s Wm)
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa = INSN(14,10);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      IRTemp wN = newTemp(Ity_I32);
      IRTemp wM = newTemp(Ity_I32);
      IRTemp xA = newTemp(Ity_I64);
      IRTemp muld = newTemp(Ity_I64);
      IRTemp res = newTemp(Ity_I64);
      assign(wN, getIReg32orZR(nn));
      assign(wM, getIReg32orZR(mm));
      assign(xA, getIReg64orZR(aa));
      assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
                         mkexpr(wN), mkexpr(wM)));
      assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
                        mkexpr(xA), mkexpr(muld)));
      putIReg64orZR(dd, mkexpr(res));
      DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
          nameIReg64orZR(dd), nameIReg32orZR(nn),
          nameIReg32orZR(mm), nameIReg64orZR(aa));
      return True;
   }
   vex_printf("ARM64 front end: data_processing_register\n");
   return False;
# undef INSN
}
2876
2877
2878/*------------------------------------------------------------*/
2879/*--- Load and Store instructions ---*/
2880/*------------------------------------------------------------*/
2881
2882/* Generate the EA for a "reg + reg" style amode. This is done from
2883 parts of the insn, but for sanity checking sake it takes the whole
2884 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
2885 and S=insn[12]:
2886
2887 The possible forms, along with their opt:S values, are:
2888 011:0 Xn|SP + Xm
2889 111:0 Xn|SP + Xm
2890 011:1 Xn|SP + Xm * transfer_szB
2891 111:1 Xn|SP + Xm * transfer_szB
2892 010:0 Xn|SP + 32Uto64(Wm)
2893 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
2894 110:0 Xn|SP + 32Sto64(Wm)
2895 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
2896
2897 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
2898 the transfer size is insn[23,31,30]. For integer loads/stores,
2899 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
2900
2901 If the decoding fails, it returns IRTemp_INVALID.
2902
2903 isInt is True iff this is decoding is for transfers to/from integer
2904 registers. If False it is for transfers to/from vector registers.
2905*/
2906static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
2907{
2908 UInt optS = SLICE_UInt(insn, 15, 12);
2909 UInt mm = SLICE_UInt(insn, 20, 16);
2910 UInt nn = SLICE_UInt(insn, 9, 5);
2911 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
2912 | SLICE_UInt(insn, 31, 30); // Log2 of the size
2913
2914 buf[0] = 0;
2915
2916 /* Sanity checks, that this really is a load/store insn. */
2917 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
2918 goto fail;
2919
2920 if (isInt
2921 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
2922 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
2923 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
2924 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
2925 goto fail;
2926
2927 if (!isInt
2928 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
2929 goto fail;
2930
2931 /* Throw out non-verified but possibly valid cases. */
2932 switch (szLg2) {
2933 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
2934 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
2935 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
2936 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
2937 case BITS3(1,0,0): // can only ever be valid for the vector case
2938 if (isInt) goto fail; else goto fail;
2939 case BITS3(1,0,1): // these sizes are never valid
2940 case BITS3(1,1,0):
2941 case BITS3(1,1,1): goto fail;
2942
2943 default: vassert(0);
2944 }
2945
2946 IRExpr* rhs = NULL;
2947 switch (optS) {
2948 case BITS4(1,1,1,0): goto fail; //ATC
2949 case BITS4(0,1,1,0):
2950 rhs = getIReg64orZR(mm);
2951 vex_sprintf(buf, "[%s, %s]",
2952 nameIReg64orZR(nn), nameIReg64orZR(mm));
2953 break;
2954 case BITS4(1,1,1,1): goto fail; //ATC
2955 case BITS4(0,1,1,1):
2956 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
2957 vex_sprintf(buf, "[%s, %s lsl %u]",
2958 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
2959 break;
2960 case BITS4(0,1,0,0):
2961 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
2962 vex_sprintf(buf, "[%s, %s uxtx]",
2963 nameIReg64orZR(nn), nameIReg32orZR(mm));
2964 break;
2965 case BITS4(0,1,0,1):
2966 rhs = binop(Iop_Shl64,
2967 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
2968 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
2969 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2970 break;
2971 case BITS4(1,1,0,0):
2972 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
2973 vex_sprintf(buf, "[%s, %s sxtx]",
2974 nameIReg64orZR(nn), nameIReg32orZR(mm));
2975 break;
2976 case BITS4(1,1,0,1):
2977 rhs = binop(Iop_Shl64,
2978 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
2979 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
2980 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
2981 break;
2982 default:
2983 /* The rest appear to be genuinely invalid */
2984 goto fail;
2985 }
2986
2987 vassert(rhs);
2988 IRTemp res = newTemp(Ity_I64);
2989 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
2990 return res;
2991
2992 fail:
2993 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
2994 return IRTemp_INVALID;
2995}
2996
2997
2998/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
2999 bits of DATAE :: Ity_I64. */
3000static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3001{
3002 IRExpr* addrE = mkexpr(addr);
3003 switch (szB) {
3004 case 8:
3005 storeLE(addrE, dataE);
3006 break;
3007 case 4:
3008 storeLE(addrE, unop(Iop_64to32, dataE));
3009 break;
3010 case 2:
3011 storeLE(addrE, unop(Iop_64to16, dataE));
3012 break;
3013 case 1:
3014 storeLE(addrE, unop(Iop_64to8, dataE));
3015 break;
3016 default:
3017 vassert(0);
3018 }
3019}
3020
3021
3022/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3023 placing the result in an Ity_I64 temporary. */
3024static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3025{
3026 IRTemp res = newTemp(Ity_I64);
3027 IRExpr* addrE = mkexpr(addr);
3028 switch (szB) {
3029 case 8:
3030 assign(res, loadLE(Ity_I64,addrE));
3031 break;
3032 case 4:
3033 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3034 break;
3035 case 2:
3036 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3037 break;
3038 case 1:
3039 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3040 break;
3041 default:
3042 vassert(0);
3043 }
3044 return res;
3045}
3046
3047
3048static
3049Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3050{
3051# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3052
3053 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3054 /* uimm12 is scaled by the transfer size
3055
3056 31 29 26 21 9 4
3057 | | | | | |
3058 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3059 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3060
3061 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3062 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3063
3064 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3065 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3066
3067 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3068 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3069 */
3070 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3071 UInt szLg2 = INSN(31,30);
3072 UInt szB = 1 << szLg2;
3073 Bool isLD = INSN(22,22) == 1;
3074 UInt offs = INSN(21,10) * szB;
3075 UInt nn = INSN(9,5);
3076 UInt tt = INSN(4,0);
3077 IRTemp ta = newTemp(Ity_I64);
3078 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3079 if (nn == 31) { /* FIXME generate stack alignment check */ }
3080 vassert(szLg2 < 4);
3081 if (isLD) {
3082 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3083 } else {
3084 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3085 }
3086 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3087 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3088 DIP("%s %s, [%s, #%u]\n",
3089 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3090 nameIReg64orSP(nn), offs);
3091 return True;
3092 }
3093
3094 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3095 /*
3096 31 29 26 20 11 9 4
3097 | | | | | | |
3098 (at-Rn-then-Rn=EA) | | |
3099 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3100 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3101
3102 (at-EA-then-Rn=EA)
3103 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3104 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3105
3106 (at-EA)
3107 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3108 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3109
3110 simm9 is unscaled.
3111
3112 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3113 load case this is because would create two competing values for
3114 Rt. In the store case the reason is unclear, but the spec
3115 disallows it anyway.
3116
3117 Stores are narrowing, loads are unsigned widening. sz encodes
3118 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3119 */
3120 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3121 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3122 UInt szLg2 = INSN(31,30);
3123 UInt szB = 1 << szLg2;
3124 Bool isLoad = INSN(22,22) == 1;
3125 UInt imm9 = INSN(20,12);
3126 UInt nn = INSN(9,5);
3127 UInt tt = INSN(4,0);
3128 Bool wBack = INSN(10,10) == 1;
3129 UInt how = INSN(11,10);
3130 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3131 /* undecodable; fall through */
3132 } else {
3133 if (nn == 31) { /* FIXME generate stack alignment check */ }
3134
3135 // Compute the transfer address TA and the writeback address WA.
3136 IRTemp tRN = newTemp(Ity_I64);
3137 assign(tRN, getIReg64orSP(nn));
3138 IRTemp tEA = newTemp(Ity_I64);
3139 Long simm9 = (Long)sx_to_64(imm9, 9);
3140 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3141
3142 IRTemp tTA = newTemp(Ity_I64);
3143 IRTemp tWA = newTemp(Ity_I64);
3144 switch (how) {
3145 case BITS2(0,1):
3146 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3147 case BITS2(1,1):
3148 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3149 case BITS2(0,0):
3150 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3151 default:
3152 vassert(0); /* NOTREACHED */
3153 }
3154
sewardje0bff8b2014-03-09 09:40:23 +00003155 /* Normally rN would be updated after the transfer. However, in
3156 the special case typifed by
3157 str x30, [sp,#-16]!
3158 it is necessary to update SP before the transfer, (1)
3159 because Memcheck will otherwise complain about a write
3160 below the stack pointer, and (2) because the segfault
3161 stack extension mechanism will otherwise extend the stack
3162 only down to SP before the instruction, which might not be
3163 far enough, if the -16 bit takes the actual access
3164 address to the next page.
3165 */
3166 Bool earlyWBack
3167 = wBack && simm9 < 0 && szB == 8
3168 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3169
3170 if (wBack && earlyWBack)
3171 putIReg64orSP(nn, mkexpr(tEA));
3172
sewardjbbcf1882014-01-12 12:49:10 +00003173 if (isLoad) {
3174 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3175 } else {
3176 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3177 }
3178
sewardje0bff8b2014-03-09 09:40:23 +00003179 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003180 putIReg64orSP(nn, mkexpr(tEA));
3181
3182 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3183 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3184 const HChar* fmt_str = NULL;
3185 switch (how) {
3186 case BITS2(0,1):
3187 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3188 break;
3189 case BITS2(1,1):
3190 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3191 break;
3192 case BITS2(0,0):
3193 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3194 break;
3195 default:
3196 vassert(0);
3197 }
3198 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3199 nameIRegOrZR(szB == 8, tt),
3200 nameIReg64orSP(nn), simm9);
3201 return True;
3202 }
3203 }
3204
3205 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3206 /* L==1 => mm==LD
3207 L==0 => mm==ST
3208 x==0 => 32 bit transfers, and zero extended loads
3209 x==1 => 64 bit transfers
3210 simm7 is scaled by the (single-register) transfer size
3211
3212 (at-Rn-then-Rn=EA)
3213 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3214
3215 (at-EA-then-Rn=EA)
3216 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3217
3218 (at-EA)
3219 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3220 */
3221
3222 UInt insn_30_23 = INSN(30,23);
3223 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3224 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3225 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3226 UInt bL = INSN(22,22);
3227 UInt bX = INSN(31,31);
3228 UInt bWBack = INSN(23,23);
3229 UInt rT1 = INSN(4,0);
3230 UInt rN = INSN(9,5);
3231 UInt rT2 = INSN(14,10);
3232 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3233 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3234 || (bL && rT1 == rT2)) {
3235 /* undecodable; fall through */
3236 } else {
3237 if (rN == 31) { /* FIXME generate stack alignment check */ }
3238
3239 // Compute the transfer address TA and the writeback address WA.
3240 IRTemp tRN = newTemp(Ity_I64);
3241 assign(tRN, getIReg64orSP(rN));
3242 IRTemp tEA = newTemp(Ity_I64);
3243 simm7 = (bX ? 8 : 4) * simm7;
3244 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3245
3246 IRTemp tTA = newTemp(Ity_I64);
3247 IRTemp tWA = newTemp(Ity_I64);
3248 switch (INSN(24,23)) {
3249 case BITS2(0,1):
3250 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3251 case BITS2(1,1):
3252 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3253 case BITS2(1,0):
3254 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3255 default:
3256 vassert(0); /* NOTREACHED */
3257 }
3258
3259 /* Normally rN would be updated after the transfer. However, in
3260 the special case typifed by
3261 stp x29, x30, [sp,#-112]!
3262 it is necessary to update SP before the transfer, (1)
3263 because Memcheck will otherwise complain about a write
3264 below the stack pointer, and (2) because the segfault
3265 stack extension mechanism will otherwise extend the stack
3266 only down to SP before the instruction, which might not be
3267 far enough, if the -112 bit takes the actual access
3268 address to the next page.
3269 */
3270 Bool earlyWBack
3271 = bWBack && simm7 < 0
3272 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3273
3274 if (bWBack && earlyWBack)
3275 putIReg64orSP(rN, mkexpr(tEA));
3276
3277 /**/ if (bL == 1 && bX == 1) {
3278 // 64 bit load
3279 putIReg64orZR(rT1, loadLE(Ity_I64,
3280 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3281 putIReg64orZR(rT2, loadLE(Ity_I64,
3282 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3283 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003284 // 32 bit load
3285 putIReg32orZR(rT1, loadLE(Ity_I32,
3286 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3287 putIReg32orZR(rT2, loadLE(Ity_I32,
3288 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3289 } else if (bL == 0 && bX == 1) {
3290 // 64 bit store
3291 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3292 getIReg64orZR(rT1));
3293 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3294 getIReg64orZR(rT2));
3295 } else {
3296 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003297 // 32 bit store
3298 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3299 getIReg32orZR(rT1));
3300 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3301 getIReg32orZR(rT2));
3302 }
3303
3304 if (bWBack && !earlyWBack)
3305 putIReg64orSP(rN, mkexpr(tEA));
3306
3307 const HChar* fmt_str = NULL;
3308 switch (INSN(24,23)) {
3309 case BITS2(0,1):
3310 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3311 break;
3312 case BITS2(1,1):
3313 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3314 break;
3315 case BITS2(1,0):
3316 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3317 break;
3318 default:
3319 vassert(0);
3320 }
3321 DIP(fmt_str, bL == 0 ? "st" : "ld",
3322 nameIRegOrZR(bX == 1, rT1),
3323 nameIRegOrZR(bX == 1, rT2),
3324 nameIReg64orSP(rN), simm7);
3325 return True;
3326 }
3327 }
3328
3329 /* ---------------- LDR (literal, int reg) ---------------- */
3330 /* 31 29 23 4
3331 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3332 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3333 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3334 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3335 Just handles the first two cases for now.
3336 */
3337 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3338 UInt imm19 = INSN(23,5);
3339 UInt rT = INSN(4,0);
3340 UInt bX = INSN(30,30);
3341 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3342 if (bX) {
3343 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3344 } else {
3345 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3346 }
3347 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3348 return True;
3349 }
3350
3351 /* -------------- {LD,ST}R (integer register) --------------- */
3352 /* 31 29 20 15 12 11 9 4
3353 | | | | | | | |
3354 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3355 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3356 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3357 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3358
3359 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3360 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3361 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3362 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3363 */
3364 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3365 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3366 HChar dis_buf[64];
3367 UInt szLg2 = INSN(31,30);
3368 Bool isLD = INSN(22,22) == 1;
3369 UInt tt = INSN(4,0);
3370 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3371 if (ea != IRTemp_INVALID) {
3372 switch (szLg2) {
3373 case 3: /* 64 bit */
3374 if (isLD) {
3375 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3376 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3377 } else {
3378 storeLE(mkexpr(ea), getIReg64orZR(tt));
3379 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3380 }
3381 break;
3382 case 2: /* 32 bit */
3383 if (isLD) {
3384 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3385 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3386 } else {
3387 storeLE(mkexpr(ea), getIReg32orZR(tt));
3388 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3389 }
3390 break;
3391 case 1: /* 16 bit */
3392 if (isLD) {
3393 putIReg64orZR(tt, unop(Iop_16Uto64,
3394 loadLE(Ity_I16, mkexpr(ea))));
3395 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3396 } else {
3397 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
3398 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3399 }
3400 break;
3401 case 0: /* 8 bit */
3402 if (isLD) {
3403 putIReg64orZR(tt, unop(Iop_8Uto64,
3404 loadLE(Ity_I8, mkexpr(ea))));
3405 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
3406 } else {
3407 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
3408 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3409 }
3410 break;
3411 default:
3412 vassert(0);
3413 }
3414 return True;
3415 }
3416 }
3417
3418 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
3419 /* 31 29 26 23 21 9 4
3420 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
3421 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
3422 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
3423 where
3424 Rt is Wt when x==1, Xt when x==0
3425 */
3426 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
3427 /* Further checks on bits 31:30 and 22 */
3428 Bool valid = False;
3429 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3430 case BITS3(1,0,0):
3431 case BITS3(0,1,0): case BITS3(0,1,1):
3432 case BITS3(0,0,0): case BITS3(0,0,1):
3433 valid = True;
3434 break;
3435 }
3436 if (valid) {
3437 UInt szLg2 = INSN(31,30);
3438 UInt bitX = INSN(22,22);
3439 UInt imm12 = INSN(21,10);
3440 UInt nn = INSN(9,5);
3441 UInt tt = INSN(4,0);
3442 UInt szB = 1 << szLg2;
3443 IRExpr* ea = binop(Iop_Add64,
3444 getIReg64orSP(nn), mkU64(imm12 * szB));
3445 switch (szB) {
3446 case 4:
3447 vassert(bitX == 0);
3448 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
3449 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
3450 nameIReg64orSP(nn), imm12 * szB);
3451 break;
3452 case 2:
3453 if (bitX == 1) {
3454 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
3455 } else {
3456 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
3457 }
3458 DIP("ldrsh %s, [%s, #%u]\n",
3459 nameIRegOrZR(bitX == 0, tt),
3460 nameIReg64orSP(nn), imm12 * szB);
3461 break;
3462 case 1:
3463 if (bitX == 1) {
3464 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
3465 } else {
3466 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
3467 }
3468 DIP("ldrsb %s, [%s, #%u]\n",
3469 nameIRegOrZR(bitX == 0, tt),
3470 nameIReg64orSP(nn), imm12 * szB);
3471 break;
3472 default:
3473 vassert(0);
3474 }
3475 return True;
3476 }
3477 /* else fall through */
3478 }
3479
3480 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
3481 /* (at-Rn-then-Rn=EA)
3482 31 29 23 21 20 11 9 4
3483 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
3484 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
3485 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
3486
3487 (at-EA-then-Rn=EA)
3488 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
3489 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
3490 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
3491 where
3492 Rt is Wt when x==1, Xt when x==0
3493 transfer-at-Rn when [11]==0, at EA when [11]==1
3494 */
3495 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3496 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3497 /* Further checks on bits 31:30 and 22 */
3498 Bool valid = False;
3499 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3500 case BITS3(1,0,0): // LDRSW Xt
3501 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
3502 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
3503 valid = True;
3504 break;
3505 }
3506 if (valid) {
3507 UInt szLg2 = INSN(31,30);
3508 UInt imm9 = INSN(20,12);
3509 Bool atRN = INSN(11,11) == 0;
3510 UInt nn = INSN(9,5);
3511 UInt tt = INSN(4,0);
3512 IRTemp tRN = newTemp(Ity_I64);
3513 IRTemp tEA = newTemp(Ity_I64);
3514 IRTemp tTA = IRTemp_INVALID;
3515 ULong simm9 = sx_to_64(imm9, 9);
3516 Bool is64 = INSN(22,22) == 0;
3517 assign(tRN, getIReg64orSP(nn));
3518 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3519 tTA = atRN ? tRN : tEA;
3520 HChar ch = '?';
3521 /* There are 5 cases:
3522 byte load, SX to 64
3523 byte load, SX to 32, ZX to 64
3524 halfword load, SX to 64
3525 halfword load, SX to 32, ZX to 64
3526 word load, SX to 64
3527 The ifs below handle them in the listed order.
3528 */
3529 if (szLg2 == 0) {
3530 ch = 'b';
3531 if (is64) {
3532 putIReg64orZR(tt, unop(Iop_8Sto64,
3533 loadLE(Ity_I8, mkexpr(tTA))));
3534 } else {
3535 putIReg32orZR(tt, unop(Iop_8Sto32,
3536 loadLE(Ity_I8, mkexpr(tTA))));
3537 }
3538 }
3539 else if (szLg2 == 1) {
3540 ch = 'h';
3541 if (is64) {
3542 putIReg64orZR(tt, unop(Iop_16Sto64,
3543 loadLE(Ity_I16, mkexpr(tTA))));
3544 } else {
3545 putIReg32orZR(tt, unop(Iop_16Sto32,
3546 loadLE(Ity_I16, mkexpr(tTA))));
3547 }
3548 }
3549 else if (szLg2 == 2 && is64) {
3550 ch = 'w';
3551 putIReg64orZR(tt, unop(Iop_32Sto64,
3552 loadLE(Ity_I32, mkexpr(tTA))));
3553 }
3554 else {
3555 vassert(0);
3556 }
3557 putIReg64orSP(nn, mkexpr(tEA));
3558 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
3559 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3560 return True;
3561 }
3562 /* else fall through */
3563 }
3564
3565 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
3566 /* 31 29 23 21 20 11 9 4
3567 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
3568 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
3569 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
3570 where
3571 Rt is Wt when x==1, Xt when x==0
3572 */
3573 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3574 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
3575 /* Further checks on bits 31:30 and 22 */
3576 Bool valid = False;
3577 switch ((INSN(31,30) << 1) | INSN(22,22)) {
3578 case BITS3(1,0,0): // LDURSW Xt
3579 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
3580 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
3581 valid = True;
3582 break;
3583 }
3584 if (valid) {
3585 UInt szLg2 = INSN(31,30);
3586 UInt imm9 = INSN(20,12);
3587 UInt nn = INSN(9,5);
3588 UInt tt = INSN(4,0);
3589 IRTemp tRN = newTemp(Ity_I64);
3590 IRTemp tEA = newTemp(Ity_I64);
3591 ULong simm9 = sx_to_64(imm9, 9);
3592 Bool is64 = INSN(22,22) == 0;
3593 assign(tRN, getIReg64orSP(nn));
3594 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3595 HChar ch = '?';
3596 /* There are 5 cases:
3597 byte load, SX to 64
3598 byte load, SX to 32, ZX to 64
3599 halfword load, SX to 64
3600 halfword load, SX to 32, ZX to 64
3601 word load, SX to 64
3602 The ifs below handle them in the listed order.
3603 */
3604 if (szLg2 == 0) {
3605 ch = 'b';
3606 if (is64) {
3607 putIReg64orZR(tt, unop(Iop_8Sto64,
3608 loadLE(Ity_I8, mkexpr(tEA))));
3609 } else {
3610 putIReg32orZR(tt, unop(Iop_8Sto32,
3611 loadLE(Ity_I8, mkexpr(tEA))));
3612 }
3613 }
3614 else if (szLg2 == 1) {
3615 ch = 'h';
3616 if (is64) {
3617 putIReg64orZR(tt, unop(Iop_16Sto64,
3618 loadLE(Ity_I16, mkexpr(tEA))));
3619 } else {
3620 putIReg32orZR(tt, unop(Iop_16Sto32,
3621 loadLE(Ity_I16, mkexpr(tEA))));
3622 }
3623 }
3624 else if (szLg2 == 2 && is64) {
3625 ch = 'w';
3626 putIReg64orZR(tt, unop(Iop_32Sto64,
3627 loadLE(Ity_I32, mkexpr(tEA))));
3628 }
3629 else {
3630 vassert(0);
3631 }
3632 DIP("ldurs%c %s, [%s, #%lld]",
3633 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
3634 return True;
3635 }
3636 /* else fall through */
3637 }
3638
3639 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
3640 /* L==1 => mm==LD
3641 L==0 => mm==ST
3642 sz==00 => 32 bit (S) transfers
3643 sz==01 => 64 bit (D) transfers
3644 sz==10 => 128 bit (Q) transfers
3645 sz==11 isn't allowed
3646 simm7 is scaled by the (single-register) transfer size
3647
3648 31 29 22 21 14 9 4
3649 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
3650 (at-Rn-then-Rn=EA)
3651
3652 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
3653 (at-EA-then-Rn=EA)
3654
3655 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
3656 (at-EA)
3657 */
3658
3659 UInt insn_29_23 = INSN(29,23);
3660 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
3661 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
3662 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
3663 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
3664 Bool isLD = INSN(22,22) == 1;
3665 Bool wBack = INSN(23,23) == 1;
3666 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
3667 UInt tt2 = INSN(14,10);
3668 UInt nn = INSN(9,5);
3669 UInt tt1 = INSN(4,0);
3670 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
3671 /* undecodable; fall through */
3672 } else {
3673 if (nn == 31) { /* FIXME generate stack alignment check */ }
3674
3675 // Compute the transfer address TA and the writeback address WA.
3676 UInt szB = 4 << szSlg2; /* szB is the per-register size */
3677 IRTemp tRN = newTemp(Ity_I64);
3678 assign(tRN, getIReg64orSP(nn));
3679 IRTemp tEA = newTemp(Ity_I64);
3680 simm7 = szB * simm7;
3681 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3682
3683 IRTemp tTA = newTemp(Ity_I64);
3684 IRTemp tWA = newTemp(Ity_I64);
3685 switch (INSN(24,23)) {
3686 case BITS2(0,1):
3687 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3688 case BITS2(1,1):
3689 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3690 case BITS2(1,0):
3691 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3692 default:
3693 vassert(0); /* NOTREACHED */
3694 }
3695
3696 IRType ty = Ity_INVALID;
3697 switch (szB) {
3698 case 4: ty = Ity_F32; break;
3699 case 8: ty = Ity_F64; break;
3700 case 16: ty = Ity_V128; break;
3701 default: vassert(0);
3702 }
3703
sewardje0bff8b2014-03-09 09:40:23 +00003704 /* Normally rN would be updated after the transfer. However, in
3705 the special case typifed by
3706 stp q0, q1, [sp,#-512]!
3707 it is necessary to update SP before the transfer, (1)
3708 because Memcheck will otherwise complain about a write
3709 below the stack pointer, and (2) because the segfault
3710 stack extension mechanism will otherwise extend the stack
3711 only down to SP before the instruction, which might not be
3712 far enough, if the -512 bit takes the actual access
3713 address to the next page.
3714 */
3715 Bool earlyWBack
3716 = wBack && simm7 < 0 && szB == 16
3717 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
3718
3719 if (wBack && earlyWBack)
3720 putIReg64orSP(nn, mkexpr(tEA));
3721
sewardjbbcf1882014-01-12 12:49:10 +00003722 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00003723 if (szB < 16) {
3724 putQReg128(tt1, mkV128(0x0000));
3725 }
sewardj606c4ba2014-01-26 19:11:14 +00003726 putQRegLO(tt1,
3727 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00003728 if (szB < 16) {
3729 putQReg128(tt2, mkV128(0x0000));
3730 }
sewardj606c4ba2014-01-26 19:11:14 +00003731 putQRegLO(tt2,
3732 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00003733 } else {
3734 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00003735 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003736 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00003737 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003738 }
3739
sewardje0bff8b2014-03-09 09:40:23 +00003740 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003741 putIReg64orSP(nn, mkexpr(tEA));
3742
3743 const HChar* fmt_str = NULL;
3744 switch (INSN(24,23)) {
3745 case BITS2(0,1):
3746 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3747 break;
3748 case BITS2(1,1):
3749 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3750 break;
3751 case BITS2(1,0):
3752 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3753 break;
3754 default:
3755 vassert(0);
3756 }
3757 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00003758 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00003759 nameIReg64orSP(nn), simm7);
3760 return True;
3761 }
3762 }
3763
3764 /* -------------- {LD,ST}R (vector register) --------------- */
3765 /* 31 29 23 20 15 12 11 9 4
3766 | | | | | | | | |
3767 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
3768 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
3769 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
3770 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
3771 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
3772
3773 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
3774 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
3775 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
3776 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
3777 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
3778 */
3779 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3780 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3781 HChar dis_buf[64];
3782 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3783 Bool isLD = INSN(22,22) == 1;
3784 UInt tt = INSN(4,0);
3785 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
3786 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
3787 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
3788 switch (szLg2) {
3789 case 0: /* 8 bit */
3790 if (isLD) {
3791 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003792 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
3793 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003794 } else {
3795 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003796 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
3797 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003798 }
3799 break;
3800 case 1:
3801 if (isLD) {
3802 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003803 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
3804 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003805 } else {
3806 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00003807 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
3808 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003809 }
3810 break;
3811 case 2: /* 32 bit */
3812 if (isLD) {
3813 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003814 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
3815 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003816 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003817 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
3818 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003819 }
3820 break;
3821 case 3: /* 64 bit */
3822 if (isLD) {
3823 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00003824 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
3825 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003826 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003827 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
3828 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00003829 }
3830 break;
3831 case 4: return False; //ATC
3832 default: vassert(0);
3833 }
3834 return True;
3835 }
3836 after_LDR_STR_vector_register:
3837
3838 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
3839 /* 31 29 22 20 15 12 11 9 4
3840 | | | | | | | | |
3841 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
3842
3843 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
3844 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
3845
3846 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
3847 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
3848 */
3849 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
3850 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3851 HChar dis_buf[64];
3852 UInt szLg2 = INSN(31,30);
3853 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
3854 UInt tt = INSN(4,0);
3855 if (szLg2 == 3) goto after_LDRS_integer_register;
3856 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3857 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
3858 /* Enumerate the 5 variants explicitly. */
3859 if (szLg2 == 2/*32 bit*/ && sxTo64) {
3860 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
3861 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
3862 return True;
3863 }
3864 else
3865 if (szLg2 == 1/*16 bit*/) {
3866 if (sxTo64) {
3867 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
3868 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
3869 } else {
3870 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
3871 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
3872 }
3873 return True;
3874 }
3875 else
3876 if (szLg2 == 0/*8 bit*/) {
3877 if (sxTo64) {
3878 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
3879 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
3880 } else {
3881 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
3882 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
3883 }
3884 return True;
3885 }
3886 /* else it's an invalid combination */
3887 }
3888 after_LDRS_integer_register:
3889
3890 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
3891 /* This is the Unsigned offset variant only. The Post-Index and
3892 Pre-Index variants are below.
3893
3894 31 29 23 21 9 4
3895 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
3896 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
3897 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
3898 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
3899 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
3900
3901 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
3902 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
3903 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
3904 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
3905 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
3906 */
3907 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
3908 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
3909 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3910 Bool isLD = INSN(22,22) == 1;
3911 UInt pimm12 = INSN(21,10) << szLg2;
3912 UInt nn = INSN(9,5);
3913 UInt tt = INSN(4,0);
3914 IRTemp tEA = newTemp(Ity_I64);
3915 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3916 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
3917 if (isLD) {
3918 if (szLg2 < 4) {
3919 putQReg128(tt, mkV128(0x0000));
3920 }
sewardj606c4ba2014-01-26 19:11:14 +00003921 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00003922 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003923 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003924 }
3925 DIP("%s %s, [%s, #%u]\n",
3926 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003927 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00003928 return True;
3929 }
3930
3931 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
3932 /* These are the Post-Index and Pre-Index variants.
3933
3934 31 29 23 20 11 9 4
3935 (at-Rn-then-Rn=EA)
3936 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
3937 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
3938 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
3939 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
3940 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
3941
3942 (at-EA-then-Rn=EA)
3943 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
3944 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
3945 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
3946 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
3947 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
3948
3949 Stores are the same except with bit 22 set to 0.
3950 */
3951 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3952 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3953 && INSN(21,21) == 0 && INSN(10,10) == 1) {
3954 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
3955 Bool isLD = INSN(22,22) == 1;
3956 UInt imm9 = INSN(20,12);
3957 Bool atRN = INSN(11,11) == 0;
3958 UInt nn = INSN(9,5);
3959 UInt tt = INSN(4,0);
3960 IRTemp tRN = newTemp(Ity_I64);
3961 IRTemp tEA = newTemp(Ity_I64);
3962 IRTemp tTA = IRTemp_INVALID;
3963 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
3964 ULong simm9 = sx_to_64(imm9, 9);
3965 assign(tRN, getIReg64orSP(nn));
3966 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3967 tTA = atRN ? tRN : tEA;
3968 if (isLD) {
3969 if (szLg2 < 4) {
3970 putQReg128(tt, mkV128(0x0000));
3971 }
sewardj606c4ba2014-01-26 19:11:14 +00003972 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00003973 } else {
sewardj606c4ba2014-01-26 19:11:14 +00003974 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00003975 }
3976 putIReg64orSP(nn, mkexpr(tEA));
3977 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
3978 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00003979 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00003980 return True;
3981 }
3982
3983 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
3984 /* 31 29 23 20 11 9 4
3985 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
3986 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
3987 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
3988 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
3989 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
3990
3991 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
3992 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
3993 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
3994 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
3995 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
3996 */
3997 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
3998 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
3999 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4000 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4001 Bool isLD = INSN(22,22) == 1;
4002 UInt imm9 = INSN(20,12);
4003 UInt nn = INSN(9,5);
4004 UInt tt = INSN(4,0);
4005 ULong simm9 = sx_to_64(imm9, 9);
4006 IRTemp tEA = newTemp(Ity_I64);
4007 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4008 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4009 if (isLD) {
sewardj606c4ba2014-01-26 19:11:14 +00004010 if (szLg2 < 4) {
4011 putQReg128(tt, mkV128(0x0000));
4012 }
4013 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004014 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004015 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004016 }
4017 DIP("%s %s, [%s, #%lld]\n",
4018 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004019 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004020 return True;
4021 }
4022
4023 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4024 /* 31 29 23 4
4025 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4026 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4027 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4028 */
4029 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4030 UInt szB = 4 << INSN(31,30);
4031 UInt imm19 = INSN(23,5);
4032 UInt tt = INSN(4,0);
4033 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4034 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004035 putQReg128(tt, mkV128(0x0000));
4036 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4037 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004038 return True;
4039 }
4040
sewardj606c4ba2014-01-26 19:11:14 +00004041 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004042 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004043 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4044 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4045 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4046 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4047 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4048 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004049 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4050 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004051 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004052 */
sewardj606c4ba2014-01-26 19:11:14 +00004053 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4054 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004055 ) {
4056 Bool isLD = INSN(22,22) == 1;
4057 UInt rN = INSN(9,5);
4058 UInt vT = INSN(4,0);
4059 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004060 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4061 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004062 assign(tEA, getIReg64orSP(rN));
4063 if (rN == 31) { /* FIXME generate stack alignment check */ }
4064 if (isLD) {
4065 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4066 } else {
4067 storeLE(mkexpr(tEA), getQReg128(vT));
4068 }
4069 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004070 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004071 return True;
4072 }
4073
sewardj606c4ba2014-01-26 19:11:14 +00004074 /* 31 23
4075 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4076 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4077 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4078 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4079 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4080 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4081 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4082 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4083 FIXME does this assume that the host is little endian?
4084 */
4085 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4086 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4087 ) {
4088 Bool isLD = INSN(22,22) == 1;
4089 UInt rN = INSN(9,5);
4090 UInt vT = INSN(4,0);
4091 IRTemp tEA = newTemp(Ity_I64);
4092 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4093 const HChar* name = names[INSN(11,10)];
4094 assign(tEA, getIReg64orSP(rN));
4095 if (rN == 31) { /* FIXME generate stack alignment check */ }
4096 if (isLD) {
4097 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4098 putQRegLane(vT, 1, mkU64(0));
4099 } else {
4100 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4101 }
4102 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4103 vT, name, nameIReg64orSP(rN));
4104 return True;
4105 }
4106
4107 /* ---------- LD1/ST1 (single structure, post index) ---------- */
4108 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004109 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4110 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4111 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4112 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4113 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4114 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4115 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004116 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004117 Note that #16 is implied and cannot be any other value.
4118 FIXME does this assume that the host is little endian?
4119 */
sewardj7d009132014-02-20 17:43:38 +00004120 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4121 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004122 ) {
4123 Bool isLD = INSN(22,22) == 1;
4124 UInt rN = INSN(9,5);
4125 UInt vT = INSN(4,0);
4126 IRTemp tEA = newTemp(Ity_I64);
4127 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4128 const HChar* name = names[INSN(11,10)];
4129 assign(tEA, getIReg64orSP(rN));
4130 if (rN == 31) { /* FIXME generate stack alignment check */ }
4131 if (isLD) {
4132 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4133 } else {
4134 storeLE(mkexpr(tEA), getQReg128(vT));
4135 }
4136 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4137 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4138 vT, name, nameIReg64orSP(rN));
4139 return True;
4140 }
4141
sewardj950ca7a2014-04-03 23:03:32 +00004142 /* 31 23
4143 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4144 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004145 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004146 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004147 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004148 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4149 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4150 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004151 Note that #8 is implied and cannot be any other value.
4152 FIXME does this assume that the host is little endian?
4153 */
sewardj950ca7a2014-04-03 23:03:32 +00004154 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4155 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004156 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004157 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004158 UInt rN = INSN(9,5);
4159 UInt vT = INSN(4,0);
4160 IRTemp tEA = newTemp(Ity_I64);
4161 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4162 const HChar* name = names[INSN(11,10)];
4163 assign(tEA, getIReg64orSP(rN));
4164 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004165 if (isLD) {
4166 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4167 putQRegLane(vT, 1, mkU64(0));
4168 } else {
4169 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4170 }
sewardj606c4ba2014-01-26 19:11:14 +00004171 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004172 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4173 vT, name, nameIReg64orSP(rN));
4174 return True;
4175 }
4176
4177 /* ---------- LD2/ST2 (multiple structures, post index) ---------- */
4178 /* Only a very few cases. */
4179 /* 31 23 11 9 4
4180 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4181 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4182 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4183 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4184 */
4185 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4186 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4187 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4188 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4189 ) {
4190 Bool isLD = INSN(22,22) == 1;
4191 UInt rN = INSN(9,5);
4192 UInt vT = INSN(4,0);
4193 IRTemp tEA = newTemp(Ity_I64);
4194 UInt sz = INSN(11,10);
4195 const HChar* name = "??";
4196 assign(tEA, getIReg64orSP(rN));
4197 if (rN == 31) { /* FIXME generate stack alignment check */ }
4198 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4199 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4200 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4201 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4202 if (sz == BITS2(1,1)) {
4203 name = "2d";
4204 if (isLD) {
4205 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4206 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4207 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4208 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4209 } else {
4210 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4211 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4212 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4213 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4214 }
4215 }
4216 else if (sz == BITS2(1,0)) {
4217 /* Uh, this is ugly. TODO: better. */
4218 name = "4s";
4219 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4220 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4221 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4222 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4223 if (isLD) {
4224 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4225 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4226 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4227 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4228 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4229 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4230 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4231 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4232 } else {
4233 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4234 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4235 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4236 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4237 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4238 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4239 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4240 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4241 }
4242 }
4243 else {
4244 vassert(0); // Can't happen.
4245 }
4246 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4247 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4248 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4249 return True;
4250 }
4251
4252 /* ---------- LD1/ST1 (multiple structures, no offset) ---------- */
4253 /* Only a very few cases. */
4254 /* 31 23
4255 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4256 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4257 */
4258 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4259 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4260 ) {
4261 Bool isLD = INSN(22,22) == 1;
4262 UInt rN = INSN(9,5);
4263 UInt vT = INSN(4,0);
4264 IRTemp tEA = newTemp(Ity_I64);
4265 const HChar* name = "16b";
4266 assign(tEA, getIReg64orSP(rN));
4267 if (rN == 31) { /* FIXME generate stack alignment check */ }
4268 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4269 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4270 if (isLD) {
4271 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4272 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4273 } else {
4274 storeLE(tEA_0, getQReg128((vT+0) % 32));
4275 storeLE(tEA_16, getQReg128((vT+1) % 32));
4276 }
4277 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4278 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004279 return True;
4280 }
4281
sewardj7d009132014-02-20 17:43:38 +00004282 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
4283 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
4284 /* 31 29 23 20 14 9 4
4285 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
4286 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
4287 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
4288 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004289 */
sewardj7d009132014-02-20 17:43:38 +00004290 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
4291 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
4292 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00004293 UInt szBlg2 = INSN(31,30);
4294 Bool isLD = INSN(22,22) == 1;
4295 Bool isAcqOrRel = INSN(15,15) == 1;
4296 UInt ss = INSN(20,16);
4297 UInt nn = INSN(9,5);
4298 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00004299
sewardjdc9259c2014-02-27 11:10:19 +00004300 vassert(szBlg2 < 4);
4301 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4302 IRType ty = integerIRTypeOfSize(szB);
4303 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00004304
sewardjdc9259c2014-02-27 11:10:19 +00004305 IRTemp ea = newTemp(Ity_I64);
4306 assign(ea, getIReg64orSP(nn));
4307 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00004308
sewardjdc9259c2014-02-27 11:10:19 +00004309 if (isLD && ss == BITS5(1,1,1,1,1)) {
4310 IRTemp res = newTemp(ty);
4311 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
4312 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4313 if (isAcqOrRel) {
4314 stmt(IRStmt_MBE(Imbe_Fence));
4315 }
4316 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4317 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4318 return True;
4319 }
4320 if (!isLD) {
4321 if (isAcqOrRel) {
4322 stmt(IRStmt_MBE(Imbe_Fence));
4323 }
4324 IRTemp res = newTemp(Ity_I1);
4325 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4326 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
4327 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
4328 Need to set rS to 1 on failure, 0 on success. */
4329 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
4330 mkU64(1)));
4331 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
4332 nameIRegOrZR(False, ss),
4333 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4334 return True;
4335 }
4336 /* else fall through */
4337 }
4338
4339 /* ------------------ LDA{R,RH,RB} ------------------ */
4340 /* ------------------ STL{R,RH,RB} ------------------ */
4341 /* 31 29 23 20 14 9 4
4342 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
4343 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
4344 */
4345 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
4346 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
4347 UInt szBlg2 = INSN(31,30);
4348 Bool isLD = INSN(22,22) == 1;
4349 UInt nn = INSN(9,5);
4350 UInt tt = INSN(4,0);
4351
4352 vassert(szBlg2 < 4);
4353 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
4354 IRType ty = integerIRTypeOfSize(szB);
4355 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
4356
4357 IRTemp ea = newTemp(Ity_I64);
4358 assign(ea, getIReg64orSP(nn));
4359 /* FIXME generate check that ea is szB-aligned */
4360
4361 if (isLD) {
4362 IRTemp res = newTemp(ty);
4363 assign(res, loadLE(ty, mkexpr(ea)));
4364 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
4365 stmt(IRStmt_MBE(Imbe_Fence));
4366 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
4367 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4368 } else {
4369 stmt(IRStmt_MBE(Imbe_Fence));
4370 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
4371 storeLE(mkexpr(ea), data);
4372 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
4373 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
4374 }
4375 return True;
sewardjbbcf1882014-01-12 12:49:10 +00004376 }
4377
4378 vex_printf("ARM64 front end: load_store\n");
4379 return False;
4380# undef INSN
4381}
4382
4383
4384/*------------------------------------------------------------*/
4385/*--- Control flow and misc instructions ---*/
4386/*------------------------------------------------------------*/
4387
4388static
4389Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn)
4390{
4391# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4392
4393 /* ---------------------- B cond ----------------------- */
4394 /* 31 24 4 3
4395 0101010 0 imm19 0 cond */
4396 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
4397 UInt cond = INSN(3,0);
4398 ULong uimm64 = INSN(23,5) << 2;
4399 Long simm64 = (Long)sx_to_64(uimm64, 21);
4400 vassert(dres->whatNext == Dis_Continue);
4401 vassert(dres->len == 4);
4402 vassert(dres->continueAt == 0);
4403 vassert(dres->jk_StopHere == Ijk_INVALID);
4404 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
4405 Ijk_Boring,
4406 IRConst_U64(guest_PC_curr_instr + simm64),
4407 OFFB_PC) );
4408 putPC(mkU64(guest_PC_curr_instr + 4));
4409 dres->whatNext = Dis_StopHere;
4410 dres->jk_StopHere = Ijk_Boring;
4411 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
4412 return True;
4413 }
4414
4415 /* -------------------- B{L} uncond -------------------- */
4416 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
4417 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
4418 100101 imm26 B (PC + sxTo64(imm26 << 2))
4419 */
4420 UInt bLink = INSN(31,31);
4421 ULong uimm64 = INSN(25,0) << 2;
4422 Long simm64 = (Long)sx_to_64(uimm64, 28);
4423 if (bLink) {
4424 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4425 }
4426 putPC(mkU64(guest_PC_curr_instr + simm64));
4427 dres->whatNext = Dis_StopHere;
4428 dres->jk_StopHere = Ijk_Call;
4429 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
4430 guest_PC_curr_instr + simm64);
4431 return True;
4432 }
4433
4434 /* --------------------- B{L} reg --------------------- */
4435 /* 31 24 22 20 15 9 4
4436 1101011 00 10 11111 000000 nn 00000 RET Rn
4437 1101011 00 01 11111 000000 nn 00000 CALL Rn
4438 1101011 00 00 11111 000000 nn 00000 JMP Rn
4439 */
4440 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
4441 && INSN(20,16) == BITS5(1,1,1,1,1)
4442 && INSN(15,10) == BITS6(0,0,0,0,0,0)
4443 && INSN(4,0) == BITS5(0,0,0,0,0)) {
4444 UInt branch_type = INSN(22,21);
4445 UInt nn = INSN(9,5);
4446 if (branch_type == BITS2(1,0) /* RET */) {
4447 putPC(getIReg64orZR(nn));
4448 dres->whatNext = Dis_StopHere;
4449 dres->jk_StopHere = Ijk_Ret;
4450 DIP("ret %s\n", nameIReg64orZR(nn));
4451 return True;
4452 }
4453 if (branch_type == BITS2(0,1) /* CALL */) {
4454 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
4455 putPC(getIReg64orZR(nn));
4456 dres->whatNext = Dis_StopHere;
4457 dres->jk_StopHere = Ijk_Call;
4458 DIP("blr %s\n", nameIReg64orZR(nn));
4459 return True;
4460 }
4461 if (branch_type == BITS2(0,0) /* JMP */) {
4462 putPC(getIReg64orZR(nn));
4463 dres->whatNext = Dis_StopHere;
4464 dres->jk_StopHere = Ijk_Boring;
4465 DIP("jmp %s\n", nameIReg64orZR(nn));
4466 return True;
4467 }
4468 }
4469
4470 /* -------------------- CB{N}Z -------------------- */
4471 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4472 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
4473 */
4474 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
4475 Bool is64 = INSN(31,31) == 1;
4476 Bool bIfZ = INSN(24,24) == 0;
4477 ULong uimm64 = INSN(23,5) << 2;
4478 UInt rT = INSN(4,0);
4479 Long simm64 = (Long)sx_to_64(uimm64, 21);
4480 IRExpr* cond = NULL;
4481 if (is64) {
4482 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4483 getIReg64orZR(rT), mkU64(0));
4484 } else {
4485 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
4486 getIReg32orZR(rT), mkU32(0));
4487 }
4488 stmt( IRStmt_Exit(cond,
4489 Ijk_Boring,
4490 IRConst_U64(guest_PC_curr_instr + simm64),
4491 OFFB_PC) );
4492 putPC(mkU64(guest_PC_curr_instr + 4));
4493 dres->whatNext = Dis_StopHere;
4494 dres->jk_StopHere = Ijk_Boring;
4495 DIP("cb%sz %s, 0x%llx\n",
4496 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
4497 guest_PC_curr_instr + simm64);
4498 return True;
4499 }
4500
4501 /* -------------------- TB{N}Z -------------------- */
4502 /* 31 30 24 23 18 5 4
4503 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4504 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
4505 */
4506 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
4507 UInt b5 = INSN(31,31);
4508 Bool bIfZ = INSN(24,24) == 0;
4509 UInt b40 = INSN(23,19);
4510 UInt imm14 = INSN(18,5);
4511 UInt tt = INSN(4,0);
4512 UInt bitNo = (b5 << 5) | b40;
4513 ULong uimm64 = imm14 << 2;
4514 Long simm64 = sx_to_64(uimm64, 16);
4515 IRExpr* cond
4516 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
4517 binop(Iop_And64,
4518 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
4519 mkU64(1)),
4520 mkU64(0));
4521 stmt( IRStmt_Exit(cond,
4522 Ijk_Boring,
4523 IRConst_U64(guest_PC_curr_instr + simm64),
4524 OFFB_PC) );
4525 putPC(mkU64(guest_PC_curr_instr + 4));
4526 dres->whatNext = Dis_StopHere;
4527 dres->jk_StopHere = Ijk_Boring;
4528 DIP("tb%sz %s, #%u, 0x%llx\n",
4529 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
4530 guest_PC_curr_instr + simm64);
4531 return True;
4532 }
4533
4534 /* -------------------- SVC -------------------- */
4535 /* 11010100 000 imm16 000 01
4536 Don't bother with anything except the imm16==0 case.
4537 */
4538 if (INSN(31,0) == 0xD4000001) {
4539 putPC(mkU64(guest_PC_curr_instr + 4));
4540 dres->whatNext = Dis_StopHere;
4541 dres->jk_StopHere = Ijk_Sys_syscall;
4542 DIP("svc #0\n");
4543 return True;
4544 }
4545
4546 /* ------------------ M{SR,RS} ------------------ */
4547 /* Only handles the case where the system register is TPIDR_EL0.
4548 0xD51BD0 010 Rt MSR tpidr_el0, rT
4549 0xD53BD0 010 Rt MRS rT, tpidr_el0
4550 */
4551 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
4552 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
4553 Bool toSys = INSN(21,21) == 0;
4554 UInt tt = INSN(4,0);
4555 if (toSys) {
4556 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
4557 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
4558 } else {
4559 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
4560 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
4561 }
4562 return True;
4563 }
4564 /* Cases for FPCR
4565 0xD51B44 000 Rt MSR fpcr, rT
4566 0xD53B44 000 Rt MRS rT, fpcr
4567 */
4568 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
4569 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
4570 Bool toSys = INSN(21,21) == 0;
4571 UInt tt = INSN(4,0);
4572 if (toSys) {
4573 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
4574 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
4575 } else {
4576 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
4577 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
4578 }
4579 return True;
4580 }
4581 /* Cases for FPSR
sewardj7d009132014-02-20 17:43:38 +00004582 0xD51B44 001 Rt MSR fpsr, rT
4583 0xD53B44 001 Rt MRS rT, fpsr
sewardjbbcf1882014-01-12 12:49:10 +00004584 */
4585 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
4586 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
4587 Bool toSys = INSN(21,21) == 0;
4588 UInt tt = INSN(4,0);
4589 if (toSys) {
4590 stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
4591 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
4592 } else {
4593 putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
4594 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
4595 }
4596 return True;
4597 }
4598 /* Cases for NZCV
4599 D51B42 000 Rt MSR nzcv, rT
4600 D53B42 000 Rt MRS rT, nzcv
4601 */
4602 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
4603 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
4604 Bool toSys = INSN(21,21) == 0;
4605 UInt tt = INSN(4,0);
4606 if (toSys) {
4607 IRTemp t = newTemp(Ity_I64);
4608 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
4609 setFlags_COPY(t);
4610 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
4611 } else {
4612 IRTemp res = newTemp(Ity_I64);
4613 assign(res, mk_arm64g_calculate_flags_nzcv());
4614 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
4615 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
4616 }
4617 return True;
4618 }
sewardjd512d102014-02-21 14:49:44 +00004619 /* Cases for DCZID_EL0
4620 Don't support arbitrary reads and writes to this register. Just
4621 return the value 16, which indicates that the DC ZVA instruction
4622 is not permitted, so we don't have to emulate it.
4623 D5 3B 00 111 Rt MRS rT, dczid_el0
4624 */
4625 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
4626 UInt tt = INSN(4,0);
4627 putIReg64orZR(tt, mkU64(1<<4));
4628 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
4629 return True;
4630 }
sewardjbbcf1882014-01-12 12:49:10 +00004631
sewardjd512d102014-02-21 14:49:44 +00004632 /* ------------------ ISB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00004633 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00004634 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004635 DIP("isb\n");
4636 return True;
4637 }
4638 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00004639 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00004640 DIP("dmb ish\n");
4641 return True;
4642 }
4643
sewardjdc9259c2014-02-27 11:10:19 +00004644 /* -------------------- NOP -------------------- */
4645 if (INSN(31,0) == 0xD503201F) {
4646 DIP("nop\n");
4647 return True;
4648 }
4649
sewardjbbcf1882014-01-12 12:49:10 +00004650 //fail:
4651 vex_printf("ARM64 front end: branch_etc\n");
4652 return False;
4653# undef INSN
4654}
4655
4656
4657/*------------------------------------------------------------*/
4658/*--- SIMD and FP instructions ---*/
4659/*------------------------------------------------------------*/
4660
sewardjecde6972014-02-05 11:01:19 +00004661/* begin FIXME -- rm temp scaffolding */
4662static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
4663static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004664
sewardjecde6972014-02-05 11:01:19 +00004665static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
4666static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004667static IRExpr* mk_InterleaveLO32x4 ( IRTemp, IRTemp );
4668static IRExpr* mk_InterleaveHI32x4 ( IRTemp, IRTemp );
4669
sewardjecde6972014-02-05 11:01:19 +00004670static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
4671static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004672static IRExpr* mk_InterleaveLO16x8 ( IRTemp, IRTemp );
4673static IRExpr* mk_InterleaveHI16x8 ( IRTemp, IRTemp );
4674
sewardjfab09142014-02-10 10:28:13 +00004675static IRExpr* mk_CatEvenLanes8x16 ( IRTemp, IRTemp );
4676static IRExpr* mk_CatOddLanes8x16 ( IRTemp, IRTemp );
sewardje520bb32014-02-17 11:00:53 +00004677static IRExpr* mk_InterleaveLO8x16 ( IRTemp, IRTemp );
4678static IRExpr* mk_InterleaveHI8x16 ( IRTemp, IRTemp );
sewardjecde6972014-02-05 11:01:19 +00004679/* end FIXME -- rm temp scaffolding */
4680
sewardjbbcf1882014-01-12 12:49:10 +00004681/* Generate N copies of |bit| in the bottom of a ULong. */
4682static ULong Replicate ( ULong bit, Int N )
4683{
sewardj606c4ba2014-01-26 19:11:14 +00004684 vassert(bit <= 1 && N >= 1 && N < 64);
4685 if (bit == 0) {
4686 return 0;
4687 } else {
4688 /* Careful. This won't work for N == 64. */
4689 return (1ULL << N) - 1;
4690 }
sewardjbbcf1882014-01-12 12:49:10 +00004691}
4692
sewardjfab09142014-02-10 10:28:13 +00004693static ULong Replicate32x2 ( ULong bits32 )
4694{
4695 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
4696 return (bits32 << 32) | bits32;
4697}
4698
4699static ULong Replicate16x4 ( ULong bits16 )
4700{
4701 vassert(0 == (bits16 & ~0xFFFFULL));
4702 return Replicate32x2((bits16 << 16) | bits16);
4703}
4704
4705static ULong Replicate8x8 ( ULong bits8 )
4706{
4707 vassert(0 == (bits8 & ~0xFFULL));
4708 return Replicate16x4((bits8 << 8) | bits8);
4709}
4710
4711/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
4712 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
4713 is 64. In the former case, the upper 32 bits of the returned value
4714 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00004715static ULong VFPExpandImm ( ULong imm8, Int N )
4716{
sewardj606c4ba2014-01-26 19:11:14 +00004717 vassert(imm8 <= 0xFF);
4718 vassert(N == 32 || N == 64);
4719 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
4720 Int F = N - E - 1;
4721 ULong imm8_6 = (imm8 >> 6) & 1;
4722 /* sign: 1 bit */
4723 /* exp: E bits */
4724 /* frac: F bits */
4725 ULong sign = (imm8 >> 7) & 1;
4726 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
4727 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
4728 vassert(sign < (1ULL << 1));
4729 vassert(exp < (1ULL << E));
4730 vassert(frac < (1ULL << F));
4731 vassert(1 + E + F == N);
4732 ULong res = (sign << (E+F)) | (exp << F) | frac;
4733 return res;
sewardjbbcf1882014-01-12 12:49:10 +00004734}
4735
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.

   |op| (1 bit) and |cmode| (4 bits) select the expansion form;
   |imm8| is the 8-bit immediate payload.  On success the expanded
   value is written to *res and True is returned; on failure *res is
   left as zero and False is returned. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64 = 0;
   /* Some forms are invalid when imm8 == 0; testimm8 records that. */
   Bool testimm8 = False;

   switch (cmode >> 1) {
      /* cases 0..3: 32-bit splat, imm8 placed in byte 0/1/2/3. */
      case 0:
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3:
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      /* cases 4..5: 16-bit splat, imm8 placed in byte 0/1. */
      case 4:
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5:
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      /* case 6: 32-bit "shifted ones" forms -- low byte(s) all 1s. */
      case 6:
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      /* case 7: four sub-forms, selected by cmode<0> and op. */
      case 7:
         testimm8 = False;
         /* 8-bit splat. */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* Per-bit byte mask: each bit of imm8 expands to a byte of
            all-0s or all-1s, MSB first. */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* 32-bit FP immediate (VFPExpandImm-like), splatted to both
            halves. */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* 64-bit FP immediate. */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
         vassert(0);
   }

   /* Reject encodings whose payload must be nonzero. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
4816
4817
sewardj606c4ba2014-01-26 19:11:14 +00004818/* Help a bit for decoding laneage for vector operations that can be
4819 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
4820 and SZ bits, typically for vector floating point. */
4821static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
4822 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
4823 /*OUT*/const HChar** arrSpec,
4824 Bool bitQ, Bool bitSZ )
4825{
4826 vassert(bitQ == True || bitQ == False);
4827 vassert(bitSZ == True || bitSZ == False);
4828 if (bitQ && bitSZ) { // 2x64
4829 if (tyI) *tyI = Ity_I64;
4830 if (tyF) *tyF = Ity_F64;
4831 if (nLanes) *nLanes = 2;
4832 if (zeroUpper) *zeroUpper = False;
4833 if (arrSpec) *arrSpec = "2d";
4834 return True;
4835 }
4836 if (bitQ && !bitSZ) { // 4x32
4837 if (tyI) *tyI = Ity_I32;
4838 if (tyF) *tyF = Ity_F32;
4839 if (nLanes) *nLanes = 4;
4840 if (zeroUpper) *zeroUpper = False;
4841 if (arrSpec) *arrSpec = "4s";
4842 return True;
4843 }
4844 if (!bitQ && !bitSZ) { // 2x32
4845 if (tyI) *tyI = Ity_I32;
4846 if (tyF) *tyF = Ity_F32;
4847 if (nLanes) *nLanes = 2;
4848 if (zeroUpper) *zeroUpper = True;
4849 if (arrSpec) *arrSpec = "2s";
4850 return True;
4851 }
4852 // Else impliedly 1x64, which isn't allowed.
4853 return False;
4854}
4855
4856/* Helper for decoding laneage for simple vector operations,
4857 eg integer add. */
4858static Bool getLaneInfo_SIMPLE ( /*OUT*/Bool* zeroUpper,
4859 /*OUT*/const HChar** arrSpec,
4860 Bool bitQ, UInt szBlg2 )
4861{
4862 vassert(bitQ == True || bitQ == False);
4863 vassert(szBlg2 < 4);
4864 Bool zu = False;
4865 const HChar* as = NULL;
4866 switch ((szBlg2 << 1) | (bitQ ? 1 : 0)) {
4867 case 0: zu = True; as = "8b"; break;
4868 case 1: zu = False; as = "16b"; break;
4869 case 2: zu = True; as = "4h"; break;
4870 case 3: zu = False; as = "8h"; break;
4871 case 4: zu = True; as = "2s"; break;
4872 case 5: zu = False; as = "4s"; break;
4873 case 6: return False; // impliedly 1x64
4874 case 7: zu = False; as = "2d"; break;
4875 default: vassert(0);
4876 }
4877 vassert(as);
4878 if (arrSpec) *arrSpec = as;
4879 if (zeroUpper) *zeroUpper = zu;
4880 return True;
4881}
4882
4883
sewardje520bb32014-02-17 11:00:53 +00004884/* Helper for decoding laneage for shift-style vector operations
4885 that involve an immediate shift amount. */
4886static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
4887 UInt immh, UInt immb )
4888{
4889 vassert(immh < (1<<4));
4890 vassert(immb < (1<<3));
4891 UInt immhb = (immh << 3) | immb;
4892 if (immh & 8) {
4893 if (shift) *shift = 128 - immhb;
4894 if (szBlg2) *szBlg2 = 3;
4895 return True;
4896 }
4897 if (immh & 4) {
4898 if (shift) *shift = 64 - immhb;
4899 if (szBlg2) *szBlg2 = 2;
4900 return True;
4901 }
4902 if (immh & 2) {
4903 if (shift) *shift = 32 - immhb;
4904 if (szBlg2) *szBlg2 = 1;
4905 return True;
4906 }
4907 if (immh & 1) {
4908 if (shift) *shift = 16 - immhb;
4909 if (szBlg2) *szBlg2 = 0;
4910 return True;
4911 }
4912 return False;
4913}
4914
4915
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero.

   'op' must be one of the 8x16, 16x8 or 32x4 integer min/max
   operators handled in the switch below; anything else asserts. */
static IRTemp math_MINMAXV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: clone each 64-bit half across the vector. */
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: clone each 32-bit quarter. */
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: clone each 16-bit element. */
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: clone each 8-bit lane; xAllN holds 16 copies of
            original lane N. */
         IRTemp xAllF = newTemp(Ity_V128);
         IRTemp xAllE = newTemp(Ity_V128);
         IRTemp xAllD = newTemp(Ity_V128);
         IRTemp xAllC = newTemp(Ity_V128);
         IRTemp xAllB = newTemp(Ity_V128);
         IRTemp xAllA = newTemp(Ity_V128);
         IRTemp xAll9 = newTemp(Ity_V128);
         IRTemp xAll8 = newTemp(Ity_V128);
         IRTemp xAll7 = newTemp(Ity_V128);
         IRTemp xAll6 = newTemp(Ity_V128);
         IRTemp xAll5 = newTemp(Ity_V128);
         IRTemp xAll4 = newTemp(Ity_V128);
         IRTemp xAll3 = newTemp(Ity_V128);
         IRTemp xAll2 = newTemp(Ity_V128);
         IRTemp xAll1 = newTemp(Ity_V128);
         IRTemp xAll0 = newTemp(Ity_V128);
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Fold the 16 cloned vectors pairwise with 'op', in a
            4-level reduction tree. */
         IRTemp maxFE = newTemp(Ity_V128);
         IRTemp maxDC = newTemp(Ity_V128);
         IRTemp maxBA = newTemp(Ity_V128);
         IRTemp max98 = newTemp(Ity_V128);
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTemp(Ity_V128);
         IRTemp maxBA98 = newTemp(Ity_V128);
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTemp(Ity_V128);
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTemp(Ity_V128);
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Keep only the bottom 8 bits of the fully-folded vector. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: {
         /* Same scheme as above, for 8 lanes of 16 bits: clone every
            lane, then fold with a 3-level reduction tree. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTemp(Ity_V128);
         IRTemp x32103210 = newTemp(Ity_V128);
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTemp(Ity_V128);
         IRTemp x54545454 = newTemp(Ity_V128);
         IRTemp x32323232 = newTemp(Ity_V128);
         IRTemp x10101010 = newTemp(Ity_V128);
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         IRTemp x77777777 = newTemp(Ity_V128);
         IRTemp x66666666 = newTemp(Ity_V128);
         IRTemp x55555555 = newTemp(Ity_V128);
         IRTemp x44444444 = newTemp(Ity_V128);
         IRTemp x33333333 = newTemp(Ity_V128);
         IRTemp x22222222 = newTemp(Ity_V128);
         IRTemp x11111111 = newTemp(Ity_V128);
         IRTemp x00000000 = newTemp(Ity_V128);
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         IRTemp max76 = newTemp(Ity_V128);
         IRTemp max54 = newTemp(Ity_V128);
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTemp(Ity_V128);
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTemp(Ity_V128);
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the bottom 16 bits. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: {
         /* Same scheme again, for 4 lanes of 32 bits: clone every
            lane, then fold with a 2-level reduction tree. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTemp(Ity_V128);
         IRTemp x1010 = newTemp(Ity_V128);
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         IRTemp x3333 = newTemp(Ity_V128);
         IRTemp x2222 = newTemp(Ity_V128);
         IRTemp x1111 = newTemp(Ity_V128);
         IRTemp x0000 = newTemp(Ity_V128);
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTemp(Ity_V128);
         IRTemp max10 = newTemp(Ity_V128);
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTemp(Ity_V128);
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the bottom 32 bits. */
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      default:
         vassert(0);
   }
}
5106
5107
sewardj92d0ae32014-04-03 13:48:54 +00005108/* Generate IR for TBL and TBX. This deals with the 128 bit case
5109 only. */
5110static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
5111 IRTemp oor_values )
5112{
5113 vassert(len >= 0 && len <= 3);
5114
5115 /* Generate some useful constants as concisely as possible. */
5116 IRTemp half15 = newTemp(Ity_I64);
5117 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
5118 IRTemp half16 = newTemp(Ity_I64);
5119 assign(half16, mkU64(0x1010101010101010ULL));
5120
5121 /* A zero vector */
5122 IRTemp allZero = newTemp(Ity_V128);
5123 assign(allZero, mkV128(0x0000));
5124 /* A vector containing 15 in each 8-bit lane */
5125 IRTemp all15 = newTemp(Ity_V128);
5126 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
5127 /* A vector containing 16 in each 8-bit lane */
5128 IRTemp all16 = newTemp(Ity_V128);
5129 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
5130 /* A vector containing 32 in each 8-bit lane */
5131 IRTemp all32 = newTemp(Ity_V128);
5132 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
5133 /* A vector containing 48 in each 8-bit lane */
5134 IRTemp all48 = newTemp(Ity_V128);
5135 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
5136 /* A vector containing 64 in each 8-bit lane */
5137 IRTemp all64 = newTemp(Ity_V128);
5138 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
5139
5140 /* Group the 16/32/48/64 vectors so as to be indexable. */
5141 IRTemp allXX[4] = { all16, all32, all48, all64 };
5142
5143 /* Compute the result for each table vector, with zeroes in places
5144 where the index values are out of range, and OR them into the
5145 running vector. */
5146 IRTemp running_result = newTemp(Ity_V128);
5147 assign(running_result, mkV128(0));
5148
5149 UInt tabent;
5150 for (tabent = 0; tabent <= len; tabent++) {
5151 vassert(tabent >= 0 && tabent < 4);
5152 IRTemp bias = newTemp(Ity_V128);
5153 assign(bias,
5154 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
5155 IRTemp biased_indices = newTemp(Ity_V128);
5156 assign(biased_indices,
5157 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
5158 IRTemp valid_mask = newTemp(Ity_V128);
5159 assign(valid_mask,
5160 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
5161 IRTemp safe_biased_indices = newTemp(Ity_V128);
5162 assign(safe_biased_indices,
5163 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
5164 IRTemp results_or_junk = newTemp(Ity_V128);
5165 assign(results_or_junk,
5166 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
5167 mkexpr(safe_biased_indices)));
5168 IRTemp results_or_zero = newTemp(Ity_V128);
5169 assign(results_or_zero,
5170 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
5171 /* And OR that into the running result. */
5172 IRTemp tmp = newTemp(Ity_V128);
5173 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
5174 mkexpr(running_result)));
5175 running_result = tmp;
5176 }
5177
5178 /* So now running_result holds the overall result where the indices
5179 are in range, and zero in out-of-range lanes. Now we need to
5180 compute an overall validity mask and use this to copy in the
5181 lanes in the oor_values for out of range indices. This is
5182 unnecessary for TBL but will get folded out by iropt, so we lean
5183 on that and generate the same code for TBL and TBX here. */
5184 IRTemp overall_valid_mask = newTemp(Ity_V128);
5185 assign(overall_valid_mask,
5186 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
5187 IRTemp result = newTemp(Ity_V128);
5188 assign(result,
5189 binop(Iop_OrV128,
5190 mkexpr(running_result),
5191 binop(Iop_AndV128,
5192 mkexpr(oor_values),
5193 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
5194 return result;
5195}
5196
5197
sewardjbbcf1882014-01-12 12:49:10 +00005198static
5199Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
5200{
5201# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5202
5203 /* ---------------- FMOV (general) ---------------- */
5204 /* case 30 23 20 18 15 9 4
5205 (1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
5206 (2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
5207 (3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
5208
5209 (4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
5210 (5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
5211 (6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
5212 */
5213 if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
5214 && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
5215 UInt sf = INSN(31,31);
5216 UInt ty = INSN(23,22); // type
5217 UInt rm = INSN(20,19); // rmode
5218 UInt op = INSN(18,16); // opcode
5219 UInt nn = INSN(9,5);
5220 UInt dd = INSN(4,0);
5221 UInt ix = 0; // case
5222 if (sf == 0) {
5223 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5224 ix = 1;
5225 else
5226 if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5227 ix = 4;
5228 } else {
5229 vassert(sf == 1);
5230 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
5231 ix = 2;
5232 else
5233 if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
5234 ix = 5;
5235 else
5236 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
5237 ix = 3;
5238 else
5239 if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
5240 ix = 6;
5241 }
5242 if (ix > 0) {
5243 switch (ix) {
5244 case 1:
5245 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005246 putQRegLO(dd, getIReg32orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005247 DIP("fmov s%u, w%u\n", dd, nn);
5248 break;
5249 case 2:
5250 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005251 putQRegLO(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005252 DIP("fmov d%u, x%u\n", dd, nn);
5253 break;
5254 case 3:
sewardj606c4ba2014-01-26 19:11:14 +00005255 putQRegHI64(dd, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005256 DIP("fmov v%u.d[1], x%u\n", dd, nn);
5257 break;
5258 case 4:
sewardj606c4ba2014-01-26 19:11:14 +00005259 putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardjbbcf1882014-01-12 12:49:10 +00005260 DIP("fmov w%u, s%u\n", dd, nn);
5261 break;
5262 case 5:
sewardj606c4ba2014-01-26 19:11:14 +00005263 putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardjbbcf1882014-01-12 12:49:10 +00005264 DIP("fmov x%u, d%u\n", dd, nn);
5265 break;
5266 case 6:
sewardj606c4ba2014-01-26 19:11:14 +00005267 putIReg64orZR(dd, getQRegHI64(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005268 DIP("fmov x%u, v%u.d[1]\n", dd, nn);
5269 break;
5270 default:
5271 vassert(0);
5272 }
5273 return True;
5274 }
5275 /* undecodable; fall through */
5276 }
5277
5278 /* -------------- FMOV (scalar, immediate) -------------- */
5279 /* 31 28 23 20 12 9 4
5280 000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
5281 000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
5282 */
5283 if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
5284 && INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
5285 Bool isD = INSN(22,22) == 1;
5286 UInt imm8 = INSN(20,13);
5287 UInt dd = INSN(4,0);
5288 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
5289 if (!isD) {
sewardjaeeb31d2014-01-12 18:23:45 +00005290 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
sewardjbbcf1882014-01-12 12:49:10 +00005291 }
5292 putQReg128(dd, mkV128(0));
sewardj606c4ba2014-01-26 19:11:14 +00005293 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
5294 DIP("fmov %s, #0x%llx\n",
5295 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
sewardjbbcf1882014-01-12 12:49:10 +00005296 return True;
5297 }
5298
sewardjfab09142014-02-10 10:28:13 +00005299 /* -------------- {FMOV,MOVI} (vector, immediate) -------------- */
5300 /* 31 28 18 15 11 9 4
5301 0q op 01111 00000 abc cmode 01 defgh d MOV Dd, #imm (q=0)
5302 MOV Vd.2d #imm (q=1)
5303 Allowable op:cmode
5304 FMOV = 1:1111
5305 MOVI = 0:xx00, 1:0x00, 1:10x0, 1:110x, 11110
5306 */
5307 if (INSN(31,31) == 0
5308 && INSN(28,19) == BITS10(0,1,1,1,1,0,0,0,0,0)
5309 && INSN(11,10) == BITS2(0,1)) {
5310 UInt bitQ = INSN(30,30);
5311 UInt bitOP = INSN(29,29);
5312 UInt cmode = INSN(15,12);
5313 UInt imm8 = (INSN(18,16) << 5) | INSN(9,5);
5314 UInt dd = INSN(4,0);
5315 ULong imm64lo = 0;
5316 UInt op_cmode = (bitOP << 4) | cmode;
5317 Bool ok = False;
5318 switch (op_cmode) {
5319 case BITS5(1,1,1,1,1): // 1:1111
5320 case BITS5(0,0,0,0,0): case BITS5(0,0,1,0,0):
5321 case BITS5(0,1,0,0,0): case BITS5(0,1,1,0,0): // 0:xx00
5322 case BITS5(1,0,0,0,0): case BITS5(1,0,1,0,0): // 1:0x00
5323 case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
5324 case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
5325 case BITS5(1,1,1,1,0): // 1:1110
5326 ok = True; break;
5327 default:
5328 break;
5329 }
5330 if (ok) {
5331 ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, imm8);
5332 }
5333 if (ok) {
5334 ULong imm64hi = (bitQ == 0 && bitOP == 0) ? 0 : imm64lo;
5335 putQReg128(dd, binop(Iop_64HLtoV128, mkU64(imm64hi), mkU64(imm64lo)));
5336 DIP("mov %s, #0x016%llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
5337 return True;
5338 }
5339 /* else fall through */
5340 }
sewardjfab09142014-02-10 10:28:13 +00005341
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* 31  28    23 21 20 18  15     9 4                  ix
      000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn    0
      000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn    1
      100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn    2
      100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn    3

      000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn    4
      000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn    5
      100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn    6
      100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn    7

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      Bool isU   = INSN(16,16) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Table index is (isU, isI64, isF64), matching the 'ix' column in
         the header comment. */
      UInt ix = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      /* I32->F64 conversions (ix 1 and 5) are exact, so those IROps take
         no rounding mode (unop); every other combination rounds per the
         current FPCR mode (binop). */
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      /* Zero the whole Q register, then write the low S/D lane. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }
5379
   /* ------------ F{ADD,SUB,MUL,DIV,NMUL} (scalar) ------------ */
   /* 31        23 20 15   11 9 4
      ---------------- 0000 ------   FMUL  --------
      000 11110 001 m  0001 10 n d   FDIV  Sd,Sn,Sm
      000 11110 011 m  0001 10 n d   FDIV  Dd,Dn,Dm
      ---------------- 0010 ------   FADD  --------
      ---------------- 0011 ------   FSUB  --------
      ---------------- 1000 ------   FNMUL --------
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   mm  = INSN(20,16);
      UInt   op  = INSN(15,12);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IROp   iop = Iop_INVALID;
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      Bool   neg = False;
      const HChar* nm = "???";
      /* Select the IR op from the 4-bit opcode field; FNMUL is a
         multiply followed by a negation of the result. */
      switch (op) {
         case BITS4(0,0,0,0): nm = "fmul";  iop = mkMULF(ty); break;
         case BITS4(0,0,0,1): nm = "fdiv";  iop = mkDIVF(ty); break;
         case BITS4(0,0,1,0): nm = "fadd";  iop = mkADDF(ty); break;
         case BITS4(0,0,1,1): nm = "fsub";  iop = mkSUBF(ty); break;
         case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
                              neg = True; break;
         default:             return False;
      }
      vassert(iop != Iop_INVALID);
      /* All of these round per the current FPCR rounding mode. */
      IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, ty), getQRegLO(mm, ty));
      IRTemp  res  = newTemp(ty);
      assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
      /* Zero the whole Q register, then write the low S/D lane. */
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, mkexpr(res));
      DIP("%s %s, %s, %s\n",
          nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
      return True;
   }
5420
   /* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
   /* 31        23 21    16 14    9 4
      000 11110 00 10000 00 10000 n d  FMOV Sd, Sn
      000 11110 01 10000 00 10000 n d  FMOV Dd, Dn
      ------------------ 01 ---------  FABS  ------
      ------------------ 10 ---------  FNEG  ------
      ------------------ 11 ---------  FSQRT -----
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   opc = INSN(16,15);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ty);
      if (opc == BITS2(0,0)) {
         /* FMOV: copy the low S/D lane; rest of Qd is zeroed. */
         assign(res, getQRegLO(nn, ty));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fmov %s, %s\n",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,0) || opc == BITS2(0,1)) {
         /* FABS (opc=01) / FNEG (opc=10): sign-bit-only ops, no
            rounding mode needed. */
         Bool isAbs = opc == BITS2(0,1);
         IROp op    = isAbs ? mkABSF(ty) : mkNEGF(ty);
         assign(res, unop(op, getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
             nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      if (opc == BITS2(1,1)) {
         /* FSQRT rounds, hence takes the current FPCR rounding mode. */
         assign(res,
                binop(mkSQRTF(ty),
                      mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else fall through; other cases are ATC */
   }
5467
   /* ---------------- F{ABS,NEG} (vector) ---------------- */
   /* 31  28      22 21    16 9 4
      0q0 01110 1 sz 10000 01111 10 n d  FABS Vd.T, Vn.T
      0q1 01110 1 sz 10000 01111 10 n d  FNEG Vd.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,1)
       && INSN(21,17) == BITS5(1,0,0,0,0)
       && INSN(16,10) == BITS7(0,1,1,1,1,1,0)) {
      UInt bitQ   = INSN(30,30);
      UInt bitSZ  = INSN(22,22);
      Bool isFNEG = INSN(29,29) == 1;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* ar = "??";
      IRType tyF    = Ity_INVALID;
      Bool   zeroHI = False;
      /* Translate Q:sz into lane type / arrangement name; rejects the
         illegal combination (sz=1, Q=0). */
      Bool   ok     = getLaneInfo_Q_SZ(NULL, &tyF, NULL, &zeroHI, &ar,
                                       (Bool)bitQ, (Bool)bitSZ);
      if (ok) {
         vassert(tyF == Ity_F64 || tyF == Ity_F32);
         IROp   op  = (tyF == Ity_F64) ? (isFNEG ? Iop_Neg64Fx2 : Iop_Abs64Fx2)
                                       : (isFNEG ? Iop_Neg32Fx4 : Iop_Abs32Fx4);
         IRTemp res = newTemp(Ity_V128);
         assign(res, unop(op, getQReg128(nn)));
         /* Non-Q (64-bit) forms zero the upper half of Vd. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
                               : mkexpr(res));
         DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
             nameQReg128(dd), ar, nameQReg128(nn), ar);
         return True;
      }
      /* else fall through */
   }
5500
   /* -------------------- FCMP,FCMPE -------------------- */
   /* 31        23   20    15      9 4
      000 11110 01 1   m   00 1000 n 10 000  FCMPE Dn, Dm
      000 11110 01 1 00000 00 1000 n 11 000  FCMPE Dn, #0.0
      000 11110 01 1   m   00 1000 n 00 000  FCMP  Dn, Dm
      000 11110 01 1 00000 00 1000 n 01 000  FCMP  Dn, #0.0

      000 11110 00 1   m   00 1000 n 10 000  FCMPE Sn, Sm
      000 11110 00 1 00000 00 1000 n 11 000  FCMPE Sn, #0.0
      000 11110 00 1   m   00 1000 n 00 000  FCMP  Sn, Sm
      000 11110 00 1 00000 00 1000 n 01 000  FCMP  Sn, #0.0

      FCMPE generates Invalid Operation exn if either arg is any kind
      of NaN.  FCMP generates Invalid Operation exn if either arg is a
      signalling NaN.  We ignore this detail here and produce the same
      IR for both.
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
      Bool   isD     = INSN(22,22) == 1;
      UInt   mm      = INSN(20,16);
      UInt   nn      = INSN(9,5);
      Bool   isCMPE  = INSN(4,4) == 1;
      Bool   cmpZero = INSN(3,3) == 1;
      IRType ty      = isD ? Ity_F64 : Ity_F32;
      Bool   valid   = True;
      /* The compare-against-zero forms require the Rm field to be 0. */
      if (cmpZero && mm != 0) valid = False;
      if (valid) {
         IRTemp argL  = newTemp(ty);
         IRTemp argR  = newTemp(ty);
         IRTemp irRes = newTemp(Ity_I32);
         assign(argL, getQRegLO(nn, ty));
         assign(argR,
                cmpZero
                   ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
                   : getQRegLO(mm, ty));
         assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
                             mkexpr(argL), mkexpr(argR)));
         /* Convert the IRCmpF result into NZCV and park it at bits
            31..28 of a 64-bit value, which is what setFlags_COPY takes. */
         IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
         IRTemp nzcv_28x0 = newTemp(Ity_I64);
         assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
         setFlags_COPY(nzcv_28x0);
         DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
             cmpZero ? "#0.0" : nameQRegLO(mm, ty));
         return True;
      }
   }
5548
   /* -------------------- F{N}M{ADD,SUB} -------------------- */
   /* 31          22   20 15 14 9 4   ix
      000 11111 0 sz 0 m  0  a  n d   0   FMADD  Fd,Fn,Fm,Fa
      000 11111 0 sz 0 m  1  a  n d   1   FMSUB  Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  0  a  n d   2   FNMADD Fd,Fn,Fm,Fa
      000 11111 0 sz 1 m  1  a  n d   3   FNMSUB Fd,Fn,Fm,Fa
      where Fx=Dx when sz=1, Fx=Sx when sz=0

               -----SPEC------    ----IMPL----
      fmadd       a +  n * m         a + n * m
      fmsub       a + (-n) * m       a - n * m
      fnmadd   (-a) + (-n) * m     -(a + n * m)
      fnmsub   (-a) +   n  * m     -(a - n * m)
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
      Bool    isD   = INSN(22,22) == 1;
      UInt    mm    = INSN(20,16);
      UInt    aa    = INSN(14,10);
      UInt    nn    = INSN(9,5);
      UInt    dd    = INSN(4,0);
      UInt    ix    = (INSN(21,21) << 1) | INSN(15,15);
      IRType  ty    = isD ? Ity_F64 : Ity_F32;
      IROp    opADD = mkADDF(ty);
      IROp    opSUB = mkSUBF(ty);
      IROp    opMUL = mkMULF(ty);
      IROp    opNEG = mkNEGF(ty);
      IRTemp  res   = newTemp(ty);
      IRExpr* eA    = getQRegLO(aa, ty);
      IRExpr* eN    = getQRegLO(nn, ty);
      IRExpr* eM    = getQRegLO(mm, ty);
      IRExpr* rm    = mkexpr(mk_get_IR_rounding_mode());
      /* NB: separate multiply then add/sub, per the IMPL column above;
         not a true fused multiply-add. */
      IRExpr* eNxM  = triop(opMUL, rm, eN, eM);
      switch (ix) {
         case 0:  assign(res, triop(opADD, rm, eA, eNxM)); break;
         case 1:  assign(res, triop(opSUB, rm, eA, eNxM)); break;
         case 2:  assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
         case 3:  assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
         default: vassert(0);
      }
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
      DIP("%s %s, %s, %s, %s\n",
          names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
          nameQRegLO(mm, ty), nameQRegLO(aa, ty));
      return True;
   }
5596
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /*    30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
       && INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt rm    = INSN(20,19);
      Bool isU   = INSN(16,16) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp ops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp op = ops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      // Each (op, rounding-mode) pair below has been observed in the wild;
      // anything else is rejected until a test case turns up.
      if (/* F32toI32S */
             (op == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (op == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (op == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (op == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (op == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (op == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          /* F64toI32S */
          || (op == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (op == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (op == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (op == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (op == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (op == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (op == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
        /* validated */
      } else {
        return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      /* Note: the rounding mode is the statically-chosen one from the
         instruction, not the FPCR mode. */
      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
5688
   /* -------- FCVTAS (KLUDGED) (scalar, integer) -------- */
   /*   30       23   20 18  15     9 4
      1 00 11110 0x 1 00 100 000000 n d  FCVTAS Xd, Fn
      0 00 11110 0x 1 00 100 000000 n d  FCVTAS Wd, Fn
      Fn is Dn when x==1, Sn when x==0
   */
   if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0)
       && INSN(21,16) == BITS6(1,0,0,1,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
      Bool isI64 = INSN(31,31) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Decide on the IR rounding mode to use. */
      /* KLUDGE: should be Irrm_NEAREST_TIE_AWAY_0 (round to nearest,
         ties away from zero); Irrm_NEAREST rounds ties to even, so
         results differ for values exactly halfway between integers. */
      IRRoundingMode irrm = Irrm_NEAREST;
      /* Decide on the conversion primop (always the signed variant). */
      IROp   op    = isI64 ? (isF64 ? Iop_F64toI64S :  Iop_F32toI64S)
                           : (isF64 ? Iop_F64toI32S :  Iop_F32toI32S);
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src   = newTemp(srcTy);
      IRTemp dst   = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvtas %s, %s (KLUDGED)\n",
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }
5719
   /* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
   /* 31        23 21   17 14    9 4
      000 11110 0x 1001 111 10000 n d  FRINTI Fd, Fm (round per FPCR)
                   rm
      x==0 => S-registers, x==1 => D-registers
      rm (17:15) encodings:
         111 per FPCR (FRINTI)
         001 +inf (FRINTP)
         010 -inf (FRINTM)
         011 zero (FRINTZ)
         000 tieeven
         100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
         110 per FPCR + "exact = TRUE"
         101 unallocated
   */
   if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
       && INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   rm  = INSN(17,15);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRExpr* irrmE = NULL;
      UChar ch = '?';
      /* Only the statically-rounded variants listed here are handled;
         irrmE stays NULL for the rest and we fall through below. */
      switch (rm) {
         case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
         case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
         case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
         // The following is a kludge.  Should be: Irrm_NEAREST_TIE_AWAY_0
         case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
         default: break;
      }
      if (irrmE) {
         IRTemp src = newTemp(ty);
         IRTemp dst = newTemp(ty);
         assign(src, getQRegLO(nn, ty));
         assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
                           irrmE, mkexpr(src)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(dst));
         DIP("frint%c %s, %s\n",
             ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
         return True;
      }
      /* else unhandled rounding mode case -- fall through */
   }
5766
   /* ------------------ FCVT (scalar) ------------------ */
   /* 31        23 21    16 14    9 4
      000 11110 11 10001 00 10000 n d   FCVT Sd, Hn (unimp)
      --------- 11 ----- 01 ---------   FCVT Dd, Hn (unimp)
      --------- 00 ----- 11 ---------   FCVT Hd, Sn (unimp)
      --------- 00 ----- 01 ---------   FCVT Dd, Sn
      --------- 01 ----- 11 ---------   FCVT Hd, Dn (unimp)
      --------- 01 ----- 00 ---------   FCVT Sd, Dn
      Rounding, when dst is smaller than src, is per the FPCR.
   */
   if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
       && INSN(21,17) == BITS5(1,0,0,0,1)
       && INSN(14,10) == BITS5(1,0,0,0,0)) {
      UInt b2322 = INSN(23,22);
      UInt b1615 = INSN(16,15);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
         /* Convert S to D: widening is exact, so no rounding mode. */
         IRTemp res = newTemp(Ity_F64);
         assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
         return True;
      }
      if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
         /* Convert D to S: narrowing, rounded per the FPCR mode. */
         IRTemp res = newTemp(Ity_F32);
         assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
                           getQRegLO(nn, Ity_F64)));
         putQReg128(dd, mkV128(0x0000));
         putQRegLO(dd, mkexpr(res));
         DIP("fcvt %s, %s\n",
             nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
         return True;
      }
      /* else unhandled (the half-precision variants) */
   }
5807
   /* ------------------ FABD (scalar) ------------------ */
   /* 31        23  20 15     9 4
      011 11110 111 m  110101 n d  FABD Dd, Dn, Dm
      011 11110 101 m  110101 n d  FABD Sd, Sn, Sm
   */
   if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
       && INSN(15,10) == BITS6(1,1,0,1,0,1)) {
      Bool   isD = INSN(22,22) == 1;
      UInt   mm  = INSN(20,16);
      UInt   nn  = INSN(9,5);
      UInt   dd  = INSN(4,0);
      IRType ty  = isD ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ty);
      /* FABD = |n - m|: subtract (rounded per FPCR), then abs. */
      assign(res, unop(mkABSF(ty),
                       triop(mkSUBF(ty),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ty), getQRegLO(mm,ty))));
      /* Zero the whole Q register, then write the low S/D lane. */
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
      return True;
   }
5831
   /* -------------- {S,U}CVTF (vector, integer) -------------- */
   /* 31  28      22 21     15     9 4
      0q0 01110 0 sz 1 00001 110110 n d  SCVTF Vd, Vn
      0q1 01110 0 sz 1 00001 110110 n d  UCVTF Vd, Vn
      with laneage:
      case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(1,1,0,1,1,0)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;
      Bool isF64 = INSN(22,22) == 1;
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      /* Reject the illegal sz:Q == 10 case up front. */
      if (isQ || !isF64) {
         IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
         UInt   nLanes = 0;
         Bool   zeroHI = False;
         const HChar* arrSpec = NULL;
         Bool   ok  = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
                                       isQ, isF64 );
         IROp   op  = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
                          : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
         IRTemp rm  = mk_get_IR_rounding_mode();
         UInt   i;
         vassert(ok); /* the 'if' above should ensure this */
         /* Convert lane by lane, rounding per the FPCR mode. */
         for (i = 0; i < nLanes; i++) {
            putQRegLane(dd, i,
                        binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
         }
         /* 2S form: zero the upper 64 bits of Vd. */
         if (zeroHI) {
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
             nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
5872
   /* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
   /* 31  28      22 21 20 15     9 4      case
      0q0 01110 0 sz 1  m  110101 n d      FADD Vd,Vn,Vm  1
      0q0 01110 1 sz 1  m  110101 n d      FSUB Vd,Vn,Vm  2
      0q1 01110 0 sz 1  m  110111 n d      FMUL Vd,Vn,Vm  3
      0q1 01110 0 sz 1  m  111111 n d      FDIV Vd,Vn,Vm  4
      0q0 01110 0 sz 1  m  110011 n d      FMLA Vd,Vn,Vm  5
      0q0 01110 1 sz 1  m  110011 n d      FMLS Vd,Vn,Vm  6
      0q1 01110 1 sz 1  m  110101 n d      FABD Vd,Vn,Vm  7
   */
   if (INSN(31,31) == 0
       && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
      Bool isQ   = INSN(30,30) == 1;
      UInt b29   = INSN(29,29);
      UInt b23   = INSN(23,23);
      Bool isF64 = INSN(22,22) == 1;
      UInt mm    = INSN(20,16);
      UInt b1510 = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt ix    = 0;
      /* Map (b29, b23, bits 15:10) onto the case numbers in the header
         comment; ix == 0 means "not one of ours". */
      /**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
      else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
      else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
      else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
      else if (b29 == 1 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 7;
      IRType laneTy = Ity_INVALID;
      Bool   zeroHI = False;
      const HChar* arr = "??";
      Bool ok
         = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
      /* Skip MLA/MLS for the time being */
      if (ok && ix >= 1 && ix <= 4) {
         /* Simple lanewise binary ops, rounded per FPCR. */
         const IROp ops64[4]
            = { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
         const IROp ops32[4]
            = { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
         const HChar* names[4]
            = { "fadd", "fsub", "fmul", "fdiv" };
         IROp   op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
                           : mkexpr(t1));
         putQReg128(dd, mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      if (ok && ix >= 5 && ix <= 6) {
         /* FMLA/FMLS: d +/- (n * m), accumulating into Vd. */
         IROp opADD = laneTy==Ity_F64 ? Iop_Add64Fx2 : Iop_Add32Fx4;
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opMUL = laneTy==Ity_F64 ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: double rounding; use FMA primops instead
         assign(t1, triop(opMUL,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, triop(ix == 5 ? opADD : opSUB,
                          mkexpr(rm), getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("%s %s.%s, %s.%s, %s.%s\n", ix == 5 ? "fmla" : "fmls",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      if (ok && ix == 7) {
         /* FABD: |n - m| lanewise. */
         IROp opSUB = laneTy==Ity_F64 ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
         IROp opABS = laneTy==Ity_F64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
         IRTemp rm = mk_get_IR_rounding_mode();
         IRTemp t1 = newTemp(Ity_V128);
         IRTemp t2 = newTemp(Ity_V128);
         // FIXME: use Abd primop instead?
         assign(t1, triop(opSUB,
                          mkexpr(rm), getQReg128(nn), getQReg128(mm)));
         assign(t2, unop(opABS, mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         DIP("fabd %s.%s, %s.%s, %s.%s\n",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
   }
5961
sewardj2bd1ffe2014-03-27 18:59:00 +00005962 /* ------------ FCM{EQ,GE,GT}, FAC{GE,GT} (vector) ------------ */
5963 /* 31 28 22 20 15 9 4 case
5964 0q1 01110 0 sz 1 m 111011 n d FACGE Vd, Vn, Vm
5965 0q1 01110 1 sz 1 m 111011 n d FACGT Vd, Vn, Vm
5966 0q0 01110 0 sz 1 m 111001 n d FCMEQ Vd, Vn, Vm
5967 0q1 01110 0 sz 1 m 111001 n d FCMGE Vd, Vn, Vm
5968 0q1 01110 1 sz 1 m 111001 n d FCMGT Vd, Vn, Vm
5969 */
5970 if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1
5971 && INSN(15,12) == BITS4(1,1,1,0) && INSN(10,10) == 1) {
5972 Bool isQ = INSN(30,30) == 1;
5973 UInt U = INSN(29,29);
5974 UInt E = INSN(23,23);
5975 Bool isF64 = INSN(22,22) == 1;
5976 UInt ac = INSN(11,11);
5977 UInt mm = INSN(20,16);
5978 UInt nn = INSN(9,5);
5979 UInt dd = INSN(4,0);
5980 /* */
5981 UInt EUac = (E << 2) | (U << 1) | ac;
5982 IROp opABS = Iop_INVALID;
5983 IROp opCMP = Iop_INVALID;
5984 IRType laneTy = Ity_INVALID;
5985 Bool zeroHI = False;
5986 Bool swap = True;
5987 const HChar* arr = "??";
5988 const HChar* nm = "??";
5989 Bool ok
5990 = getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
5991 if (ok) {
5992 vassert((isF64 && laneTy == Ity_F64) || (!isF64 && laneTy == Ity_F32));
5993 switch (EUac) {
5994 case BITS3(0,0,0):
5995 nm = "fcmeq";
5996 opCMP = isF64 ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
5997 swap = False;
5998 break;
5999 case BITS3(0,1,0):
6000 nm = "fcmge";
6001 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6002 break;
6003 case BITS3(0,1,1):
6004 nm = "facge";
6005 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6006 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6007 break;
6008 case BITS3(1,1,0):
6009 nm = "fcmgt";
6010 opCMP = isF64 ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
6011 break;
6012 case BITS3(1,1,1):
6013 nm = "fcagt";
6014 opCMP = isF64 ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
6015 opABS = isF64 ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
6016 break;
6017 default:
6018 break;
6019 }
6020 }
6021 if (opCMP != Iop_INVALID) {
6022 IRExpr* argN = getQReg128(nn);
6023 IRExpr* argM = getQReg128(mm);
6024 if (opABS != Iop_INVALID) {
6025 argN = unop(opABS, argN);
6026 argM = unop(opABS, argM);
6027 }
6028 IRExpr* res = swap ? binop(opCMP, argM, argN)
6029 : binop(opCMP, argN, argM);
6030 if (zeroHI) {
6031 res = unop(Iop_ZeroHI64ofV128, res);
6032 }
6033 putQReg128(dd, res);
6034 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
6035 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
6036 return True;
6037 }
6038 /* else fall through */
6039 }
6040
   /* -------------------- FCVTN -------------------- */
   /* 31  28    23  20    15     9 4
      0q0 01110 0s1 00001 011010 n d  FCVTN Vd, Vn
      where case q:s of 00: 16Fx4(lo) <- 32Fx4
                        01: 32Fx2(lo) <- 64Fx2
                        10: 16Fx4(hi) <- 32Fx4
                        11: 32Fx2(hi) <- 64Fx2
      Only deals with the 32Fx2 <- 64Fx2 version (s==1)
   */
   if (INSN(31,31) == 0 && INSN(29,23) == BITS7(0,0,1,1,1,0,0)
       && INSN(21,10) == BITS12(1,0,0,0,0,1,0,1,1,0,1,0)) {
      UInt bQ = INSN(30,30);
      UInt bS = INSN(22,22);
      UInt nn = INSN(9,5);
      UInt dd = INSN(4,0);
      if (bS == 1) {
         /* Narrow both D lanes to S, rounding per FPCR.  FCVTN (q=0)
            writes the low half of Vd; FCVTN2 (q=1) the high half. */
         IRTemp  rm    = mk_get_IR_rounding_mode();
         IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
         IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
         putQRegLane(dd, 2 * bQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
         putQRegLane(dd, 2 * bQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
         if (bQ == 0) {
            /* FCVTN zeroes the upper 64 bits of Vd. */
            putQRegLane(dd, 1, mkU64(0));
         }
         DIP("fcvtn%s %s.%s, %s.2d\n", bQ ? "2" : "",
             nameQReg128(dd), bQ ? "4s" : "2s", nameQReg128(nn));
         return True;
      }
      /* else fall through (the 16Fx4 narrowing forms) */
   }
6071
   /* ---------------- ADD/SUB (vector) ---------------- */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100001 n d  ADD Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  100001 n d  SUB Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isSUB  = INSN(29,29) == 1;
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      /* Validate size:Q and get the arrangement name (e.g. "8b", "4s"). */
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         /* szBlg2 selects the lane width: 0=8b .. 3=64b. */
         const IROp opsADD[4]
            = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
         const IROp opsSUB[4]
            = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
         vassert(szBlg2 < 4);
         IROp   op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
         IRTemp t  = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         /* Non-Q forms zero the upper 64 bits of Vd. */
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isSUB ? "sub" : "add";
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6107
sewardjecde6972014-02-05 11:01:19 +00006108 /* ---------------- ADD/SUB (scalar) ---------------- */
6109 /* 31 28 23 21 20 15 9 4
6110 010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm
6111 011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm
6112 */
6113 if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
6114 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
6115 Bool isSUB = INSN(29,29) == 1;
6116 UInt mm = INSN(20,16);
6117 UInt nn = INSN(9,5);
6118 UInt dd = INSN(4,0);
6119 IRTemp res = newTemp(Ity_I64);
6120 assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
6121 getQRegLane(nn, 0, Ity_I64),
6122 getQRegLane(mm, 0, Ity_I64)));
6123 putQRegLane(dd, 0, mkexpr(res));
6124 putQRegLane(dd, 1, mkU64(0));
6125 DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
6126 nameQRegLO(dd, Ity_I64),
6127 nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
6128 return True;
6129 }
6130
   /* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  100111 n d  MUL  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100111 n d  PMUL Vd.T, Vn.T, Vm.T  B only
      0q0 01110 size 1  m  100101 n d  MLA  Vd.T, Vn.T, Vm.T  B/H/S only
      0q1 01110 size 1  m  100101 n d  MLS  Vd.T, Vn.T, Vm.T  B/H/S only
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
      Bool isQ    = INSN(30,30) == 1;
      UInt szBlg2 = INSN(23,22);
      UInt bit29  = INSN(29,29);
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool isMLAS = INSN(11,11) == 0;  /* bit 11: 0 => MLA/MLS, 1 => MUL/PMUL */
      /* Iop_INVALID entries at index 3 reject the 64-bit lane forms;
         opsPMUL is valid for byte lanes only. */
      const IROp opsADD[4]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
      const IROp opsSUB[4]
         = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
      const IROp opsMUL[4]
         = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      /* Set opMUL and, if necessary, opACC.  A result value of
         Iop_INVALID for opMUL indicates that the instruction is
         invalid. */
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      vassert(szBlg2 < 4);
      IROp opACC = Iop_INVALID;
      IROp opMUL = Iop_INVALID;
      if (ok) {
         opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
                                         : opsMUL[szBlg2];
         opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
                        : Iop_INVALID;
      }
      if (ok && opMUL != Iop_INVALID) {
         /* t1 = Vn * Vm; for MLA/MLS, t2 accumulates into/out of the
            existing Vd value, otherwise t2 is just the product. */
         IRTemp t1 = newTemp(Ity_V128);
         assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         IRTemp t2 = newTemp(Ity_V128);
         assign(t2, opACC == Iop_INVALID
                       ? mkexpr(t1)
                       : binop(opACC, getQReg128(dd), mkexpr(t1)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
                               : mkexpr(t2));
         const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
                                  : (bit29 == 1 ? "pmul" : "mul");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6189
   /* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
   /* 31  28    23   21 20 15     9 4
      0q0 01110 size 1  m  011011 n d  SMIN Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011011 n d  UMIN Vd.T, Vn.T, Vm.T
      0q0 01110 size 1  m  011001 n d  SMAX Vd.T, Vn.T, Vm.T
      0q1 01110 size 1  m  011001 n d  UMAX Vd.T, Vn.T, Vm.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,21) == 1
       && ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;  /* bit 29: 1 => unsigned */
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(11,11) == 0;  /* bit 11: 0 => MAX, 1 => MIN */
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
      if (ok) {
         const IROp opMINS[4]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
         const IROp opMINU[4]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
         const IROp opMAXS[4]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
         const IROp opMAXU[4]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
         vassert(szBlg2 < 4);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp t = newTemp(Ity_V128);
         assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
                               : mkexpr(t));
         const HChar* nm = isMAX ? (isU ? "umax" : "smax")
                                 : (isU ? "umin" : "smin");
         DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
             nameQReg128(dd), arrSpec,
             nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
         return True;
      }
      /* else fall through */
   }
6235
   /* -------------------- {S,U}{MIN,MAX}V -------------------- */
   /* 31  28    23   21    16 15     9 4
      0q0 01110 size 11000 1  101010 n d  SMINV Vd, Vn.T
      0q1 01110 size 11000 1  101010 n d  UMINV Vd, Vn.T
      0q0 01110 size 11000 0  101010 n d  SMAXV Vd, Vn.T
      0q1 01110 size 11000 0  101010 n d  UMAXV Vd, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
       && INSN(21,17) == BITS5(1,1,0,0,0)
       && INSN(15,10) == BITS6(1,0,1,0,1,0)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isU    = INSN(29,29) == 1;
      UInt szBlg2 = INSN(23,22);
      Bool isMAX  = INSN(16,16) == 0;
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool zeroHI = False;
      const HChar* arrSpec = "";
      Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
      if (ok) {
         /* Reject 64-bit lanes entirely, and 32-bit lanes in the
            64-bit (q==0) arrangement. */
         if (szBlg2 == 3) ok = False;
         if (szBlg2 == 2 && !isQ) ok = False;
      }
      if (ok) {
         const IROp opMINS[3]
            = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
         const IROp opMINU[3]
            = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
         const IROp opMAXS[3]
            = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
         const IROp opMAXU[3]
            = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
         vassert(szBlg2 < 3);
         IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
                         : (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
         IRTemp tN1 = newTemp(Ity_V128);
         assign(tN1, getQReg128(nn));
         /* If Q == 0, we're just folding lanes in the lower half of
            the value.  In which case, copy the lower half of the
            source into the upper half, so we can then treat it the
            same as the full width case. */
         IRTemp tN2 = newTemp(Ity_V128);
         assign(tN2, zeroHI ? mk_CatEvenLanes64x2(tN1,tN1) : mkexpr(tN1));
         /* math_MINMAXV folds all lanes with 'op'; it signals an
            unsupported combination by returning IRTemp_INVALID. */
         IRTemp res = math_MINMAXV(tN2, op);
         if (res == IRTemp_INVALID)
            return False; /* means math_MINMAXV
                             doesn't handle this case yet */
         putQReg128(dd, mkexpr(res));
         const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
                                 : (isU ? "uminv" : "sminv");
         const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
         IRType laneTy = tys[szBlg2];
         DIP("%s %s, %s.%s\n", nm,
             nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
         return True;
      }
      /* else fall through */
   }
6294
   /* ------------ {AND,BIC,ORR,ORN} (vector) ------------ */
   /* 31  28    23  20 15     9 4
      0q0 01110 001 m  000111 n d  AND Vd.T, Vn.T, Vm.T
      0q0 01110 011 m  000111 n d  BIC Vd.T, Vn.T, Vm.T
      0q0 01110 101 m  000111 n d  ORR Vd.T, Vn.T, Vm.T
      0q0 01110 111 m  000111 n d  ORN Vd.T, Vn.T, Vm.T
      T is 16b when q==1, 8b when q==0
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ    = INSN(30,30) == 1;
      Bool isORR  = INSN(23,23) == 1;  /* bit 23: 0 => AND/BIC, 1 => ORR/ORN */
      Bool invert = INSN(22,22) == 1;  /* bit 22: complement Vm => BIC/ORN */
      UInt mm     = INSN(20,16);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      IRTemp res = newTemp(Ity_V128);
      assign(res, binop(isORR ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, isQ ? mkexpr(res)
                         : unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      /* names[] is indexed directly by the op selector bits 23:22. */
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = isQ ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }
6324
sewardje520bb32014-02-17 11:00:53 +00006325 /* ---------- CM{EQ,HI,HS,GE,GT,TST,LE,LT} (vector) ---------- */
6326 /* 31 28 23 21 15 9 4 ix
6327 0q1 01110 size 1 m 100011 n d CMEQ Vd.T, Vn.T, Vm.T (1) ==
6328 0q0 01110 size 1 m 100011 n d CMTST Vd.T, Vn.T, Vm.T (2) &, == 0
6329
6330 0q1 01110 size 1 m 001101 n d CMHI Vd.T, Vn.T, Vm.T (3) >u
6331 0q0 01110 size 1 m 001101 n d CMGT Vd.T, Vn.T, Vm.T (4) >s
6332
6333 0q1 01110 size 1 m 001111 n d CMHS Vd.T, Vn.T, Vm.T (5) >=u
6334 0q0 01110 size 1 m 001111 n d CMGE Vd.T, Vn.T, Vm.T (6) >=s
6335
6336 0q1 01110 size 100000 100010 n d CMGE Vd.T, Vn.T, #0 (7) >=s 0
6337 0q0 01110 size 100000 100010 n d CMGT Vd.T, Vn.T, #0 (8) >s 0
6338
6339 0q1 01110 size 100000 100110 n d CMLE Vd.T, Vn.T, #0 (9) <=s 0
6340 0q0 01110 size 100000 100110 n d CMEQ Vd.T, Vn.T, #0 (10) == 0
6341
6342 0q0 01110 size 100000 101010 n d CMLT Vd.T, Vn.T, #0 (11) <s 0
6343 */
6344 if (INSN(31,31) == 0
6345 && INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
6346 Bool isQ = INSN(30,30) == 1;
6347 UInt bit29 = INSN(29,29);
6348 UInt szBlg2 = INSN(23,22);
6349 UInt mm = INSN(20,16);
6350 UInt b1510 = INSN(15,10);
6351 UInt nn = INSN(9,5);
6352 UInt dd = INSN(4,0);
6353 const IROp opsEQ[4]
6354 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
6355 const IROp opsGTS[4]
6356 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
6357 const IROp opsGTU[4]
6358 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
6359 Bool zeroHI = False;
6360 const HChar* arrSpec = "??";
6361 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
6362 UInt ix = 0;
6363 if (ok) {
6364 switch (b1510) {
6365 case BITS6(1,0,0,0,1,1): ix = bit29 ? 1 : 2; break;
6366 case BITS6(0,0,1,1,0,1): ix = bit29 ? 3 : 4; break;
6367 case BITS6(0,0,1,1,1,1): ix = bit29 ? 5 : 6; break;
6368 case BITS6(1,0,0,0,1,0):
6369 if (mm == 0) { ix = bit29 ? 7 : 8; }; break;
6370 case BITS6(1,0,0,1,1,0):
6371 if (mm == 0) { ix = bit29 ? 9 : 10; }; break;
6372 case BITS6(1,0,1,0,1,0):
6373 if (mm == 0 && bit29 == 0) { ix = 11; }; break;
6374 default: break;
6375 }
6376 }
6377 if (ix != 0) {
6378 vassert(ok && szBlg2 < 4);
6379 IRExpr* argL = getQReg128(nn);
6380 IRExpr* argR = (ix <= 6) ? getQReg128(mm) : mkV128(0x0000);
6381 IRExpr* res = NULL;
6382 /* Some useful identities:
6383 x > y can be expressed directly
6384 x < y == y > x
6385 x <= y == not (x > y)
6386 x >= y == not (y > x)
6387 */
6388 switch (ix) {
6389 case 1: res = binop(opsEQ[szBlg2], argL, argR); break;
6390 case 2: binop(opsEQ[szBlg2],
6391 binop(Iop_AndV128, argL, argR),
6392 mkV128(0x0000));
6393 break;
6394 case 3: res = binop(opsGTU[szBlg2], argL, argR); break;
6395 case 4: res = binop(opsGTS[szBlg2], argL, argR); break;
6396 case 5: res = unop(Iop_NotV128, binop(opsGTU[szBlg2], argR, argL));
6397 break;
6398 case 6: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
6399 break;
6400 case 7: res = unop(Iop_NotV128, binop(opsGTS[szBlg2], argR, argL));
6401 break;
6402 case 8: res = binop(opsGTS[szBlg2], argL, argR); break;
6403 case 9: res = unop(Iop_NotV128,
6404 binop(opsGTS[szBlg2], argL, argR));
6405 break;
6406 case 10: res = binop(opsEQ[szBlg2], argL, argR); break;
6407 case 11: res = binop(opsGTS[szBlg2], argR, argL); break;
6408 default: vassert(0);
6409 }
6410 vassert(res);
6411 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, res) : res);
6412 const HChar* nms[11] = { "eq", "tst", "hi", "gt", "hs", "ge",
6413 "ge", "gt", "le", "eq", "lt" };
6414 if (ix <= 6) {
6415 DIP("cm%s %s.%s, %s.%s, %s.%s\n", nms[ix-1],
6416 nameQReg128(dd), arrSpec,
6417 nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
6418 } else {
6419 DIP("cm%s %s.%s, %s.%s, #0\n", nms[ix-1],
6420 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
6421 }
6422 return True;
6423 }
6424 /* else fall through */
6425 }
6426
   /* -------------- {EOR,BSL,BIT,BIF} (vector) -------------- */
   /* 31  28    23 20 15     9 4
      0q1 01110 00 1  m 000111 n d  EOR Vd.T, Vm.T, Vn.T
      0q1 01110 01 1  m 000111 n d  BSL Vd.T, Vm.T, Vn.T
      0q1 01110 10 1  m 000111 n d  BIT Vd.T, Vm.T, Vn.T
      0q1 01110 11 1  m 000111 n d  BIF Vd.T, Vm.T, Vn.T
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
       && INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      Bool isQ = INSN(30,30) == 1;
      UInt op  = INSN(23,22);
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      IRTemp argD = newTemp(Ity_V128);
      IRTemp argN = newTemp(Ity_V128);
      IRTemp argM = newTemp(Ity_V128);
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
      IRExpr* res = NULL;
      switch (op) {
         case BITS2(0,0): /* EOR: N ^ M */
            res = binop(opXOR, mkexpr(argM), mkexpr(argN));
            break;
         case BITS2(0,1): /* BSL: per-bit D ? N : M,
                             computed as M ^ ((M ^ N) & D) */
            res = binop(opXOR, mkexpr(argM),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                     mkexpr(argD)));
            break;
         case BITS2(1,0): /* BIT: per-bit M ? N : D,
                             computed as D ^ ((D ^ N) & M) */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     mkexpr(argM)));
            break;
         case BITS2(1,1): /* BIF: per-bit M ? D : N,
                             computed as D ^ ((D ^ N) & ~M) */
            res = binop(opXOR, mkexpr(argD),
                               binop(opAND,
                                     binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                     unop(opNOT, mkexpr(argM))));
            break;
         default:
            vassert(0);
      }
      vassert(res);
      putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = isQ ? "16b" : "8b";
      vassert(op < 4);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[op],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }
6485
   /* ------------ {USHR,SSHR,SHL} (vector, immediate) ------------ */
   /* 31  28     22   18   15     9 4
      0q1 011110 immh immb 000001 n d  USHR Vd.T, Vn.T, #shift (1)
      0q0 011110 immh immb 000001 n d  SSHR Vd.T, Vn.T, #shift (2)
      0q0 011110 immh immb 010101 n d  SHL  Vd.T, Vn.T, #shift (3)
      laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx,    SHL:xxx
                         001x:xxx -> H, SHR:16-xxxx   SHL:xxxx
                         01xx:xxx -> S, SHR:32-xxxxx  SHL:xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx SHL:xxxxxx
                         other    -> invalid
      As usual the case laneTy==D && q==0 is not allowed.
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(10,10) == 1) {
      UInt ix = 0;
      /**/ if (INSN(29,29) == 1 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 1;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,0,0,0,0)) ix = 2;
      else if (INSN(29,29) == 0 && INSN(15,11) == BITS5(0,1,0,1,0)) ix = 3;
      if (ix > 0) {
         Bool isQ  = INSN(30,30) == 1;
         UInt immh = INSN(22,19);
         UInt immb = INSN(18,16);
         UInt nn   = INSN(9,5);
         UInt dd   = INSN(4,0);
         const IROp opsSHRN[4]
            = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
         const IROp opsSARN[4]
            = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
         const IROp opsSHLN[4]
            = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
         UInt szBlg2 = 0;
         UInt shift  = 0;
         /* Decodes immh:immb into a lane size and a right-shift
            style amount. */
         Bool ok = getLaneInfo_IMMH_IMMB(&shift, &szBlg2, immh, immb);
         if (ix == 3) {
            /* The shift encoding has opposite sign for the leftwards
               case.  Adjust shift to compensate. */
            shift = (8 << szBlg2) - shift;
         }
         /* NOTE(review): the 'shift < (8 << szBlg2)' guard also
            rejects shift == lane-width, which is a valid encoding for
            USHR/SSHR; such insns fall through undecoded here --
            confirm they are handled elsewhere. */
         if (ok && szBlg2 < 4 && shift > 0 && shift < (8 << szBlg2)
             && !(szBlg2 == 3/*64bit*/ && !isQ)) {
            IROp op = Iop_INVALID;
            const HChar* nm = NULL;
            switch (ix) {
               case 1: op = opsSHRN[szBlg2]; nm = "ushr"; break;
               case 2: op = opsSARN[szBlg2]; nm = "sshr"; break;
               case 3: op = opsSHLN[szBlg2]; nm = "shl";  break;
               default: vassert(0);
            }
            IRExpr* src = getQReg128(nn);
            IRExpr* res = binop(op, src, mkU8(shift));
            putQReg128(dd, isQ ? res : unop(Iop_ZeroHI64ofV128, res));
            HChar laneCh = "bhsd"[szBlg2];
            UInt  nLanes = (isQ ? 128 : 64) / (8 << szBlg2);
            DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
                nameQReg128(dd), nLanes, laneCh,
                nameQReg128(nn), nLanes, laneCh, shift);
            return True;
         }
         /* else fall through */
      }
   }
6548
   /* -------------------- {U,S}SHLL{,2} -------------------- */
   /* 31  28     22   18   15     9 4
      0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
      0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
      where Ta,Tb,sh
        = case immh of 1xxx -> invalid
                       01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                       001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                       0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                       0000 -> AdvSIMD modified immediate (???)
   */
   if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,1,0)
       && INSN(15,10) == BITS6(1,0,1,0,0,1)) {
      Bool isQ   = INSN(30,30) == 1;
      Bool isU   = INSN(29,29) == 1;
      UInt immh  = INSN(22,19);
      UInt immb  = INSN(18,16);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTemp(Ity_V128);
      IRTemp  zero = newTemp(Ity_V128);
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      /* Strategy: interleave the source with zero so each source lane
         occupies the top half of a double-width lane, then shift
         right by (lane-width - sh) -- logically for U, arithmetically
         for S.  That widens with zero/sign extension and shifts left
         by sh in one go.  The q==1 ("2") variants take the high half
         of Vn (InterleaveHI), the q==0 ones the low half. */
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      /* else fall through */
   }
6620
   /* -------------------- XTN{,2} -------------------- */
   /* 31  28    23   21     15     9 4  XTN{,2} Vd.Tb, Vn.Ta
      0q0 01110 size 100001 001010 n d
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
       && INSN(21,16) == BITS6(1,0,0,0,0,1)
       && INSN(15,10) == BITS6(0,0,1,0,1,0)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt size = INSN(23,22);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IROp op   = Iop_INVALID;
      const HChar* tb = NULL;
      const HChar* ta = NULL;
      /* Index is size:q; size==3 (cases 6,7) is invalid. */
      switch ((size << 1) | (isQ ? 1 : 0)) {
         case 0: tb = "8b";  ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8;  break;
         case 2: tb = "4h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 3: tb = "8h";  ta = "4s"; op = Iop_NarrowUn32to16x4; break;
         case 4: tb = "2s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 5: tb = "4s";  ta = "2d"; op = Iop_NarrowUn64to32x2; break;
         case 6: break;
         case 7: break;
         default: vassert(0);
      }
      if (op != Iop_INVALID) {
         /* XTN (q==0) writes the narrowed 64 bits to the low half and
            zeroes the top; XTN2 (q==1) writes the top half, leaving
            the low half of Vd unchanged. */
         if (!isQ) {
            putQRegLane(dd, 1, mkU64(0));
         }
         putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
         DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
             nameQReg128(dd), tb, nameQReg128(nn), ta);
         return True;
      }
      /* else fall through */
   }
6657
   /* ---------------- DUP (element, vector) ---------------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,0,1)) {
      Bool isQ  = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arT  = "??";
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 selects the lane size; the bits
         above it give the lane index.  w0 receives the selected lane
         of Vn, zero-extended to 64 bits. */
      if (imm5 & 1) {
         arT  = isQ ? "16b" : "8b";
         arTs = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arT  = isQ ? "8h" : "4h";
         arTs = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arT  = isQ ? "4s" : "2s";
         arTs = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if ((imm5 & 8) && isQ) {
         arT  = "2d";
         arTs = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Replicate the lane across 64 bits, then across 128 when
            Q==1; the upper half is zeroed for the 64-bit forms. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s.%s[%u]\n",
             nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
6716
   /* ---------------- DUP (general, vector) ---------------- */
   /* 31  28    23  20   15     9 4
      0q0 01110 000 imm5 000011 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),     R=W
            xxx10  4H(q=0)      or 8H(q=1),      R=W
            xx100  2S(q=0)      or 4S(q=1),      R=W
            x1000  Invalid(q=0) or 2D(q=1),      R=X
            x0000  Invalid(q=0) or Invalid(q=1)
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,0,1,1)) {
      Bool   isQ  = INSN(30,30) == 1;
      UInt   imm5 = INSN(20,16);
      UInt   nn   = INSN(9,5);
      UInt   dd   = INSN(4,0);
      IRTemp w0   = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy    = Ity_INVALID;
      /* w0 receives the low lane-size bits of Rn, zero-extended to
         64 bits. */
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         /* Replicate across 64 bits, then across 128 when Q==1; the
            upper half is zeroed for the 64-bit forms. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* else fall through */
   }
6770
   /* ---------------------- {S,U}MOV ---------------------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
       && (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
      UInt bitQ = INSN(30,30) == 1;
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      Bool isU  = INSN(12,12) == 1;  /* bit 12: 1 => UMOV, 0 => SMOV */
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }
6868
   /* -------------------- INS (general) -------------------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (INSN(31,21) == BITS11(0,1,0,0,1,1,1,0,0,0,0)
       && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
      UInt imm5 = INSN(20,16);
      UInt nn   = INSN(9,5);
      UInt dd   = INSN(4,0);
      HChar ts  = '?';
      UInt  laneNo = 16;
      IRExpr* src  = NULL;
      /* The lowest set bit of imm5 selects the lane size; the bits
         above it give the lane index.  src is the low Ts bits of Rn;
         only the selected lane of Vd is written. */
      if (imm5 & 1) {
         src = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts = 'b';
      }
      else if (imm5 & 2) {
         src = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts = 'h';
      }
      else if (imm5 & 4) {
         src = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts = 's';
      }
      else if (imm5 & 8) {
         src = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* else invalid; fall through */
   }
6915
sewardj32d86752014-03-02 12:47:18 +00006916 /* -------------------- NEG (vector) -------------------- */
6917 /* 31 28 23 21 16 9 4
6918 0q1 01110 sz 10000 0101110 n d NEG Vd, Vn
6919 sz is laneSz, q:sz == 011 is disallowed, as usual
6920 */
6921 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(1,0,1,1,1,0)
6922 && INSN(21,10) == BITS12(1,0,0,0,0,0,1,0,1,1,1,0)) {
6923 Bool isQ = INSN(30,30) == 1;
6924 UInt szBlg2 = INSN(23,22);
6925 UInt nn = INSN(9,5);
6926 UInt dd = INSN(4,0);
6927 Bool zeroHI = False;
6928 const HChar* arrSpec = "";
6929 Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
6930 if (ok) {
6931 const IROp opSUB[4]
6932 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
6933 IRTemp res = newTemp(Ity_V128);
6934 vassert(szBlg2 < 4);
6935 assign(res, binop(opSUB[szBlg2], mkV128(0x0000), getQReg128(nn)));
6936 putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(res))
6937 : mkexpr(res));
6938 DIP("neg %s.%s, %s.%s\n",
6939 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
6940 return True;
6941 }
6942 /* else fall through */
6943 }
6944
sewardj92d0ae32014-04-03 13:48:54 +00006945 /* -------------------- TBL, TBX -------------------- */
6946 /* 31 28 20 15 14 12 9 4
6947 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6948 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6949 where Ta = 16b(q=1) or 8b(q=0)
6950 */
6951 if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
6952 && INSN(15,15) == 0 && INSN(11,10) == BITS2(0,0)) {
6953 Bool isQ = INSN(30,30) == 1;
6954 Bool isTBX = INSN(12,12) == 1;
6955 UInt mm = INSN(20,16);
6956 UInt len = INSN(14,13);
6957 UInt nn = INSN(9,5);
6958 UInt dd = INSN(4,0);
6959 /* The out-of-range values to use. */
6960 IRTemp oor_values = newTemp(Ity_V128);
6961 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
6962 /* src value */
6963 IRTemp src = newTemp(Ity_V128);
6964 assign(src, getQReg128(mm));
6965 /* The table values */
6966 IRTemp tab[4];
6967 UInt i;
6968 for (i = 0; i <= len; i++) {
6969 vassert(i < 4);
6970 tab[i] = newTemp(Ity_V128);
6971 assign(tab[i], getQReg128((nn + i) % 32));
6972 }
6973 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
6974 putQReg128(dd, isQ ? mkexpr(res)
6975 : unop(Iop_ZeroHI64ofV128, mkexpr(res)) );
6976 const HChar* Ta = isQ ? "16b" : "8b";
6977 const HChar* nm = isTBX ? "tbx" : "tbl";
6978 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
6979 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
6980 return True;
6981 }
sewardjbbcf1882014-01-12 12:49:10 +00006982 /* FIXME Temporary hacks to get through ld.so FIXME */
6983
6984 /* ------------------ movi vD.4s, #0x0 ------------------ */
6985 /* 0x4F 0x00 0x04 000 vD */
6986 if ((insn & 0xFFFFFFE0) == 0x4F000400) {
6987 UInt vD = INSN(4,0);
6988 putQReg128(vD, mkV128(0x0000));
6989 DIP("movi v%u.4s, #0x0\n", vD);
6990 return True;
6991 }
6992
sewardjbbcf1882014-01-12 12:49:10 +00006993 /* ---------------- MOV vD.16b, vN.16b ---------------- */
6994 /* 31 23 20 15 9 4
6995 010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
6996 This only handles the N == M case.
6997 */
6998 if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
6999 && INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
7000 UInt mm = INSN(20,16);
7001 UInt nn = INSN(9,5);
7002 UInt dd = INSN(4,0);
7003 if (mm == nn) {
7004 putQReg128(dd, getQReg128(nn));
7005 DIP("mov v%u.16b, v%u.16b\n", dd, nn);
7006 return True;
7007 }
7008 /* else it's really an ORR; fall through. */
7009 }
7010
7011 vex_printf("ARM64 front end: simd_and_fp\n");
7012 return False;
7013# undef INSN
7014}
7015
7016
7017/*------------------------------------------------------------*/
7018/*--- Disassemble a single ARM64 instruction ---*/
7019/*------------------------------------------------------------*/
7020
7021/* Disassemble a single ARM64 instruction into IR. The instruction
7022 has is located at |guest_instr| and has guest IP of
7023 |guest_PC_curr_instr|, which will have been set before the call
7024 here. Returns True iff the instruction was decoded, in which case
7025 *dres will be set accordingly, or False, in which case *dres should
7026 be ignored by the caller. */
7027
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool resteerCisOk,
        void* callback_opaque,
        UChar* guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo* abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt insn;
//ZZ    //Bool  allow_VFP = False;
//ZZ    //UInt  hwcaps = archinfo->hwcaps;
//ZZ    IRTemp condT; /* :: Ity_I32 */
//ZZ    UInt summary;
//ZZ    HChar dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults: "decoded OK, 4 bytes long, keep going".
      Sub-decoders rely on these defaults and only modify *dres for
      control-flow or special-length cases. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
         The rotate amounts sum to 128, a multiple of 64, so the
         sequence leaves x12 unchanged -- it only acts as a marker. */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The 5th word (a register-preserving ORR) selects the
            operation; the whole sequence is 20 bytes. */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            /* Resume at the insn after the 20-byte sequence. */
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            /* Consume all 20 bytes of the sequence. */
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            /* Link register (x30) gets the address after the sequence. */
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_TILEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_TInval;
            return True;
         }
         /* We don't know what it is.  Preamble present but marker insn
            unrecognised: refuse to decode. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
7200
7201
7202/*------------------------------------------------------------*/
7203/*--- Top-level fn ---*/
7204/*------------------------------------------------------------*/
7205
7206/* Disassemble a single instruction into IR. The instruction
7207 is located in host memory at &guest_code[delta]. */
7208
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           Bool         host_bigendian_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_is_bigendian   = host_bigendian_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      /* Normal insns are 4 bytes; the "Special" preamble sequences
         handled in the worker consume 20. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         /* Render the insn as a 32-char binary string, MSB first, with
            a space every 8 bits and a ' every 4: at most 39 chars, so
            it fits comfortably in buf[64]. */
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
              if ((i & 7) == 0) buf[j++] = ' ';
              else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.whatNext    = Dis_StopHere;
      dres.len         = 0;
      dres.continueAt  = 0;
      dres.jk_StopHere = Ijk_NoDecode;
   }
   return dres;
}
7286
sewardjecde6972014-02-05 11:01:19 +00007287////////////////////////////////////////////////////////////////////////
7288////////////////////////////////////////////////////////////////////////
7289
7290/* Spare code for doing reference implementations of various 128-bit
7291 SIMD interleaves/deinterleaves/concatenation ops. For 64-bit
7292 equivalents see the end of guest_arm_toIR.c. */
7293
7294////////////////////////////////////////////////////////////////
7295// 64x2 operations
7296//
7297static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
7298{
7299 // returns a0 b0
7300 return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
7301 unop(Iop_V128to64, mkexpr(b10)));
7302}
7303
7304static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
7305{
7306 // returns a1 b1
7307 return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
7308 unop(Iop_V128HIto64, mkexpr(b10)));
7309}
7310
7311
7312////////////////////////////////////////////////////////////////
7313// 32x4 operations
7314//
7315
7316// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
7317// the top halves guaranteed to be zero.
7318static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
7319 IRTemp* out0, IRTemp v128 )
7320{
7321 if (out3) *out3 = newTemp(Ity_I64);
7322 if (out2) *out2 = newTemp(Ity_I64);
7323 if (out1) *out1 = newTemp(Ity_I64);
7324 if (out0) *out0 = newTemp(Ity_I64);
7325 IRTemp hi64 = newTemp(Ity_I64);
7326 IRTemp lo64 = newTemp(Ity_I64);
7327 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7328 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7329 if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
7330 if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
7331 if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
7332 if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
7333}
7334
7335// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
7336// IRTemp.
7337static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7338{
7339 IRTemp hi64 = newTemp(Ity_I64);
7340 IRTemp lo64 = newTemp(Ity_I64);
7341 assign(hi64,
7342 binop(Iop_Or64,
7343 binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
7344 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
7345 assign(lo64,
7346 binop(Iop_Or64,
7347 binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
7348 binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
7349 IRTemp res = newTemp(Ity_V128);
7350 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7351 return res;
7352}
7353
7354static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7355{
7356 // returns a2 a0 b2 b0
7357 IRTemp a2, a0, b2, b0;
7358 breakV128to32s(NULL, &a2, NULL, &a0, a3210);
7359 breakV128to32s(NULL, &b2, NULL, &b0, b3210);
7360 return mkexpr(mkV128from32s(a2, a0, b2, b0));
7361}
7362
7363static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
7364{
7365 // returns a3 a1 b3 b1
7366 IRTemp a3, a1, b3, b1;
7367 breakV128to32s(&a3, NULL, &a1, NULL, a3210);
7368 breakV128to32s(&b3, NULL, &b1, NULL, b3210);
7369 return mkexpr(mkV128from32s(a3, a1, b3, b1));
7370}
7371
sewardje520bb32014-02-17 11:00:53 +00007372static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 )
7373{
7374 // returns a1 b1 a0 b0
7375 IRTemp a1, a0, b1, b0;
7376 breakV128to32s(NULL, NULL, &a1, &a0, a3210);
7377 breakV128to32s(NULL, NULL, &b1, &b0, b3210);
7378 return mkexpr(mkV128from32s(a1, b1, a0, b0));
7379}
7380
7381static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 )
7382{
7383 // returns a3 b3 a2 b2
7384 IRTemp a3, a2, b3, b2;
7385 breakV128to32s(&a3, &a2, NULL, NULL, a3210);
7386 breakV128to32s(&b3, &b2, NULL, NULL, b3210);
7387 return mkexpr(mkV128from32s(a3, b3, a2, b2));
7388}
sewardjecde6972014-02-05 11:01:19 +00007389
7390////////////////////////////////////////////////////////////////
7391// 16x8 operations
7392//
7393
7394static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
7395 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7396 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7397{
7398 if (out7) *out7 = newTemp(Ity_I64);
7399 if (out6) *out6 = newTemp(Ity_I64);
7400 if (out5) *out5 = newTemp(Ity_I64);
7401 if (out4) *out4 = newTemp(Ity_I64);
7402 if (out3) *out3 = newTemp(Ity_I64);
7403 if (out2) *out2 = newTemp(Ity_I64);
7404 if (out1) *out1 = newTemp(Ity_I64);
7405 if (out0) *out0 = newTemp(Ity_I64);
7406 IRTemp hi64 = newTemp(Ity_I64);
7407 IRTemp lo64 = newTemp(Ity_I64);
7408 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7409 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7410 if (out7)
7411 assign(*out7, binop(Iop_And64,
7412 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7413 mkU64(0xFFFF)));
7414 if (out6)
7415 assign(*out6, binop(Iop_And64,
7416 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7417 mkU64(0xFFFF)));
7418 if (out5)
7419 assign(*out5, binop(Iop_And64,
7420 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7421 mkU64(0xFFFF)));
7422 if (out4)
7423 assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
7424 if (out3)
7425 assign(*out3, binop(Iop_And64,
7426 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7427 mkU64(0xFFFF)));
7428 if (out2)
7429 assign(*out2, binop(Iop_And64,
7430 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7431 mkU64(0xFFFF)));
7432 if (out1)
7433 assign(*out1, binop(Iop_And64,
7434 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7435 mkU64(0xFFFF)));
7436 if (out0)
7437 assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
7438}
7439
7440static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7441 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7442{
7443 IRTemp hi64 = newTemp(Ity_I64);
7444 IRTemp lo64 = newTemp(Ity_I64);
7445 assign(hi64,
7446 binop(Iop_Or64,
7447 binop(Iop_Or64,
7448 binop(Iop_Shl64,
7449 binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
7450 mkU8(48)),
7451 binop(Iop_Shl64,
7452 binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
7453 mkU8(32))),
7454 binop(Iop_Or64,
7455 binop(Iop_Shl64,
7456 binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
7457 mkU8(16)),
7458 binop(Iop_And64,
7459 mkexpr(in4), mkU64(0xFFFF)))));
7460 assign(lo64,
7461 binop(Iop_Or64,
7462 binop(Iop_Or64,
7463 binop(Iop_Shl64,
7464 binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
7465 mkU8(48)),
7466 binop(Iop_Shl64,
7467 binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
7468 mkU8(32))),
7469 binop(Iop_Or64,
7470 binop(Iop_Shl64,
7471 binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
7472 mkU8(16)),
7473 binop(Iop_And64,
7474 mkexpr(in0), mkU64(0xFFFF)))));
7475 IRTemp res = newTemp(Ity_V128);
7476 assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
7477 return res;
7478}
7479
7480static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7481{
7482 // returns a6 a4 a2 a0 b6 b4 b2 b0
7483 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
7484 breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
7485 breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
7486 return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
7487}
7488
7489static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
7490{
7491 // returns a7 a5 a3 a1 b7 b5 b3 b1
7492 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
7493 breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
7494 breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
7495 return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
7496}
7497
sewardje520bb32014-02-17 11:00:53 +00007498static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 )
7499{
7500 // returns a3 b3 a2 b2 a1 b1 a0 b0
7501 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
7502 breakV128to16s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
7503 breakV128to16s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
7504 return mkexpr(mkV128from16s(a3, b3, a2, b2, a1, b1, a0, b0));
7505}
7506
7507static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 )
7508{
7509 // returns a7 b7 a6 b6 a5 b5 a4 b4
7510 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
7511 breakV128to16s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
7512 breakV128to16s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
7513 return mkexpr(mkV128from16s(a7, b7, a6, b6, a5, b5, a4, b4));
7514}
7515
sewardjfab09142014-02-10 10:28:13 +00007516////////////////////////////////////////////////////////////////
7517// 8x16 operations
7518//
7519
7520static void breakV128to8s ( IRTemp* outF, IRTemp* outE, IRTemp* outD,
7521 IRTemp* outC, IRTemp* outB, IRTemp* outA,
7522 IRTemp* out9, IRTemp* out8,
7523 IRTemp* out7, IRTemp* out6, IRTemp* out5,
7524 IRTemp* out4, IRTemp* out3, IRTemp* out2,
7525 IRTemp* out1,IRTemp* out0, IRTemp v128 )
7526{
7527 if (outF) *outF = newTemp(Ity_I64);
7528 if (outE) *outE = newTemp(Ity_I64);
7529 if (outD) *outD = newTemp(Ity_I64);
7530 if (outC) *outC = newTemp(Ity_I64);
7531 if (outB) *outB = newTemp(Ity_I64);
7532 if (outA) *outA = newTemp(Ity_I64);
7533 if (out9) *out9 = newTemp(Ity_I64);
7534 if (out8) *out8 = newTemp(Ity_I64);
7535 if (out7) *out7 = newTemp(Ity_I64);
7536 if (out6) *out6 = newTemp(Ity_I64);
7537 if (out5) *out5 = newTemp(Ity_I64);
7538 if (out4) *out4 = newTemp(Ity_I64);
7539 if (out3) *out3 = newTemp(Ity_I64);
7540 if (out2) *out2 = newTemp(Ity_I64);
7541 if (out1) *out1 = newTemp(Ity_I64);
7542 if (out0) *out0 = newTemp(Ity_I64);
7543 IRTemp hi64 = newTemp(Ity_I64);
7544 IRTemp lo64 = newTemp(Ity_I64);
7545 assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
7546 assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
7547 if (outF)
7548 assign(*outF, binop(Iop_And64,
7549 binop(Iop_Shr64, mkexpr(hi64), mkU8(56)),
7550 mkU64(0xFF)));
7551 if (outE)
7552 assign(*outE, binop(Iop_And64,
7553 binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
7554 mkU64(0xFF)));
7555 if (outD)
7556 assign(*outD, binop(Iop_And64,
7557 binop(Iop_Shr64, mkexpr(hi64), mkU8(40)),
7558 mkU64(0xFF)));
7559 if (outC)
7560 assign(*outC, binop(Iop_And64,
7561 binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
7562 mkU64(0xFF)));
7563 if (outB)
7564 assign(*outB, binop(Iop_And64,
7565 binop(Iop_Shr64, mkexpr(hi64), mkU8(24)),
7566 mkU64(0xFF)));
7567 if (outA)
7568 assign(*outA, binop(Iop_And64,
7569 binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
7570 mkU64(0xFF)));
7571 if (out9)
7572 assign(*out9, binop(Iop_And64,
7573 binop(Iop_Shr64, mkexpr(hi64), mkU8(8)),
7574 mkU64(0xFF)));
7575 if (out8)
7576 assign(*out8, binop(Iop_And64,
7577 binop(Iop_Shr64, mkexpr(hi64), mkU8(0)),
7578 mkU64(0xFF)));
7579 if (out7)
7580 assign(*out7, binop(Iop_And64,
7581 binop(Iop_Shr64, mkexpr(lo64), mkU8(56)),
7582 mkU64(0xFF)));
7583 if (out6)
7584 assign(*out6, binop(Iop_And64,
7585 binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
7586 mkU64(0xFF)));
7587 if (out5)
7588 assign(*out5, binop(Iop_And64,
7589 binop(Iop_Shr64, mkexpr(lo64), mkU8(40)),
7590 mkU64(0xFF)));
7591 if (out4)
7592 assign(*out4, binop(Iop_And64,
7593 binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
7594 mkU64(0xFF)));
7595 if (out3)
7596 assign(*out3, binop(Iop_And64,
7597 binop(Iop_Shr64, mkexpr(lo64), mkU8(24)),
7598 mkU64(0xFF)));
7599 if (out2)
7600 assign(*out2, binop(Iop_And64,
7601 binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
7602 mkU64(0xFF)));
7603 if (out1)
7604 assign(*out1, binop(Iop_And64,
7605 binop(Iop_Shr64, mkexpr(lo64), mkU8(8)),
7606 mkU64(0xFF)));
7607 if (out0)
7608 assign(*out0, binop(Iop_And64,
7609 binop(Iop_Shr64, mkexpr(lo64), mkU8(0)),
7610 mkU64(0xFF)));
7611}
7612
7613static IRTemp mkV128from8s ( IRTemp inF, IRTemp inE, IRTemp inD, IRTemp inC,
7614 IRTemp inB, IRTemp inA, IRTemp in9, IRTemp in8,
7615 IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
7616 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
7617{
7618 IRTemp vFE = newTemp(Ity_I64);
7619 IRTemp vDC = newTemp(Ity_I64);
7620 IRTemp vBA = newTemp(Ity_I64);
7621 IRTemp v98 = newTemp(Ity_I64);
7622 IRTemp v76 = newTemp(Ity_I64);
7623 IRTemp v54 = newTemp(Ity_I64);
7624 IRTemp v32 = newTemp(Ity_I64);
7625 IRTemp v10 = newTemp(Ity_I64);
7626 assign(vFE, binop(Iop_Or64,
7627 binop(Iop_Shl64,
7628 binop(Iop_And64, mkexpr(inF), mkU64(0xFF)), mkU8(8)),
7629 binop(Iop_And64, mkexpr(inE), mkU64(0xFF))));
7630 assign(vDC, binop(Iop_Or64,
7631 binop(Iop_Shl64,
7632 binop(Iop_And64, mkexpr(inD), mkU64(0xFF)), mkU8(8)),
7633 binop(Iop_And64, mkexpr(inC), mkU64(0xFF))));
7634 assign(vBA, binop(Iop_Or64,
7635 binop(Iop_Shl64,
7636 binop(Iop_And64, mkexpr(inB), mkU64(0xFF)), mkU8(8)),
7637 binop(Iop_And64, mkexpr(inA), mkU64(0xFF))));
7638 assign(v98, binop(Iop_Or64,
7639 binop(Iop_Shl64,
7640 binop(Iop_And64, mkexpr(in9), mkU64(0xFF)), mkU8(8)),
7641 binop(Iop_And64, mkexpr(in8), mkU64(0xFF))));
7642 assign(v76, binop(Iop_Or64,
7643 binop(Iop_Shl64,
7644 binop(Iop_And64, mkexpr(in7), mkU64(0xFF)), mkU8(8)),
7645 binop(Iop_And64, mkexpr(in6), mkU64(0xFF))));
7646 assign(v54, binop(Iop_Or64,
7647 binop(Iop_Shl64,
7648 binop(Iop_And64, mkexpr(in5), mkU64(0xFF)), mkU8(8)),
7649 binop(Iop_And64, mkexpr(in4), mkU64(0xFF))));
7650 assign(v32, binop(Iop_Or64,
7651 binop(Iop_Shl64,
7652 binop(Iop_And64, mkexpr(in3), mkU64(0xFF)), mkU8(8)),
7653 binop(Iop_And64, mkexpr(in2), mkU64(0xFF))));
7654 assign(v10, binop(Iop_Or64,
7655 binop(Iop_Shl64,
7656 binop(Iop_And64, mkexpr(in1), mkU64(0xFF)), mkU8(8)),
7657 binop(Iop_And64, mkexpr(in0), mkU64(0xFF))));
7658 return mkV128from16s(vFE, vDC, vBA, v98, v76, v54, v32, v10);
7659}
7660
7661static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
7662 IRTemp bFEDCBA9876543210 )
7663{
7664 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
7665 IRTemp aE, aC, aA, a8, a6, a4, a2, a0, bE, bC, bA, b8, b6, b4, b2, b0;
7666 breakV128to8s(NULL, &aE, NULL, &aC, NULL, &aA, NULL, &a8,
7667 NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0,
7668 aFEDCBA9876543210);
7669 breakV128to8s(NULL, &bE, NULL, &bC, NULL, &bA, NULL, &b8,
7670 NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0,
7671 bFEDCBA9876543210);
7672 return mkexpr(mkV128from8s(aE, aC, aA, a8, a6, a4, a2, a0,
7673 bE, bC, bA, b8, b6, b4, b2, b0));
7674}
7675
7676static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
7677 IRTemp bFEDCBA9876543210 )
7678{
7679 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
7680 IRTemp aF, aD, aB, a9, a7, a5, a3, a1, bF, bD, bB, b9, b7, b5, b3, b1;
7681 breakV128to8s(&aF, NULL, &aD, NULL, &aB, NULL, &a9, NULL,
7682 &a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL,
7683 aFEDCBA9876543210);
7684
7685 breakV128to8s(&bF, NULL, &bD, NULL, &bB, NULL, &b9, NULL,
7686 &b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL,
7687 aFEDCBA9876543210);
7688
7689 return mkexpr(mkV128from8s(aF, aD, aB, a9, a7, a5, a3, a1,
7690 bF, bD, bB, b9, b7, b5, b3, b1));
7691}
7692
sewardje520bb32014-02-17 11:00:53 +00007693static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
7694 IRTemp bFEDCBA9876543210 )
7695{
7696 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
7697 IRTemp a7, b7, a6, b6, a5, b5, a4, b4, a3, b3, a2, b2, a1, b1, a0, b0;
7698 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7699 &a7, &a6, &a5, &a4, &a3, &a2, &a1, &a0,
7700 aFEDCBA9876543210);
7701 breakV128to8s(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7702 &b7, &b6, &b5, &b4, &b3, &b2, &b1, &b0,
7703 bFEDCBA9876543210);
7704 return mkexpr(mkV128from8s(a7, b7, a6, b6, a5, b5, a4, b4,
7705 a3, b3, a2, b2, a1, b1, a0, b0));
7706}
7707
7708static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
7709 IRTemp bFEDCBA9876543210 )
7710{
7711 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
7712 IRTemp aF, bF, aE, bE, aD, bD, aC, bC, aB, bB, aA, bA, a9, b9, a8, b8;
7713 breakV128to8s(&aF, &aE, &aD, &aC, &aB, &aA, &a9, &a8,
7714 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7715 aFEDCBA9876543210);
7716 breakV128to8s(&bF, &bE, &bD, &bC, &bB, &bA, &b9, &b8,
7717 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
7718 bFEDCBA9876543210);
7719 return mkexpr(mkV128from8s(aF, bF, aE, bE, aD, bD, aC, bC,
7720 aB, bB, aA, bA, a9, b9, a8, b8));
7721}
sewardjecde6972014-02-05 11:01:19 +00007722
sewardjbbcf1882014-01-12 12:49:10 +00007723/*--------------------------------------------------------------------*/
7724/*--- end guest_arm64_toIR.c ---*/
7725/*--------------------------------------------------------------------*/