Blame - priv/guest_arm64_toIR.c - platform/external/valgrind

blob: e30103c329a5b247655d6b265d23b3cd06031dba [file] [log] [blame]

sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1	/* -*- mode: C; c-basic-offset: 3; -*- */
				2
				3	/*--------------------------------------------------------------------*/
				4	/*--- begin guest_arm64_toIR.c ---*/
				5	/*--------------------------------------------------------------------*/
				6
				7	/*
				8	This file is part of Valgrind, a dynamic binary instrumentation
				9	framework.
				10
				11	Copyright (C) 2013-2013 OpenWorks
				12	info@open-works.net
				13
				14	This program is free software; you can redistribute it and/or
				15	modify it under the terms of the GNU General Public License as
				16	published by the Free Software Foundation; either version 2 of the
				17	License, or (at your option) any later version.
				18
				19	This program is distributed in the hope that it will be useful, but
				20	WITHOUT ANY WARRANTY; without even the implied warranty of
				21	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				22	General Public License for more details.
				23
				24	You should have received a copy of the GNU General Public License
				25	along with this program; if not, write to the Free Software
				26	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
				27	02110-1301, USA.
				28
				29	The GNU General Public License is contained in the file COPYING.
				30	*/
				31
				32	//ZZ /* XXXX thumb to check:
				33	//ZZ that all cases where putIRegT writes r15, we generate a jump.
				34	//ZZ
				35	//ZZ All uses of newTemp assign to an IRTemp and not a UInt
				36	//ZZ
				37	//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
				38	//ZZ backed out before the memory op, and restored afterwards. This
				39	//ZZ needs to happen even after we go uncond. (and for sure it doesn't
				40	//ZZ happen for VFP loads/stores right now).
				41	//ZZ
				42	//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
				43	//ZZ should.
				44	//ZZ
				45	//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
				46	//ZZ taking into account the number of insns guarded by an IT.
				47	//ZZ
				48	//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
				49	//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
				50	//ZZ use Slice44 as specified in comments in the spechelper.
				51	//ZZ
				52	//ZZ add specialisations for armg_calculate_flag_c and _v, as they
				53	//ZZ are moderately often needed in Thumb code.
				54	//ZZ
				55	//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
				56	//ZZ
				57	//ZZ Correctness (obscure): in m_transtab, when invalidating code
				58	//ZZ address ranges, invalidate up to 18 bytes after the end of the
				59	//ZZ range. This is because the ITSTATE optimisation at the top of
				60	//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
				61	//ZZ given instruction, and so might depend on the invalidated area.
				62	//ZZ */
				63	//ZZ
				64	//ZZ /* Limitations, etc
				65	//ZZ
				66	//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
				67	//ZZ These instructions are non-restartable in the case where the
				68	//ZZ transfer(s) fault.
				69	//ZZ
				70	//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
				71	//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
				72	//ZZ guest_x86_toIR.c.
				73	//ZZ */
				74
				75	/* "Special" instructions.
				76
				77	This instruction decoder can decode four special instructions
				78	which mean nothing natively (are no-ops as far as regs/mem are
				79	concerned) but have meaning for supporting Valgrind. A special
				80	instruction is flagged by a 16-byte preamble:
				81
				82	93CC0D8C 93CC358C 93CCCD8C 93CCF58C
				83	(ror x12, x12, #3; ror x12, x12, #13
				84	ror x12, x12, #51; ror x12, x12, #61)
				85
				86	Following that, one of the following 4 are allowed
				87	(standard interpretation in parentheses):
				88
				89	AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
				90	AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
				91	AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
				92	AA090129 (orr x9,x9,x9) IR injection
				93
				94	Any other bytes following the 16-byte preamble are illegal and
				95	constitute a failure in instruction decoding. This all assumes
				96	that the preamble will never occur except in specific code
				97	fragments designed for Valgrind to catch.
				98	*/
				99
				100	/* Translates ARM64 code to IR. */
				101
				102	#include "libvex_basictypes.h"
				103	#include "libvex_ir.h"
				104	#include "libvex.h"
				105	#include "libvex_guest_arm64.h"
				106
				107	#include "main_util.h"
				108	#include "main_globals.h"
				109	#include "guest_generic_bb_to_IR.h"
				110	#include "guest_arm64_defs.h"
				111
				112
				113	/*------------------------------------------------------------*/
				114	/*--- Globals ---*/
				115	/*------------------------------------------------------------*/
				116
				117	/* These are set at the start of the translation of an instruction, so
				118	that we don't have to pass them around endlessly. CONST means does
				119	not change during translation of the instruction.
				120	*/
				121
				122	/* CONST: is the host bigendian? We need to know this in order to do
				123	sub-register accesses to the SIMD/FP registers correctly. */
				124	static Bool host_is_bigendian;
				125
				126	/* CONST: The guest address for the instruction currently being
				127	translated. */
				128	static Addr64 guest_PC_curr_instr;
				129
				130	/* MOD: The IRSB* into which we're generating code. */
				131	static IRSB* irsb;
				132
				133
				134	/*------------------------------------------------------------*/
				135	/*--- Debugging output ---*/
				136	/*------------------------------------------------------------*/
				137
				/* Emit a front-end trace message via vex_printf, but only when
				   VEX_TRACE_FE is set in vex_traceflags.  The do/while(0) wrapper
				   makes the expansion a single statement, so an 'else' written
				   after "DIP(...);" cannot bind to the macro's internal 'if'. */
				#define DIP(format, args...)              \
				   do {                                   \
				      if (vex_traceflags & VEX_TRACE_FE)  \
				         vex_printf(format, ## args);     \
				   } while (0)
				141
				/* Format a front-end trace message into 'buf' via vex_sprintf, but
				   only when VEX_TRACE_FE is set in vex_traceflags; otherwise 'buf'
				   is left untouched.  The do/while(0) wrapper makes the expansion
				   a single statement, so an 'else' written after "DIS(...);"
				   cannot bind to the macro's internal 'if'. */
				#define DIS(buf, format, args...)         \
				   do {                                   \
				      if (vex_traceflags & VEX_TRACE_FE)  \
				         vex_sprintf(buf, format, ## args); \
				   } while (0)
				145
				146
				147	/*------------------------------------------------------------*/
				148	/*--- Helper bits and pieces for deconstructing the ---*/
				149	/*--- arm insn stream. ---*/
				150	/*------------------------------------------------------------*/
				151
				152	/* Do a little-endian load of a 32-bit word, regardless of the
				153	endianness of the underlying host. */
				154	static inline UInt getUIntLittleEndianly ( UChar* p )
				155	{
				156	UInt w = 0;
				157	w = (w << 8) \| p[3];
				158	w = (w << 8) \| p[2];
				159	w = (w << 8) \| p[1];
				160	w = (w << 8) \| p[0];
				161	return w;
				162	}
				163
				164	/* Sign extend a N-bit value up to 64 bits, by copying
				165	bit N-1 into all higher positions. */
				166	static ULong sx_to_64 ( ULong x, UInt n )
				167	{
				168	vassert(n > 1 && n < 64);
				169	Long r = (Long)x;
				170	r = (r << (64-n)) >> (64-n);
				171	return (ULong)r;
				172	}
				173
				174	//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
				175	//ZZ endianness of the underlying host. */
				176	//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
				177	//ZZ {
				178	//ZZ UShort w = 0;
				179	//ZZ w = (w << 8) \| p[1];
				180	//ZZ w = (w << 8) \| p[0];
				181	//ZZ return w;
				182	//ZZ }
				183	//ZZ
				184	//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
				185	//ZZ vassert(sh >= 0 && sh < 32);
				186	//ZZ if (sh == 0)
				187	//ZZ return x;
				188	//ZZ else
				189	//ZZ return (x << (32-sh)) \| (x >> sh);
				190	//ZZ }
				191	//ZZ
				192	//ZZ static Int popcount32 ( UInt x )
				193	//ZZ {
				194	//ZZ Int res = 0, i;
				195	//ZZ for (i = 0; i < 32; i++) {
				196	//ZZ res += (x & 1);
				197	//ZZ x >>= 1;
				198	//ZZ }
				199	//ZZ return res;
				200	//ZZ }
				201	//ZZ
				202	//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
				203	//ZZ {
				204	//ZZ UInt mask = 1 << ix;
				205	//ZZ x &= ~mask;
				206	//ZZ x \|= ((b << ix) & mask);
				207	//ZZ return x;
				208	//ZZ }
				209
				/* BITSn(b_{n-1}, ..., b0) packs its n arguments into one integer,
				   with the first argument in the most significant position and
				   the last argument in bit 0.  The arguments are presumably each
				   0 or 1 -- nothing here enforces that. */
				210	#define BITS2(_b1,_b0) \
				211	(((_b1) << 1) \| (_b0))
				212
				213	#define BITS3(_b2,_b1,_b0) \
				214	(((_b2) << 2) \| ((_b1) << 1) \| (_b0))
				215
				216	#define BITS4(_b3,_b2,_b1,_b0) \
				217	(((_b3) << 3) \| ((_b2) << 2) \| ((_b1) << 1) \| (_b0))
				218
				/* BITS8 is built from two BITS4 nibbles. */
				219	#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
				220	((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
				221	\| BITS4((_b3),(_b2),(_b1),(_b0)))
				222
				/* BITS5..BITS7 are BITS8 with the unused leading positions zeroed. */
				223	#define BITS5(_b4,_b3,_b2,_b1,_b0) \
				224	(BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
				225	#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
				226	(BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
				227	#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
				228	(BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
				229
				/* BITS9..BITS11 OR the extra high bits on top of a narrower macro. */
				230	#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
				231	(((_b8) << 8) \
				232	\| BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
				233
				234	#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
				235	(((_b9) << 9) \| ((_b8) << 8) \
				236	\| BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
				237
				238	#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
				239	(((_b10) << 10) \
				240	\| BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))
				241
				242	// produces _uint[_bMax:_bMin], i.e. bits _bMin.._bMax (inclusive) of
				// _uint, shifted down so that bit _bMin lands in bit 0.  The mask is
				// computed in 64 bits (1ULL), so a full 32-bit slice (31,0) is safe.
				243	#define SLICE_UInt(_uint,_bMax,_bMin) \
				244	(( ((UInt)(_uint)) >> (_bMin)) \
				245	& (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
				246
				247
				248	/*------------------------------------------------------------*/
				249	/*--- Helper bits and pieces for creating IR fragments. ---*/
				250	/*------------------------------------------------------------*/
				251
				252	static IRExpr* mkV128 ( UShort w )
				253	{
				254	return IRExpr_Const(IRConst_V128(w));
				255	}
				256
				257	static IRExpr* mkU64 ( ULong i )
				258	{
				259	return IRExpr_Const(IRConst_U64(i));
				260	}
				261
				262	static IRExpr* mkU32 ( UInt i )
				263	{
				264	return IRExpr_Const(IRConst_U32(i));
				265	}
				266
				267	static IRExpr* mkU8 ( UInt i )
				268	{
				269	vassert(i < 256);
				270	return IRExpr_Const(IRConst_U8( (UChar)i ));
				271	}
				272
				273	static IRExpr* mkexpr ( IRTemp tmp )
				274	{
				275	return IRExpr_RdTmp(tmp);
				276	}
				277
				278	static IRExpr* unop ( IROp op, IRExpr* a )
				279	{
				280	return IRExpr_Unop(op, a);
				281	}
				282
				283	static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
				284	{
				285	return IRExpr_Binop(op, a1, a2);
				286	}
				287
				288	static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
				289	{
				290	return IRExpr_Triop(op, a1, a2, a3);
				291	}
				292
				293	static IRExpr* loadLE ( IRType ty, IRExpr* addr )
				294	{
				295	return IRExpr_Load(Iend_LE, ty, addr);
				296	}
				297
				298	/* Add a statement to the list held by "irbb". */
				299	static void stmt ( IRStmt* st )
				300	{
				301	addStmtToIRSB( irsb, st );
				302	}
				303
				304	static void assign ( IRTemp dst, IRExpr* e )
				305	{
				306	stmt( IRStmt_WrTmp(dst, e) );
				307	}
				308
				309	static void storeLE ( IRExpr* addr, IRExpr* data )
				310	{
				311	stmt( IRStmt_Store(Iend_LE, addr, data) );
				312	}
				313
				314	//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
				315	//ZZ {
				316	//ZZ if (guardT == IRTemp_INVALID) {
				317	//ZZ /* unconditional */
				318	//ZZ storeLE(addr, data);
				319	//ZZ } else {
				320	//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
				321	//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
				322	//ZZ }
				323	//ZZ }
				324	//ZZ
				325	//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
				326	//ZZ IRExpr* addr, IRExpr* alt,
				327	//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
				328	//ZZ {
				329	//ZZ if (guardT == IRTemp_INVALID) {
				330	//ZZ /* unconditional */
				331	//ZZ IRExpr* loaded = NULL;
				332	//ZZ switch (cvt) {
				333	//ZZ case ILGop_Ident32:
				334	//ZZ loaded = loadLE(Ity_I32, addr); break;
				335	//ZZ case ILGop_8Uto32:
				336	//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
				337	//ZZ case ILGop_8Sto32:
				338	//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
				339	//ZZ case ILGop_16Uto32:
				340	//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
				341	//ZZ case ILGop_16Sto32:
				342	//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
				343	//ZZ default:
				344	//ZZ vassert(0);
				345	//ZZ }
				346	//ZZ vassert(loaded != NULL);
				347	//ZZ assign(dst, loaded);
				348	//ZZ } else {
				349	//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
				350	//ZZ loaded data before putting the data in 'dst'. If the load
				351	//ZZ does not take place, 'alt' is placed directly in 'dst'. */
				352	//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
				353	//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
				354	//ZZ }
				355	//ZZ }
				356
				357	/* Generate a new temporary of the given type. */
				358	static IRTemp newTemp ( IRType ty )
				359	{
				360	vassert(isPlausibleIRType(ty));
				361	return newIRTemp( irsb->tyenv, ty );
				362	}
				363
				364	//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
				365	//ZZ IRRoundingMode. */
				366	//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
				367	//ZZ {
				368	//ZZ return mkU32(Irrm_NEAREST);
				369	//ZZ }
				370	//ZZ
				371	//ZZ /* Generate an expression for SRC rotated right by ROT. */
				372	//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
				373	//ZZ {
				374	//ZZ vassert(rot >= 0 && rot < 32);
				375	//ZZ if (rot == 0)
				376	//ZZ return mkexpr(src);
				377	//ZZ return
				378	//ZZ binop(Iop_Or32,
				379	//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
				380	//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
				381	//ZZ }
				382	//ZZ
				383	//ZZ static IRExpr* mkU128 ( ULong i )
				384	//ZZ {
				385	//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
				386	//ZZ }
				387	//ZZ
				388	//ZZ /* Generate a 4-aligned version of the given expression if
				389	//ZZ the given condition is true. Else return it unchanged. */
				390	//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
				391	//ZZ {
				392	//ZZ if (b)
				393	//ZZ return binop(Iop_And32, e, mkU32(~3));
				394	//ZZ else
				395	//ZZ return e;
				396	//ZZ }
				397
				398	/* Other IR construction helpers. */
				399	static IROp mkAND ( IRType ty ) {
				400	switch (ty) {
				401	case Ity_I32: return Iop_And32;
				402	case Ity_I64: return Iop_And64;
				403	default: vpanic("mkAND");
				404	}
				405	}
				406
				407	static IROp mkOR ( IRType ty ) {
				408	switch (ty) {
				409	case Ity_I32: return Iop_Or32;
				410	case Ity_I64: return Iop_Or64;
				411	default: vpanic("mkOR");
				412	}
				413	}
				414
				415	static IROp mkXOR ( IRType ty ) {
				416	switch (ty) {
				417	case Ity_I32: return Iop_Xor32;
				418	case Ity_I64: return Iop_Xor64;
				419	default: vpanic("mkXOR");
				420	}
				421	}
				422
				423	static IROp mkSHL ( IRType ty ) {
				424	switch (ty) {
				425	case Ity_I32: return Iop_Shl32;
				426	case Ity_I64: return Iop_Shl64;
				427	default: vpanic("mkSHL");
				428	}
				429	}
				430
				431	static IROp mkSHR ( IRType ty ) {
				432	switch (ty) {
				433	case Ity_I32: return Iop_Shr32;
				434	case Ity_I64: return Iop_Shr64;
				435	default: vpanic("mkSHR");
				436	}
				437	}
				438
				439	static IROp mkSAR ( IRType ty ) {
				440	switch (ty) {
				441	case Ity_I32: return Iop_Sar32;
				442	case Ity_I64: return Iop_Sar64;
				443	default: vpanic("mkSAR");
				444	}
				445	}
				446
				447	static IROp mkNOT ( IRType ty ) {
				448	switch (ty) {
				449	case Ity_I32: return Iop_Not32;
				450	case Ity_I64: return Iop_Not64;
				451	default: vpanic("mkNOT");
				452	}
				453	}
				454
				455	static IROp mkADD ( IRType ty ) {
				456	switch (ty) {
				457	case Ity_I32: return Iop_Add32;
				458	case Ity_I64: return Iop_Add64;
				459	default: vpanic("mkADD");
				460	}
				461	}
				462
				463	static IROp mkSUB ( IRType ty ) {
				464	switch (ty) {
				465	case Ity_I32: return Iop_Sub32;
				466	case Ity_I64: return Iop_Sub64;
				467	default: vpanic("mkSUB");
				468	}
				469	}
				470
				471	static IROp mkADDF ( IRType ty ) {
				472	switch (ty) {
				473	case Ity_F32: return Iop_AddF32;
				474	case Ity_F64: return Iop_AddF64;
				475	default: vpanic("mkADDF");
				476	}
				477	}
				478
				479	static IROp mkSUBF ( IRType ty ) {
				480	switch (ty) {
				481	case Ity_F32: return Iop_SubF32;
				482	case Ity_F64: return Iop_SubF64;
				483	default: vpanic("mkSUBF");
				484	}
				485	}
				486
				487	static IROp mkMULF ( IRType ty ) {
				488	switch (ty) {
				489	case Ity_F32: return Iop_MulF32;
				490	case Ity_F64: return Iop_MulF64;
				491	default: vpanic("mkMULF");
				492	}
				493	}
				494
				495	static IROp mkDIVF ( IRType ty ) {
				496	switch (ty) {
				497	case Ity_F32: return Iop_DivF32;
				498	case Ity_F64: return Iop_DivF64;
				499	default: vpanic("mkMULF");
				500	}
				501	}
				502
				503	static IROp mkNEGF ( IRType ty ) {
				504	switch (ty) {
				505	case Ity_F32: return Iop_NegF32;
				506	case Ity_F64: return Iop_NegF64;
				507	default: vpanic("mkNEGF");
				508	}
				509	}
				510
				511	static IROp mkABSF ( IRType ty ) {
				512	switch (ty) {
				513	case Ity_F32: return Iop_AbsF32;
				514	case Ity_F64: return Iop_AbsF64;
				515	default: vpanic("mkNEGF");
				516	}
				517	}
				518
				519	static IROp mkSQRTF ( IRType ty ) {
				520	switch (ty) {
				521	case Ity_F32: return Iop_SqrtF32;
				522	case Ity_F64: return Iop_SqrtF64;
				523	default: vpanic("mkNEGF");
				524	}
				525	}
				526
				527	static IRExpr* mkU ( IRType ty, ULong imm ) {
				528	switch (ty) {
				529	case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
				530	case Ity_I64: return mkU64(imm);
				531	default: vpanic("mkU");
				532	}
				533	}
				534
				535	/* Generate IR to create 'arg rotated right by imm', for sane values
				536	of 'ty' and 'imm'. */
				537	static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
				538	{
				539	UInt w = 0;
				540	if (ty == Ity_I64) {
				541	w = 64;
				542	} else {
				543	vassert(ty == Ity_I32);
				544	w = 32;
				545	}
				546	vassert(w != 0);
				547	vassert(imm < w);
				548	if (imm == 0) {
				549	return arg;
				550	}
				551	IRTemp res = newTemp(ty);
				552	assign(res, binop(mkOR(ty),
				553	binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
				554	binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
				555	return res;
				556	}
				557
				558	/* Generate IR to set the returned temp to either all-zeroes or
				559	all ones, as a copy of arg<imm>. */
				560	static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
				561	{
				562	UInt w = 0;
				563	if (ty == Ity_I64) {
				564	w = 64;
				565	} else {
				566	vassert(ty == Ity_I32);
				567	w = 32;
				568	}
				569	vassert(w != 0);
				570	vassert(imm < w);
				571	IRTemp res = newTemp(ty);
				572	assign(res, binop(mkSAR(ty),
				573	binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
				574	mkU8(w - 1)));
				575	return res;
				576	}
				577
				578
				579	/*------------------------------------------------------------*/
				580	/*--- Helpers for accessing guest registers. ---*/
				581	/*------------------------------------------------------------*/
				582
				/* Byte offsets of fields within VexGuestARM64State, for use in
				   Get/Put IR.  First the integer registers guest_X0 .. guest_X30. */
				583	#define OFFB_X0 offsetof(VexGuestARM64State,guest_X0)
				584	#define OFFB_X1 offsetof(VexGuestARM64State,guest_X1)
				585	#define OFFB_X2 offsetof(VexGuestARM64State,guest_X2)
				586	#define OFFB_X3 offsetof(VexGuestARM64State,guest_X3)
				587	#define OFFB_X4 offsetof(VexGuestARM64State,guest_X4)
				588	#define OFFB_X5 offsetof(VexGuestARM64State,guest_X5)
				589	#define OFFB_X6 offsetof(VexGuestARM64State,guest_X6)
				590	#define OFFB_X7 offsetof(VexGuestARM64State,guest_X7)
				591	#define OFFB_X8 offsetof(VexGuestARM64State,guest_X8)
				592	#define OFFB_X9 offsetof(VexGuestARM64State,guest_X9)
				593	#define OFFB_X10 offsetof(VexGuestARM64State,guest_X10)
				594	#define OFFB_X11 offsetof(VexGuestARM64State,guest_X11)
				595	#define OFFB_X12 offsetof(VexGuestARM64State,guest_X12)
				596	#define OFFB_X13 offsetof(VexGuestARM64State,guest_X13)
				597	#define OFFB_X14 offsetof(VexGuestARM64State,guest_X14)
				598	#define OFFB_X15 offsetof(VexGuestARM64State,guest_X15)
				599	#define OFFB_X16 offsetof(VexGuestARM64State,guest_X16)
				600	#define OFFB_X17 offsetof(VexGuestARM64State,guest_X17)
				601	#define OFFB_X18 offsetof(VexGuestARM64State,guest_X18)
				602	#define OFFB_X19 offsetof(VexGuestARM64State,guest_X19)
				603	#define OFFB_X20 offsetof(VexGuestARM64State,guest_X20)
				604	#define OFFB_X21 offsetof(VexGuestARM64State,guest_X21)
				605	#define OFFB_X22 offsetof(VexGuestARM64State,guest_X22)
				606	#define OFFB_X23 offsetof(VexGuestARM64State,guest_X23)
				607	#define OFFB_X24 offsetof(VexGuestARM64State,guest_X24)
				608	#define OFFB_X25 offsetof(VexGuestARM64State,guest_X25)
				609	#define OFFB_X26 offsetof(VexGuestARM64State,guest_X26)
				610	#define OFFB_X27 offsetof(VexGuestARM64State,guest_X27)
				611	#define OFFB_X28 offsetof(VexGuestARM64State,guest_X28)
				612	#define OFFB_X29 offsetof(VexGuestARM64State,guest_X29)
				613	#define OFFB_X30 offsetof(VexGuestARM64State,guest_X30)
				614
				/* guest_XSP is what register number 31 maps to in
				   offsetIReg64orSP; guest_PC is written by putPC. */
sewardj	6068788	2014-01-15 10:25:21 +0000	[diff] [blame]	615	#define OFFB_XSP offsetof(VexGuestARM64State,guest_XSP)
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	616	#define OFFB_PC offsetof(VexGuestARM64State,guest_PC)
				617
				/* The four guest_CC_* fields (presumably the condition-code
				   thunk -- confirm against guest_arm64_defs.h). */
				618	#define OFFB_CC_OP offsetof(VexGuestARM64State,guest_CC_OP)
				619	#define OFFB_CC_DEP1 offsetof(VexGuestARM64State,guest_CC_DEP1)
				620	#define OFFB_CC_DEP2 offsetof(VexGuestARM64State,guest_CC_DEP2)
				621	#define OFFB_CC_NDEP offsetof(VexGuestARM64State,guest_CC_NDEP)
				622
				623	#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
				624	#define OFFB_NRADDR offsetof(VexGuestARM64State,guest_NRADDR)
				625
				/* Byte offsets within VexGuestARM64State of the vector registers
				   guest_Q0 .. guest_Q31. */
				626	#define OFFB_Q0 offsetof(VexGuestARM64State,guest_Q0)
				627	#define OFFB_Q1 offsetof(VexGuestARM64State,guest_Q1)
				628	#define OFFB_Q2 offsetof(VexGuestARM64State,guest_Q2)
				629	#define OFFB_Q3 offsetof(VexGuestARM64State,guest_Q3)
				630	#define OFFB_Q4 offsetof(VexGuestARM64State,guest_Q4)
				631	#define OFFB_Q5 offsetof(VexGuestARM64State,guest_Q5)
				632	#define OFFB_Q6 offsetof(VexGuestARM64State,guest_Q6)
				633	#define OFFB_Q7 offsetof(VexGuestARM64State,guest_Q7)
				634	#define OFFB_Q8 offsetof(VexGuestARM64State,guest_Q8)
				635	#define OFFB_Q9 offsetof(VexGuestARM64State,guest_Q9)
				636	#define OFFB_Q10 offsetof(VexGuestARM64State,guest_Q10)
				637	#define OFFB_Q11 offsetof(VexGuestARM64State,guest_Q11)
				638	#define OFFB_Q12 offsetof(VexGuestARM64State,guest_Q12)
				639	#define OFFB_Q13 offsetof(VexGuestARM64State,guest_Q13)
				640	#define OFFB_Q14 offsetof(VexGuestARM64State,guest_Q14)
				641	#define OFFB_Q15 offsetof(VexGuestARM64State,guest_Q15)
				642	#define OFFB_Q16 offsetof(VexGuestARM64State,guest_Q16)
				643	#define OFFB_Q17 offsetof(VexGuestARM64State,guest_Q17)
				644	#define OFFB_Q18 offsetof(VexGuestARM64State,guest_Q18)
				645	#define OFFB_Q19 offsetof(VexGuestARM64State,guest_Q19)
				646	#define OFFB_Q20 offsetof(VexGuestARM64State,guest_Q20)
				647	#define OFFB_Q21 offsetof(VexGuestARM64State,guest_Q21)
				648	#define OFFB_Q22 offsetof(VexGuestARM64State,guest_Q22)
				649	#define OFFB_Q23 offsetof(VexGuestARM64State,guest_Q23)
				650	#define OFFB_Q24 offsetof(VexGuestARM64State,guest_Q24)
				651	#define OFFB_Q25 offsetof(VexGuestARM64State,guest_Q25)
				652	#define OFFB_Q26 offsetof(VexGuestARM64State,guest_Q26)
				653	#define OFFB_Q27 offsetof(VexGuestARM64State,guest_Q27)
				654	#define OFFB_Q28 offsetof(VexGuestARM64State,guest_Q28)
				655	#define OFFB_Q29 offsetof(VexGuestARM64State,guest_Q29)
				656	#define OFFB_Q30 offsetof(VexGuestARM64State,guest_Q30)
				657	#define OFFB_Q31 offsetof(VexGuestARM64State,guest_Q31)
				658
				/* Offsets of the guest_FPCR and guest_FPSR fields.  The //ZZ
				   lines that follow are disabled definitions that refer to
				   VexGuestARMState rather than VexGuestARM64State. */
				659	#define OFFB_FPCR offsetof(VexGuestARM64State,guest_FPCR)
				660	#define OFFB_FPSR offsetof(VexGuestARM64State,guest_FPSR)
				661	//ZZ #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
				662	//ZZ #define OFFB_ITSTATE offsetof(VexGuestARMState,guest_ITSTATE)
				663	//ZZ #define OFFB_QFLAG32 offsetof(VexGuestARMState,guest_QFLAG32)
				664	//ZZ #define OFFB_GEFLAG0 offsetof(VexGuestARMState,guest_GEFLAG0)
				665	//ZZ #define OFFB_GEFLAG1 offsetof(VexGuestARMState,guest_GEFLAG1)
				666	//ZZ #define OFFB_GEFLAG2 offsetof(VexGuestARMState,guest_GEFLAG2)
				667	//ZZ #define OFFB_GEFLAG3 offsetof(VexGuestARMState,guest_GEFLAG3)
				668
				669	#define OFFB_TISTART offsetof(VexGuestARM64State,guest_TISTART)
				670	#define OFFB_TILEN offsetof(VexGuestARM64State,guest_TILEN)
				671
				672
				673	/* ---------------- Integer registers ---------------- */
				674
				675	static Int offsetIReg64 ( UInt iregNo )
				676	{
				677	/* Do we care about endianness here? We do if sub-parts of integer
				678	registers are accessed. */
				679	switch (iregNo) {
				680	case 0: return OFFB_X0;
				681	case 1: return OFFB_X1;
				682	case 2: return OFFB_X2;
				683	case 3: return OFFB_X3;
				684	case 4: return OFFB_X4;
				685	case 5: return OFFB_X5;
				686	case 6: return OFFB_X6;
				687	case 7: return OFFB_X7;
				688	case 8: return OFFB_X8;
				689	case 9: return OFFB_X9;
				690	case 10: return OFFB_X10;
				691	case 11: return OFFB_X11;
				692	case 12: return OFFB_X12;
				693	case 13: return OFFB_X13;
				694	case 14: return OFFB_X14;
				695	case 15: return OFFB_X15;
				696	case 16: return OFFB_X16;
				697	case 17: return OFFB_X17;
				698	case 18: return OFFB_X18;
				699	case 19: return OFFB_X19;
				700	case 20: return OFFB_X20;
				701	case 21: return OFFB_X21;
				702	case 22: return OFFB_X22;
				703	case 23: return OFFB_X23;
				704	case 24: return OFFB_X24;
				705	case 25: return OFFB_X25;
				706	case 26: return OFFB_X26;
				707	case 27: return OFFB_X27;
				708	case 28: return OFFB_X28;
				709	case 29: return OFFB_X29;
				710	case 30: return OFFB_X30;
				711	/* but not 31 */
				712	default: vassert(0);
				713	}
				714	}
				715
				716	static Int offsetIReg64orSP ( UInt iregNo )
				717	{
sewardj	6068788	2014-01-15 10:25:21 +0000	[diff] [blame]	718	return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	719	}
				720
				721	static const HChar* nameIReg64orZR ( UInt iregNo )
				722	{
				723	vassert(iregNo < 32);
				724	static const HChar* names[32]
				725	= { "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
				726	"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
				727	"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
				728	"x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
				729	return names[iregNo];
				730	}
				731
				732	static const HChar* nameIReg64orSP ( UInt iregNo )
				733	{
				734	if (iregNo == 31) {
				735	return "sp";
				736	}
				737	vassert(iregNo < 31);
				738	return nameIReg64orZR(iregNo);
				739	}
				740
				741	static IRExpr* getIReg64orSP ( UInt iregNo )
				742	{
				743	vassert(iregNo < 32);
				744	return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
				745	}
				746
				747	static IRExpr* getIReg64orZR ( UInt iregNo )
				748	{
				749	if (iregNo == 31) {
				750	return mkU64(0);
				751	}
				752	vassert(iregNo < 31);
				753	return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
				754	}
				755
				756	static void putIReg64orSP ( UInt iregNo, IRExpr* e )
				757	{
				758	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
				759	stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
				760	}
				761
				762	static void putIReg64orZR ( UInt iregNo, IRExpr* e )
				763	{
				764	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
				765	if (iregNo == 31) {
				766	return;
				767	}
				768	vassert(iregNo < 31);
				769	stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
				770	}
				771
				772	static const HChar* nameIReg32orZR ( UInt iregNo )
				773	{
				774	vassert(iregNo < 32);
				775	static const HChar* names[32]
				776	= { "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
				777	"w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
				778	"w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
				779	"w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
				780	return names[iregNo];
				781	}
				782
				783	static const HChar* nameIReg32orSP ( UInt iregNo )
				784	{
				785	if (iregNo == 31) {
				786	return "wsp";
				787	}
				788	vassert(iregNo < 31);
				789	return nameIReg32orZR(iregNo);
				790	}
				791
				792	static IRExpr* getIReg32orSP ( UInt iregNo )
				793	{
				794	vassert(iregNo < 32);
				795	return unop(Iop_64to32,
				796	IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
				797	}
				798
				799	static IRExpr* getIReg32orZR ( UInt iregNo )
				800	{
				801	if (iregNo == 31) {
				802	return mkU32(0);
				803	}
				804	vassert(iregNo < 31);
				805	return unop(Iop_64to32,
				806	IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
				807	}
				808
				809	static void putIReg32orSP ( UInt iregNo, IRExpr* e )
				810	{
				811	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
				812	stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
				813	}
				814
				815	static void putIReg32orZR ( UInt iregNo, IRExpr* e )
				816	{
				817	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
				818	if (iregNo == 31) {
				819	return;
				820	}
				821	vassert(iregNo < 31);
				822	stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
				823	}
				824
				825	static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
				826	{
				827	vassert(is64 == True \|\| is64 == False);
				828	return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
				829	}
				830
				831	static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
				832	{
				833	vassert(is64 == True \|\| is64 == False);
				834	return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
				835	}
				836
				837	static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
				838	{
				839	vassert(is64 == True \|\| is64 == False);
				840	return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
				841	}
				842
				843	static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
				844	{
				845	vassert(is64 == True \|\| is64 == False);
				846	if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
				847	}
				848
				849	static void putPC ( IRExpr* e )
				850	{
				851	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
				852	stmt( IRStmt_Put(OFFB_PC, e) );
				853	}
				854
				855
				856	/* ---------------- Vector (Q) registers ---------------- */
				857
				858	static Int offsetQReg128 ( UInt qregNo )
				859	{
				860	/* We don't care about endianness at this point. It only becomes
				861	relevant when dealing with sections of these registers.*/
				862	switch (qregNo) {
				863	case 0: return OFFB_Q0;
				864	case 1: return OFFB_Q1;
				865	case 2: return OFFB_Q2;
				866	case 3: return OFFB_Q3;
				867	case 4: return OFFB_Q4;
				868	case 5: return OFFB_Q5;
				869	case 6: return OFFB_Q6;
				870	case 7: return OFFB_Q7;
				871	case 8: return OFFB_Q8;
				872	case 9: return OFFB_Q9;
				873	case 10: return OFFB_Q10;
				874	case 11: return OFFB_Q11;
				875	case 12: return OFFB_Q12;
				876	case 13: return OFFB_Q13;
				877	case 14: return OFFB_Q14;
				878	case 15: return OFFB_Q15;
				879	case 16: return OFFB_Q16;
				880	case 17: return OFFB_Q17;
				881	case 18: return OFFB_Q18;
				882	case 19: return OFFB_Q19;
				883	case 20: return OFFB_Q20;
				884	case 21: return OFFB_Q21;
				885	case 22: return OFFB_Q22;
				886	case 23: return OFFB_Q23;
				887	case 24: return OFFB_Q24;
				888	case 25: return OFFB_Q25;
				889	case 26: return OFFB_Q26;
				890	case 27: return OFFB_Q27;
				891	case 28: return OFFB_Q28;
				892	case 29: return OFFB_Q29;
				893	case 30: return OFFB_Q30;
				894	case 31: return OFFB_Q31;
				895	default: vassert(0);
				896	}
				897	}
				898
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	899	/* Write to a complete Qreg. */
				900	static void putQReg128 ( UInt qregNo, IRExpr* e )
				901	{
				902	vassert(qregNo < 32);
				903	vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
				904	stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
				905	}
				906
				907	/* Read a complete Qreg. */
				908	static IRExpr* getQReg128 ( UInt qregNo )
				909	{
				910	vassert(qregNo < 32);
				911	return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
				912	}
				913
				914	/* Produce the IR type for some sub-part of a vector. For 32- and 64-
				915	bit sub-parts we can choose either integer or float types, and
				916	choose float on the basis that that is the common use case and so
				917	will give least interference with Put-to-Get forwarding later
				918	on. */
				919	static IRType preferredVectorSubTypeFromSize ( UInt szB )
				920	{
				921	switch (szB) {
				922	case 1: return Ity_I8;
				923	case 2: return Ity_I16;
				924	case 4: return Ity_I32; //Ity_F32;
				925	case 8: return Ity_F64;
				926	case 16: return Ity_V128;
				927	default: vassert(0);
				928	}
				929	}
				930
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	931	/* Find the offset of the laneNo'th lane of type laneTy in the given
				932	Qreg. Since the host is little-endian, the least significant lane
				933	has the lowest offset. */
				934	static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	935	{
				936	vassert(!host_is_bigendian);
				937	Int base = offsetQReg128(qregNo);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	938	/* Since the host is little-endian, the least significant lane
				939	will be at the lowest address. */
				940	/* Restrict this to known types, so as to avoid silently accepting
				941	stupid types. */
				942	UInt laneSzB = 0;
				943	switch (laneTy) {
				944	case Ity_F32: case Ity_I32: laneSzB = 4; break;
				945	case Ity_F64: case Ity_I64: laneSzB = 8; break;
				946	case Ity_V128: laneSzB = 16; break;
				947	default: break;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	948	}
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	949	vassert(laneSzB > 0);
				950	UInt minOff = laneNo * laneSzB;
				951	UInt maxOff = minOff + laneSzB - 1;
				952	vassert(maxOff < 16);
				953	return base + minOff;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	954	}
				955
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	956	/* Put to the least significant lane of a Qreg. */
				957	static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	958	{
				959	IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	960	Int off = offsetQRegLane(qregNo, ty, 0);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	961	switch (ty) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	962	case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
				963	case Ity_F32: case Ity_F64: case Ity_V128:
				964	break;
				965	default:
				966	vassert(0); // Other cases are probably invalid
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	967	}
				968	stmt(IRStmt_Put(off, e));
				969	}
				970
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	971	/* Get from the least significant lane of a Qreg. */
				972	static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	973	{
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	974	Int off = offsetQRegLane(qregNo, ty, 0);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	975	switch (ty) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	976	case Ity_I32: case Ity_I64:
				977	case Ity_F32: case Ity_F64: case Ity_V128:
				978	break;
				979	default:
				980	vassert(0); // Other cases are ATC
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	981	}
				982	return IRExpr_Get(off, ty);
				983	}
				984
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	985	static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	986	{
				987	static const HChar* namesQ[32]
				988	= { "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
				989	"q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
				990	"q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
				991	"q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
				992	static const HChar* namesD[32]
				993	= { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
				994	"d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
				995	"d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
				996	"d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
				997	static const HChar* namesS[32]
				998	= { "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
				999	"s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15",
				1000	"s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
				1001	"s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
				1002	static const HChar* namesH[32]
				1003	= { "h0", "h1", "h2", "h3", "h4", "h5", "h6", "h7",
				1004	"h8", "h9", "h10", "h11", "h12", "h13", "h14", "h15",
				1005	"h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
				1006	"h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
				1007	static const HChar* namesB[32]
				1008	= { "b0", "b1", "b2", "b3", "b4", "b5", "b6", "b7",
				1009	"b8", "b9", "b10", "b11", "b12", "b13", "b14", "b15",
				1010	"b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
				1011	"b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
				1012	vassert(qregNo < 32);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1013	switch (sizeofIRType(laneTy)) {
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1014	case 1: return namesB[qregNo];
				1015	case 2: return namesH[qregNo];
				1016	case 4: return namesS[qregNo];
				1017	case 8: return namesD[qregNo];
				1018	case 16: return namesQ[qregNo];
				1019	default: vassert(0);
				1020	}
				1021	/NOTREACHED/
				1022	}
				1023
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1024	static const HChar* nameQReg128 ( UInt qregNo )
				1025	{
				1026	return nameQRegLO(qregNo, Ity_V128);
				1027	}
				1028
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1029	/* Find the offset of the most significant half (8 bytes) of the given
				1030	Qreg. This requires knowing the endianness of the host. */
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1031	static Int offsetQRegHI64 ( UInt qregNo )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1032	{
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1033	return offsetQRegLane(qregNo, Ity_I64, 1);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1034	}
				1035
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1036	static IRExpr* getQRegHI64 ( UInt qregNo )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1037	{
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1038	return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1039	}
				1040
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1041	static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1042	{
				1043	IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1044	Int off = offsetQRegHI64(qregNo);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1045	switch (ty) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1046	case Ity_I64: case Ity_F64:
				1047	break;
				1048	default:
				1049	vassert(0); // Other cases are plain wrong
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1050	}
				1051	stmt(IRStmt_Put(off, e));
				1052	}
				1053
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1054	/* Put to a specified lane of a Qreg. */
				1055	static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
				1056	{
				1057	IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
				1058	Int off = offsetQRegLane(qregNo, laneTy, laneNo);
				1059	switch (laneTy) {
				1060	case Ity_F64: case Ity_I64:
				1061	break;
				1062	default:
				1063	vassert(0); // Other cases are ATC
				1064	}
				1065	stmt(IRStmt_Put(off, e));
				1066	}
				1067
				1068	/* Get from the least significant lane of a Qreg. */
				1069	static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
				1070	{
				1071	Int off = offsetQRegLane(qregNo, laneTy, laneNo);
				1072	switch (laneTy) {
				1073	case Ity_I64: case Ity_I32:
				1074	break;
				1075	default:
				1076	vassert(0); // Other cases are ATC
				1077	}
				1078	return IRExpr_Get(off, laneTy);
				1079	}
				1080
				1081
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1082	//ZZ /* ---------------- Misc registers ---------------- */
				1083	//ZZ
				1084	//ZZ static void putMiscReg32 ( UInt gsoffset,
				1085	//ZZ IRExpr* e, /* :: Ity_I32 */
				1086	//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
				1087	//ZZ {
				1088	//ZZ switch (gsoffset) {
				1089	//ZZ case OFFB_FPSCR: break;
				1090	//ZZ case OFFB_QFLAG32: break;
				1091	//ZZ case OFFB_GEFLAG0: break;
				1092	//ZZ case OFFB_GEFLAG1: break;
				1093	//ZZ case OFFB_GEFLAG2: break;
				1094	//ZZ case OFFB_GEFLAG3: break;
				1095	//ZZ default: vassert(0); /* awaiting more cases */
				1096	//ZZ }
				1097	//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
				1098	//ZZ
				1099	//ZZ if (guardT == IRTemp_INVALID) {
				1100	//ZZ /* unconditional write */
				1101	//ZZ stmt(IRStmt_Put(gsoffset, e));
				1102	//ZZ } else {
				1103	//ZZ stmt(IRStmt_Put(
				1104	//ZZ gsoffset,
				1105	//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
				1106	//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
				1107	//ZZ ));
				1108	//ZZ }
				1109	//ZZ }
				1110	//ZZ
				1111	//ZZ static IRTemp get_ITSTATE ( void )
				1112	//ZZ {
				1113	//ZZ ASSERT_IS_THUMB;
				1114	//ZZ IRTemp t = newTemp(Ity_I32);
				1115	//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
				1116	//ZZ return t;
				1117	//ZZ }
				1118	//ZZ
				1119	//ZZ static void put_ITSTATE ( IRTemp t )
				1120	//ZZ {
				1121	//ZZ ASSERT_IS_THUMB;
				1122	//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
				1123	//ZZ }
				1124	//ZZ
				1125	//ZZ static IRTemp get_QFLAG32 ( void )
				1126	//ZZ {
				1127	//ZZ IRTemp t = newTemp(Ity_I32);
				1128	//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
				1129	//ZZ return t;
				1130	//ZZ }
				1131	//ZZ
				1132	//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
				1133	//ZZ {
				1134	//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
				1135	//ZZ }
				1136	//ZZ
				1137	//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
				1138	//ZZ Status Register) to indicate that overflow or saturation occurred.
				1139	//ZZ Nb: t must be zero to denote no saturation, and any nonzero
				1140	//ZZ value to indicate saturation. */
				1141	//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
				1142	//ZZ {
				1143	//ZZ IRTemp old = get_QFLAG32();
				1144	//ZZ IRTemp nyu = newTemp(Ity_I32);
				1145	//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
				1146	//ZZ put_QFLAG32(nyu, condT);
				1147	//ZZ }
				1148
				1149
				1150	/* ---------------- FPCR stuff ---------------- */
				1151
				1152	/* Generate IR to get hold of the rounding mode bits in FPCR, and
				1153	convert them to IR format. Bind the final result to the
				1154	returned temp. */
				1155	static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
				1156	{
				1157	/* The ARMvfp encoding for rounding mode bits is:
				1158	00 to nearest
				1159	01 to +infinity
				1160	10 to -infinity
				1161	11 to zero
				1162	We need to convert that to the IR encoding:
				1163	00 to nearest (the default)
				1164	10 to +infinity
				1165	01 to -infinity
				1166	11 to zero
				1167	Which can be done by swapping bits 0 and 1.
				1168	The rmode bits are at 23:22 in FPSCR.
				1169	*/
				1170	IRTemp armEncd = newTemp(Ity_I32);
				1171	IRTemp swapped = newTemp(Ity_I32);
				1172	/* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
				1173	we don't zero out bits 24 and above, since the assignment to
				1174	'swapped' will mask them out anyway. */
				1175	assign(armEncd,
				1176	binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
				1177	/* Now swap them. */
				1178	assign(swapped,
				1179	binop(Iop_Or32,
				1180	binop(Iop_And32,
				1181	binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
				1182	mkU32(2)),
				1183	binop(Iop_And32,
				1184	binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
				1185	mkU32(1))
				1186	));
				1187	return swapped;
				1188	}
				1189
				1190
				1191	/------------------------------------------------------------/
				1192	/--- Helpers for flag handling and conditional insns ---/
				1193	/------------------------------------------------------------/
				1194
				1195	static const HChar* nameARM64Condcode ( ARM64Condcode cond )
				1196	{
				1197	switch (cond) {
				1198	case ARM64CondEQ: return "eq";
				1199	case ARM64CondNE: return "ne";
				1200	case ARM64CondCS: return "cs"; // or 'hs'
				1201	case ARM64CondCC: return "cc"; // or 'lo'
				1202	case ARM64CondMI: return "mi";
				1203	case ARM64CondPL: return "pl";
				1204	case ARM64CondVS: return "vs";
				1205	case ARM64CondVC: return "vc";
				1206	case ARM64CondHI: return "hi";
				1207	case ARM64CondLS: return "ls";
				1208	case ARM64CondGE: return "ge";
				1209	case ARM64CondLT: return "lt";
				1210	case ARM64CondGT: return "gt";
				1211	case ARM64CondLE: return "le";
				1212	case ARM64CondAL: return "al";
				1213	case ARM64CondNV: return "nv";
				1214	default: vpanic("name_ARM64Condcode");
				1215	}
				1216	}
				1217
				1218	/* and a handy shorthand for it */
				1219	static const HChar* nameCC ( ARM64Condcode cond ) {
				1220	return nameARM64Condcode(cond);
				1221	}
				1222
				1223
				1224	/* Build IR to calculate some particular condition from stored
				1225	CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
				1226	Ity_I64, suitable for narrowing. Although the return type is
				1227	Ity_I64, the returned value is either 0 or 1. 'cond' must be
				1228	:: Ity_I64 and must denote the condition to compute in
				1229	bits 7:4, and be zero everywhere else.
				1230	*/
				1231	static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
				1232	{
				1233	vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
				1234	/* And 'cond' had better produce a value in which only bits 7:4 are
				1235	nonzero. However, obviously we can't assert for that. */
				1236
				1237	/* So what we're constructing for the first argument is
				1238	"(cond << 4) \| stored-operation".
				1239	However, as per comments above, 'cond' must be supplied
				1240	pre-shifted to this function.
				1241
				1242	This pairing scheme requires that the ARM64_CC_OP_ values all fit
				1243	in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
				1244	8 bits of the first argument. */
				1245	IRExpr** args
				1246	= mkIRExprVec_4(
				1247	binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
				1248	IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
				1249	IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
				1250	IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
				1251	);
				1252	IRExpr* call
				1253	= mkIRExprCCall(
				1254	Ity_I64,
				1255	0/regparm/,
				1256	"arm64g_calculate_condition", &arm64g_calculate_condition,
				1257	args
				1258	);
				1259
				1260	/* Exclude the requested condition, OP and NDEP from definedness
				1261	checking. We're only interested in DEP1 and DEP2. */
				1262	call->Iex.CCall.cee->mcx_mask = (1<<0) \| (1<<3);
				1263	return call;
				1264	}
				1265
				1266
				1267	/* Build IR to calculate some particular condition from stored
				1268	CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
				1269	Ity_I64, suitable for narrowing. Although the return type is
				1270	Ity_I64, the returned value is either 0 or 1.
				1271	*/
				1272	static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
				1273	{
				1274	/* First arg is "(cond << 4) \| condition". This requires that the
				1275	ARM64_CC_OP_ values all fit in 4 bits. Hence we are passing a
				1276	(COND, OP) pair in the lowest 8 bits of the first argument. */
				1277	vassert(cond >= 0 && cond <= 15);
				1278	return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
				1279	}
				1280
				1281
				1282	//ZZ /* Build IR to calculate just the carry flag from stored
				1283	//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
				1284	//ZZ Ity_I32. */
				1285	//ZZ static IRExpr* mk_armg_calculate_flag_c ( void )
				1286	//ZZ {
				1287	//ZZ IRExpr** args
				1288	//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
				1289	//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
				1290	//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
				1291	//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
				1292	//ZZ IRExpr* call
				1293	//ZZ = mkIRExprCCall(
				1294	//ZZ Ity_I32,
				1295	//ZZ 0/regparm/,
				1296	//ZZ "armg_calculate_flag_c", &armg_calculate_flag_c,
				1297	//ZZ args
				1298	//ZZ );
				1299	//ZZ /* Exclude OP and NDEP from definedness checking. We're only
				1300	//ZZ interested in DEP1 and DEP2. */
				1301	//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) \| (1<<3);
				1302	//ZZ return call;
				1303	//ZZ }
				1304	//ZZ
				1305	//ZZ
				1306	//ZZ /* Build IR to calculate just the overflow flag from stored
				1307	//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
				1308	//ZZ Ity_I32. */
				1309	//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
				1310	//ZZ {
				1311	//ZZ IRExpr** args
				1312	//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
				1313	//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
				1314	//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
				1315	//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
				1316	//ZZ IRExpr* call
				1317	//ZZ = mkIRExprCCall(
				1318	//ZZ Ity_I32,
				1319	//ZZ 0/regparm/,
				1320	//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
				1321	//ZZ args
				1322	//ZZ );
				1323	//ZZ /* Exclude OP and NDEP from definedness checking. We're only
				1324	//ZZ interested in DEP1 and DEP2. */
				1325	//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) \| (1<<3);
				1326	//ZZ return call;
				1327	//ZZ }
				1328
				1329
				1330	/* Build IR to calculate N Z C V in bits 31:28 of the
				1331	returned word. */
				1332	static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
				1333	{
				1334	IRExpr** args
				1335	= mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
				1336	IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
				1337	IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
				1338	IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
				1339	IRExpr* call
				1340	= mkIRExprCCall(
				1341	Ity_I64,
				1342	0/regparm/,
				1343	"arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
				1344	args
				1345	);
				1346	/* Exclude OP and NDEP from definedness checking. We're only
				1347	interested in DEP1 and DEP2. */
				1348	call->Iex.CCall.cee->mcx_mask = (1<<0) \| (1<<3);
				1349	return call;
				1350	}
				1351
				1352
				1353	/* Build IR to set the flags thunk, in the most general case. */
				1354	static
				1355	void setFlags_D1_D2_ND ( UInt cc_op,
				1356	IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
				1357	{
				1358	vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
				1359	vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
				1360	vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
				1361	vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
				1362	stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
				1363	stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
				1364	stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
				1365	stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
				1366	}
				1367
				1368	/* Build IR to set the flags thunk after ADD or SUB. */
				1369	static
				1370	void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
				1371	{
				1372	IRTemp argL64 = IRTemp_INVALID;
				1373	IRTemp argR64 = IRTemp_INVALID;
				1374	IRTemp z64 = newTemp(Ity_I64);
				1375	if (is64) {
				1376	argL64 = argL;
				1377	argR64 = argR;
				1378	} else {
				1379	argL64 = newTemp(Ity_I64);
				1380	argR64 = newTemp(Ity_I64);
				1381	assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
				1382	assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
				1383	}
				1384	assign(z64, mkU64(0));
				1385	UInt cc_op = ARM64G_CC_OP_NUMBER;
				1386	/**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
				1387	else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
				1388	else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
				1389	else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
				1390	else { vassert(0); }
				1391	setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
				1392	}
				1393
				1394	/* Build IR to set the flags thunk after ADD or SUB, if the given
				1395	condition evaluates to True at run time. If not, the flags are set
				1396	to the specified NZCV value. */
				1397	static
				1398	void setFlags_ADD_SUB_conditionally (
				1399	Bool is64, Bool isSUB,
				1400	IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
				1401	)
				1402	{
				1403	/* Generate IR as follows:
				1404	CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
				1405	CC_DEP1 = ITE(cond, argL64, nzcv << 28)
				1406	CC_DEP2 = ITE(cond, argR64, 0)
				1407	CC_NDEP = 0
				1408	*/
				1409
				1410	IRTemp z64 = newTemp(Ity_I64);
				1411	assign(z64, mkU64(0));
				1412
				1413	/* Establish the operation and operands for the True case. */
				1414	IRTemp t_dep1 = IRTemp_INVALID;
				1415	IRTemp t_dep2 = IRTemp_INVALID;
				1416	UInt t_op = ARM64G_CC_OP_NUMBER;
				1417	/**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
				1418	else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
				1419	else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
				1420	else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
				1421	else { vassert(0); }
				1422	/* */
				1423	if (is64) {
				1424	t_dep1 = argL;
				1425	t_dep2 = argR;
				1426	} else {
				1427	t_dep1 = newTemp(Ity_I64);
				1428	t_dep2 = newTemp(Ity_I64);
				1429	assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
				1430	assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
				1431	}
				1432
				1433	/* Establish the operation and operands for the False case. */
				1434	IRTemp f_dep1 = newTemp(Ity_I64);
				1435	IRTemp f_dep2 = z64;
				1436	UInt f_op = ARM64G_CC_OP_COPY;
				1437	assign(f_dep1, mkU64(nzcv << 28));
				1438
				1439	/* Final thunk values */
				1440	IRTemp dep1 = newTemp(Ity_I64);
				1441	IRTemp dep2 = newTemp(Ity_I64);
				1442	IRTemp op = newTemp(Ity_I64);
				1443
				1444	assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
				1445	assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
				1446	assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
				1447
				1448	/* finally .. */
				1449	stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
				1450	stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
				1451	stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
				1452	stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
				1453	}
				1454
				1455	/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
				1456	static
				1457	void setFlags_LOGIC ( Bool is64, IRTemp res )
				1458	{
				1459	IRTemp res64 = IRTemp_INVALID;
				1460	IRTemp z64 = newTemp(Ity_I64);
				1461	UInt cc_op = ARM64G_CC_OP_NUMBER;
				1462	if (is64) {
				1463	res64 = res;
				1464	cc_op = ARM64G_CC_OP_LOGIC64;
				1465	} else {
				1466	res64 = newTemp(Ity_I64);
				1467	assign(res64, unop(Iop_32Uto64, mkexpr(res)));
				1468	cc_op = ARM64G_CC_OP_LOGIC32;
				1469	}
				1470	assign(z64, mkU64(0));
				1471	setFlags_D1_D2_ND(cc_op, res64, z64, z64);
				1472	}
				1473
				1474	/* Build IR to set the flags thunk to a given NZCV value. NZCV is
				1475	located in bits 31:28 of the supplied value. */
				1476	static
				1477	void setFlags_COPY ( IRTemp nzcv_28x0 )
				1478	{
				1479	IRTemp z64 = newTemp(Ity_I64);
				1480	assign(z64, mkU64(0));
				1481	setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
				1482	}
				1483
				1484
				1485	//ZZ /* Minor variant of the above that sets NDEP to zero (if it
				1486	//ZZ sets it at all) */
				1487	//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
				1488	//ZZ IRTemp t_dep2,
				1489	//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
				1490	//ZZ {
				1491	//ZZ IRTemp z32 = newTemp(Ity_I32);
				1492	//ZZ assign( z32, mkU32(0) );
				1493	//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
				1494	//ZZ }
				1495	//ZZ
				1496	//ZZ
				1497	//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
				1498	//ZZ sets it at all) */
				1499	//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
				1500	//ZZ IRTemp t_ndep,
				1501	//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
				1502	//ZZ {
				1503	//ZZ IRTemp z32 = newTemp(Ity_I32);
				1504	//ZZ assign( z32, mkU32(0) );
				1505	//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
				1506	//ZZ }
				1507	//ZZ
				1508	//ZZ
				1509	//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
				1510	//ZZ sets them at all) */
				1511	//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
				1512	//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
				1513	//ZZ {
				1514	//ZZ IRTemp z32 = newTemp(Ity_I32);
				1515	//ZZ assign( z32, mkU32(0) );
				1516	//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
				1517	//ZZ }
				1518
				1519
				1520	/------------------------------------------------------------/
				1521	/--- Misc math helpers ---/
				1522	/------------------------------------------------------------/
				1523
				1524	/* Generates a 64-bit byte swap. */
				1525	static IRTemp math_BSWAP64 ( IRTemp t1 )
				1526	{
				1527	IRTemp t2 = newTemp(Ity_I64);
				1528	IRTemp m8 = newTemp(Ity_I64);
				1529	IRTemp s8 = newTemp(Ity_I64);
				1530	IRTemp m16 = newTemp(Ity_I64);
				1531	IRTemp s16 = newTemp(Ity_I64);
				1532	IRTemp m32 = newTemp(Ity_I64);
				1533	assign( m8, mkU64(0xFF00FF00FF00FF00ULL) );
				1534	assign( s8,
				1535	binop(Iop_Or64,
				1536	binop(Iop_Shr64,
				1537	binop(Iop_And64,mkexpr(t1),mkexpr(m8)),
				1538	mkU8(8)),
				1539	binop(Iop_And64,
				1540	binop(Iop_Shl64,mkexpr(t1),mkU8(8)),
				1541	mkexpr(m8))
				1542	)
				1543	);
				1544
				1545	assign( m16, mkU64(0xFFFF0000FFFF0000ULL) );
				1546	assign( s16,
				1547	binop(Iop_Or64,
				1548	binop(Iop_Shr64,
				1549	binop(Iop_And64,mkexpr(s8),mkexpr(m16)),
				1550	mkU8(16)),
				1551	binop(Iop_And64,
				1552	binop(Iop_Shl64,mkexpr(s8),mkU8(16)),
				1553	mkexpr(m16))
				1554	)
				1555	);
				1556
				1557	assign( m32, mkU64(0xFFFFFFFF00000000ULL) );
				1558	assign( t2,
				1559	binop(Iop_Or64,
				1560	binop(Iop_Shr64,
				1561	binop(Iop_And64,mkexpr(s16),mkexpr(m32)),
				1562	mkU8(32)),
				1563	binop(Iop_And64,
				1564	binop(Iop_Shl64,mkexpr(s16),mkU8(32)),
				1565	mkexpr(m32))
				1566	)
				1567	);
				1568	return t2;
				1569	}
				1570
				1571
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	1572	/* Duplicates the bits at the bottom of the given word to fill the
				1573	whole word. src :: Ity_I64 is assumed to have zeroes everywhere
				1574	except for the bottom bits. */
				1575	static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
				1576	{
				1577	if (srcTy == Ity_I8) {
				1578	IRTemp t16 = newTemp(Ity_I64);
				1579	assign(t16, binop(Iop_Or64, mkexpr(src),
				1580	binop(Iop_Shl64, mkexpr(src), mkU8(8))));
				1581	IRTemp t32 = newTemp(Ity_I64);
				1582	assign(t32, binop(Iop_Or64, mkexpr(t16),
				1583	binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
				1584	IRTemp t64 = newTemp(Ity_I64);
				1585	assign(t64, binop(Iop_Or64, mkexpr(t32),
				1586	binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
				1587	return t64;
				1588	}
				1589	if (srcTy == Ity_I16) {
				1590	IRTemp t32 = newTemp(Ity_I64);
				1591	assign(t32, binop(Iop_Or64, mkexpr(src),
				1592	binop(Iop_Shl64, mkexpr(src), mkU8(16))));
				1593	IRTemp t64 = newTemp(Ity_I64);
				1594	assign(t64, binop(Iop_Or64, mkexpr(t32),
				1595	binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
				1596	return t64;
				1597	}
				1598	if (srcTy == Ity_I32) {
				1599	IRTemp t64 = newTemp(Ity_I64);
				1600	assign(t64, binop(Iop_Or64, mkexpr(src),
				1601	binop(Iop_Shl64, mkexpr(src), mkU8(32))));
				1602	return t64;
				1603	}
				1604	if (srcTy == Ity_I64) {
				1605	return src;
				1606	}
				1607	vassert(0);
				1608	}
				1609
				1610
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	1611	/------------------------------------------------------------/
				1612	/--- FP comparison helpers ---/
				1613	/------------------------------------------------------------/
				1614
				1615	/* irRes :: Ity_I32 holds a floating point comparison result encoded
				1616	as an IRCmpF64Result. Generate code to convert it to an
				1617	ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
				1618	Assign a new temp to hold that value, and return the temp. */
				1619	static
				1620	IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
				1621	{
				1622	IRTemp ix = newTemp(Ity_I64);
				1623	IRTemp termL = newTemp(Ity_I64);
				1624	IRTemp termR = newTemp(Ity_I64);
				1625	IRTemp nzcv = newTemp(Ity_I64);
				1626	IRTemp irRes = newTemp(Ity_I64);
				1627
				1628	/* This is where the fun starts. We have to convert 'irRes' from
				1629	an IR-convention return result (IRCmpF64Result) to an
				1630	ARM-encoded (N,Z,C,V) group. The final result is in the bottom
				1631	4 bits of 'nzcv'. */
				1632	/* Map compare result from IR to ARM(nzcv) */
				1633	/*
				1634	FP cmp result \| IR \| ARM(nzcv)
				1635	--------------------------------
				1636	UN 0x45 0011
				1637	LT 0x01 1000
				1638	GT 0x00 0010
				1639	EQ 0x40 0110
				1640	*/
				1641	/* Now since you're probably wondering WTF ..
				1642
				1643	ix fishes the useful bits out of the IR value, bits 6 and 0, and
				1644	places them side by side, giving a number which is 0, 1, 2 or 3.
				1645
				1646	termL is a sequence cooked up by GNU superopt. It converts ix
				1647	into an almost correct value NZCV value (incredibly), except
				1648	for the case of UN, where it produces 0100 instead of the
				1649	required 0011.
				1650
				1651	termR is therefore a correction term, also computed from ix. It
				1652	is 1 in the UN case and 0 for LT, GT and UN. Hence, to get
				1653	the final correct value, we subtract termR from termL.
				1654
				1655	Don't take my word for it. There's a test program at the bottom
				1656	of guest_arm_toIR.c, to try this out with.
				1657	*/
				1658	assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));
				1659
				1660	assign(
				1661	ix,
				1662	binop(Iop_Or64,
				1663	binop(Iop_And64,
				1664	binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
				1665	mkU64(3)),
				1666	binop(Iop_And64, mkexpr(irRes), mkU64(1))));
				1667
				1668	assign(
				1669	termL,
				1670	binop(Iop_Add64,
				1671	binop(Iop_Shr64,
				1672	binop(Iop_Sub64,
				1673	binop(Iop_Shl64,
				1674	binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
				1675	mkU8(62)),
				1676	mkU64(1)),
				1677	mkU8(61)),
				1678	mkU64(1)));
				1679
				1680	assign(
				1681	termR,
				1682	binop(Iop_And64,
				1683	binop(Iop_And64,
				1684	mkexpr(ix),
				1685	binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
				1686	mkU64(1)));
				1687
				1688	assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
				1689	return nzcv;
				1690	}
				1691
				1692
				1693	/------------------------------------------------------------/
				1694	/--- Data processing (immediate) ---/
				1695	/------------------------------------------------------------/
				1696
				1697	/* Helper functions for supporting "DecodeBitMasks" */
				1698
				1699	static ULong dbm_ROR ( Int width, ULong x, Int rot )
				1700	{
				1701	vassert(width > 0 && width <= 64);
				1702	vassert(rot >= 0 && rot < width);
				1703	if (rot == 0) return x;
				1704	ULong res = x >> rot;
				1705	res \|= (x << (width - rot));
				1706	if (width < 64)
				1707	res &= ((1ULL << width) - 1);
				1708	return res;
				1709	}
				1710
				1711	static ULong dbm_RepTo64( Int esize, ULong x )
				1712	{
				1713	switch (esize) {
				1714	case 64:
				1715	return x;
				1716	case 32:
				1717	x &= 0xFFFFFFFF; x \|= (x << 32);
				1718	return x;
				1719	case 16:
				1720	x &= 0xFFFF; x \|= (x << 16); x \|= (x << 32);
				1721	return x;
				1722	case 8:
				1723	x &= 0xFF; x \|= (x << 8); x \|= (x << 16); x \|= (x << 32);
				1724	return x;
				1725	case 4:
				1726	x &= 0xF; x \|= (x << 4); x \|= (x << 8);
				1727	x \|= (x << 16); x \|= (x << 32);
				1728	return x;
				1729	case 2:
				1730	x &= 0x3; x \|= (x << 2); x \|= (x << 4); x \|= (x << 8);
				1731	x \|= (x << 16); x \|= (x << 32);
				1732	return x;
				1733	default:
				1734	break;
				1735	}
				1736	vpanic("dbm_RepTo64");
				1737	/NOTREACHED/
				1738	return 0;
				1739	}
				1740
				1741	static Int dbm_highestSetBit ( ULong x )
				1742	{
				1743	Int i;
				1744	for (i = 63; i >= 0; i--) {
				1745	if (x & (1ULL << i))
				1746	return i;
				1747	}
				1748	vassert(x == 0);
				1749	return -1;
				1750	}
				1751
				1752	static
				1753	Bool dbm_DecodeBitMasks ( /OUT/ULong* wmask, /OUT/ULong* tmask,
				1754	ULong immN, ULong imms, ULong immr, Bool immediate,
				1755	UInt M /32 or 64/)
				1756	{
				1757	vassert(immN < (1ULL << 1));
				1758	vassert(imms < (1ULL << 6));
				1759	vassert(immr < (1ULL << 6));
				1760	vassert(immediate == False \|\| immediate == True);
				1761	vassert(M == 32 \|\| M == 64);
				1762
				1763	Int len = dbm_highestSetBit( ((immN << 6) & 64) \| ((~imms) & 63) );
				1764	if (len < 1) { /* printf("fail1\n"); */ return False; }
				1765	vassert(len <= 6);
				1766	vassert(M >= (1 << len));
				1767
				1768	vassert(len >= 1 && len <= 6);
				1769	ULong levels = // (zeroes(6 - len) << (6-len)) \| ones(len);
				1770	(1 << len) - 1;
				1771	vassert(levels >= 1 && levels <= 63);
				1772
				1773	if (immediate && ((imms & levels) == levels)) {
				1774	/* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
				1775	return False;
				1776	}
				1777
				1778	ULong S = imms & levels;
				1779	ULong R = immr & levels;
				1780	Int diff = S - R;
				1781	diff &= 63;
				1782	Int esize = 1 << len;
				1783	vassert(2 <= esize && esize <= 64);
				1784
				1785	/* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
				1786	same below with d. S can be 63 in which case we have an out of
				1787	range and hence undefined shift. */
				1788	vassert(S >= 0 && S <= 63);
				1789	vassert(esize >= (S+1));
				1790	ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
				1791	//(1ULL << (S+1)) - 1;
				1792	((1ULL << S) - 1) + (1ULL << S);
				1793
				1794	Int d = // diff<len-1:0>
				1795	diff & ((1 << len)-1);
				1796	vassert(esize >= (d+1));
				1797	vassert(d >= 0 && d <= 63);
				1798
				1799	ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
				1800	//(1ULL << (d+1)) - 1;
				1801	((1ULL << d) - 1) + (1ULL << d);
				1802
				1803	if (esize != 64) vassert(elem_s < (1ULL << esize));
				1804	if (esize != 64) vassert(elem_d < (1ULL << esize));
				1805
				1806	if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
				1807	if (tmask) *tmask = dbm_RepTo64(esize, elem_d);
				1808
				1809	return True;
				1810	}
				1811
				1812
				1813	static
				1814	Bool dis_ARM64_data_processing_immediate(/MB_OUT/DisResult* dres,
				1815	UInt insn)
				1816	{
				1817	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				1818
				1819	/* insn[28:23]
				1820	10000x PC-rel addressing
				1821	10001x Add/subtract (immediate)
				1822	100100 Logical (immediate)
				1823	100101 Move Wide (immediate)
				1824	100110 Bitfield
				1825	100111 Extract
				1826	*/
				1827
				1828	/* ------------------ ADD/SUB{,S} imm12 ------------------ */
				1829	if (INSN(28,24) == BITS5(1,0,0,0,1)) {
				1830	Bool is64 = INSN(31,31) == 1;
				1831	Bool isSub = INSN(30,30) == 1;
				1832	Bool setCC = INSN(29,29) == 1;
				1833	UInt sh = INSN(23,22);
				1834	UInt uimm12 = INSN(21,10);
				1835	UInt nn = INSN(9,5);
				1836	UInt dd = INSN(4,0);
				1837	const HChar* nm = isSub ? "sub" : "add";
				1838	if (sh >= 2) {
				1839	/* Invalid; fall through */
				1840	} else {
				1841	vassert(sh <= 1);
				1842	uimm12 <<= (12 * sh);
				1843	if (is64) {
				1844	IRTemp argL = newTemp(Ity_I64);
				1845	IRTemp argR = newTemp(Ity_I64);
				1846	IRTemp res = newTemp(Ity_I64);
				1847	assign(argL, getIReg64orSP(nn));
				1848	assign(argR, mkU64(uimm12));
				1849	assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
				1850	mkexpr(argL), mkexpr(argR)));
				1851	if (setCC) {
				1852	putIReg64orZR(dd, mkexpr(res));
				1853	setFlags_ADD_SUB(True/is64/, isSub, argL, argR);
				1854	DIP("%ss %s, %s, 0x%x\n",
				1855	nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
				1856	} else {
				1857	putIReg64orSP(dd, mkexpr(res));
				1858	DIP("%s %s, %s, 0x%x\n",
				1859	nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
				1860	}
				1861	} else {
				1862	IRTemp argL = newTemp(Ity_I32);
				1863	IRTemp argR = newTemp(Ity_I32);
				1864	IRTemp res = newTemp(Ity_I32);
				1865	assign(argL, getIReg32orSP(nn));
				1866	assign(argR, mkU32(uimm12));
				1867	assign(res, binop(isSub ? Iop_Sub32 : Iop_Add32,
				1868	mkexpr(argL), mkexpr(argR)));
				1869	if (setCC) {
				1870	putIReg32orZR(dd, mkexpr(res));
				1871	setFlags_ADD_SUB(False/!is64/, isSub, argL, argR);
				1872	DIP("%ss %s, %s, 0x%x\n",
				1873	nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
				1874	} else {
				1875	putIReg32orSP(dd, mkexpr(res));
				1876	DIP("%s %s, %s, 0x%x\n",
				1877	nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
				1878	}
				1879	}
				1880	return True;
				1881	}
				1882	}
				1883
				1884	/* -------------------- ADR/ADRP -------------------- */
				1885	if (INSN(28,24) == BITS5(1,0,0,0,0)) {
				1886	UInt bP = INSN(31,31);
				1887	UInt immLo = INSN(30,29);
				1888	UInt immHi = INSN(23,5);
				1889	UInt rD = INSN(4,0);
				1890	ULong uimm = (immHi << 2) \| immLo;
				1891	ULong simm = sx_to_64(uimm, 21);
				1892	ULong val;
				1893	if (bP) {
				1894	val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
				1895	} else {
				1896	val = guest_PC_curr_instr + simm;
				1897	}
				1898	putIReg64orZR(rD, mkU64(val));
				1899	DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
				1900	return True;
				1901	}
				1902
				1903	/* -------------------- LOGIC(imm) -------------------- */
				1904	if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
				1905	/* 31 30 28 22 21 15 9 4
				1906	sf op 100100 N immr imms Rn Rd
				1907	op=00: AND Rd\|SP, Rn, #imm
				1908	op=01: ORR Rd\|SP, Rn, #imm
				1909	op=10: EOR Rd\|SP, Rn, #imm
				1910	op=11: ANDS Rd\|ZR, Rn, #imm
				1911	*/
				1912	Bool is64 = INSN(31,31) == 1;
				1913	UInt op = INSN(30,29);
				1914	UInt N = INSN(22,22);
				1915	UInt immR = INSN(21,16);
				1916	UInt immS = INSN(15,10);
				1917	UInt nn = INSN(9,5);
				1918	UInt dd = INSN(4,0);
				1919	ULong imm = 0;
				1920	Bool ok;
				1921	if (N == 1 && !is64)
				1922	goto after_logic_imm; /* not allowed; fall through */
				1923	ok = dbm_DecodeBitMasks(&imm, NULL,
				1924	N, immS, immR, True, is64 ? 64 : 32);
				1925	if (!ok)
				1926	goto after_logic_imm;
				1927
				1928	const HChar* names[4] = { "and", "orr", "eor", "ands" };
				1929	const IROp ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
				1930	const IROp ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };
				1931
				1932	vassert(op < 4);
				1933	if (is64) {
				1934	IRExpr* argL = getIReg64orZR(nn);
				1935	IRExpr* argR = mkU64(imm);
				1936	IRTemp res = newTemp(Ity_I64);
				1937	assign(res, binop(ops64[op], argL, argR));
				1938	if (op < 3) {
				1939	putIReg64orSP(dd, mkexpr(res));
				1940	DIP("%s %s, %s, 0x%llx\n", names[op],
				1941	nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
				1942	} else {
				1943	putIReg64orZR(dd, mkexpr(res));
				1944	setFlags_LOGIC(True/is64/, res);
				1945	DIP("%s %s, %s, 0x%llx\n", names[op],
				1946	nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
				1947	}
				1948	} else {
				1949	IRExpr* argL = getIReg32orZR(nn);
				1950	IRExpr* argR = mkU32((UInt)imm);
				1951	IRTemp res = newTemp(Ity_I32);
				1952	assign(res, binop(ops32[op], argL, argR));
				1953	if (op < 3) {
				1954	putIReg32orSP(dd, mkexpr(res));
				1955	DIP("%s %s, %s, 0x%x\n", names[op],
				1956	nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
				1957	} else {
				1958	putIReg32orZR(dd, mkexpr(res));
				1959	setFlags_LOGIC(False/!is64/, res);
				1960	DIP("%s %s, %s, 0x%x\n", names[op],
				1961	nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
				1962	}
				1963	}
				1964	return True;
				1965	}
				1966	after_logic_imm:
				1967
				1968	/* -------------------- MOV{Z,N,K} -------------------- */
				1969	if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
				1970	/* 31 30 28 22 20 4
				1971	\| \| \| \| \| \|
				1972	sf 10 100 101 hw imm16 Rd MOV(Z) Rd, (imm16 << (16*hw))
				1973	sf 00 100 101 hw imm16 Rd MOV(N) Rd, ~(imm16 << (16*hw))
				1974	sf 11 100 101 hw imm16 Rd MOV(K) Rd, (imm16 << (16*hw))
				1975	*/
				1976	Bool is64 = INSN(31,31) == 1;
				1977	UInt subopc = INSN(30,29);
				1978	UInt hw = INSN(22,21);
				1979	UInt imm16 = INSN(20,5);
				1980	UInt dd = INSN(4,0);
				1981	if (subopc == BITS2(0,1) \|\| (!is64 && hw >= 2)) {
				1982	/* invalid; fall through */
				1983	} else {
				1984	ULong imm64 = ((ULong)imm16) << (16 * hw);
				1985	if (!is64)
				1986	vassert(imm64 < 0x100000000ULL);
				1987	switch (subopc) {
				1988	case BITS2(1,0): // MOVZ
				1989	putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
				1990	DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
				1991	break;
				1992	case BITS2(0,0): // MOVN
				1993	imm64 = ~imm64;
				1994	if (!is64)
				1995	imm64 &= 0xFFFFFFFFULL;
				1996	putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
				1997	DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
				1998	break;
				1999	case BITS2(1,1): // MOVK
				2000	/* This is more complex. We are inserting a slice into
				2001	the destination register, so we need to have the old
				2002	value of it. */
				2003	if (is64) {
				2004	IRTemp old = newTemp(Ity_I64);
				2005	assign(old, getIReg64orZR(dd));
				2006	ULong mask = 0xFFFFULL << (16 * hw);
				2007	IRExpr* res
				2008	= binop(Iop_Or64,
				2009	binop(Iop_And64, mkexpr(old), mkU64(~mask)),
				2010	mkU64(imm64));
				2011	putIReg64orZR(dd, res);
				2012	DIP("movk %s, 0x%x, lsl %u\n",
				2013	nameIReg64orZR(dd), imm16, 16*hw);
				2014	} else {
				2015	IRTemp old = newTemp(Ity_I32);
				2016	assign(old, getIReg32orZR(dd));
				2017	vassert(hw <= 1);
				2018	UInt mask = 0xFFFF << (16 * hw);
				2019	IRExpr* res
				2020	= binop(Iop_Or32,
				2021	binop(Iop_And32, mkexpr(old), mkU32(~mask)),
				2022	mkU32((UInt)imm64));
				2023	putIReg32orZR(dd, res);
				2024	DIP("movk %s, 0x%x, lsl %u\n",
				2025	nameIReg32orZR(dd), imm16, 16*hw);
				2026	}
				2027	break;
				2028	default:
				2029	vassert(0);
				2030	}
				2031	return True;
				2032	}
				2033	}
				2034
				2035	/* -------------------- {U,S,}BFM -------------------- */
				2036	/* 30 28 22 21 15 9 4
				2037
				2038	sf 10 100110 N immr imms nn dd
				2039	UBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
				2040	UBFM Xd, Xn, #immr, #imms when sf=1, N=1
				2041
				2042	sf 00 100110 N immr imms nn dd
				2043	SBFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
				2044	SBFM Xd, Xn, #immr, #imms when sf=1, N=1
				2045
				2046	sf 01 100110 N immr imms nn dd
				2047	BFM Wd, Wn, #immr, #imms when sf=0, N=0, immr[5]=0, imms[5]=0
				2048	BFM Xd, Xn, #immr, #imms when sf=1, N=1
				2049	*/
				2050	if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
				2051	UInt sf = INSN(31,31);
				2052	UInt opc = INSN(30,29);
				2053	UInt N = INSN(22,22);
				2054	UInt immR = INSN(21,16);
				2055	UInt immS = INSN(15,10);
				2056	UInt nn = INSN(9,5);
				2057	UInt dd = INSN(4,0);
				2058	Bool inZero = False;
				2059	Bool extend = False;
				2060	const HChar* nm = "???";
				2061	/* skip invalid combinations */
				2062	switch (opc) {
				2063	case BITS2(0,0):
				2064	inZero = True; extend = True; nm = "sbfm"; break;
				2065	case BITS2(0,1):
				2066	inZero = False; extend = False; nm = "bfm"; break;
				2067	case BITS2(1,0):
				2068	inZero = True; extend = False; nm = "ubfm"; break;
				2069	case BITS2(1,1):
				2070	goto after_bfm; /* invalid */
				2071	default:
				2072	vassert(0);
				2073	}
				2074	if (sf == 1 && N != 1) goto after_bfm;
				2075	if (sf == 0 && (N != 0 \|\| ((immR >> 5) & 1) != 0
				2076	\|\| ((immS >> 5) & 1) != 0)) goto after_bfm;
				2077	ULong wmask = 0, tmask = 0;
				2078	Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
				2079	N, immS, immR, False, sf == 1 ? 64 : 32);
				2080	if (!ok) goto after_bfm; /* hmmm */
				2081
				2082	Bool is64 = sf == 1;
				2083	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2084
				2085	IRTemp dst = newTemp(ty);
				2086	IRTemp src = newTemp(ty);
				2087	IRTemp bot = newTemp(ty);
				2088	IRTemp top = newTemp(ty);
				2089	IRTemp res = newTemp(ty);
				2090	assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
				2091	assign(src, getIRegOrZR(is64, nn));
				2092	/* perform bitfield move on low bits */
				2093	assign(bot, binop(mkOR(ty),
				2094	binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
				2095	binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
				2096	mkU(ty, wmask))));
				2097	/* determine extension bits (sign, zero or dest register) */
				2098	assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
				2099	/* combine extension bits and result bits */
				2100	assign(res, binop(mkOR(ty),
				2101	binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
				2102	binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
				2103	putIRegOrZR(is64, dd, mkexpr(res));
				2104	DIP("%s %s, %s, immR=%u, immS=%u\n",
				2105	nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
				2106	return True;
				2107	}
				2108	after_bfm:
				2109
				2110	/* ---------------------- EXTR ---------------------- */
				2111	/* 30 28 22 20 15 9 4
				2112	1 00 100111 10 m imm6 n d EXTR Xd, Xn, Xm, #imm6
				2113	0 00 100111 00 m imm6 n d EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
				2114	*/
				2115	if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
				2116	Bool is64 = INSN(31,31) == 1;
				2117	UInt mm = INSN(20,16);
				2118	UInt imm6 = INSN(15,10);
				2119	UInt nn = INSN(9,5);
				2120	UInt dd = INSN(4,0);
				2121	Bool valid = True;
				2122	if (INSN(31,31) != INSN(22,22))
				2123	valid = False;
				2124	if (!is64 && imm6 >= 32)
				2125	valid = False;
				2126	if (!valid) goto after_extr;
				2127	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2128	IRTemp srcHi = newTemp(ty);
				2129	IRTemp srcLo = newTemp(ty);
				2130	IRTemp res = newTemp(ty);
				2131	assign(srcHi, getIRegOrZR(is64, nn));
				2132	assign(srcLo, getIRegOrZR(is64, mm));
				2133	if (imm6 == 0) {
				2134	assign(res, mkexpr(srcLo));
				2135	} else {
				2136	UInt szBits = 8 * sizeofIRType(ty);
				2137	vassert(imm6 > 0 && imm6 < szBits);
				2138	assign(res, binop(mkOR(ty),
				2139	binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
				2140	binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
				2141	}
				2142	putIRegOrZR(is64, dd, mkexpr(res));
				2143	DIP("extr %s, %s, %s, #%u\n",
				2144	nameIRegOrZR(is64,dd),
				2145	nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
				2146	return True;
				2147	}
				2148	after_extr:
				2149
				2150	vex_printf("ARM64 front end: data_processing_immediate\n");
				2151	return False;
				2152	# undef INSN
				2153	}
				2154
				2155
				2156	/------------------------------------------------------------/
				2157	/--- Data processing (register) instructions ---/
				2158	/------------------------------------------------------------/
				2159
				2160	static const HChar* nameSH ( UInt sh ) {
				2161	switch (sh) {
				2162	case 0: return "lsl";
				2163	case 1: return "lsr";
				2164	case 2: return "asr";
				2165	case 3: return "ror";
				2166	default: vassert(0);
				2167	}
				2168	}
				2169
				2170	/* Generate IR to get a register value, possibly shifted by an
				2171	immediate. Returns either a 32- or 64-bit temporary holding the
				2172	result. After the shift, the value can optionally be NOT-ed
				2173	too.
				2174
				2175	sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
				2176	in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
				2177	isn't allowed, but it's the job of the caller to check that.
				2178	*/
				2179	static IRTemp getShiftedIRegOrZR ( Bool is64,
				2180	UInt sh_how, UInt sh_amt, UInt regNo,
				2181	Bool invert )
				2182	{
				2183	vassert(sh_how < 4);
				2184	vassert(sh_amt < (is64 ? 64 : 32));
				2185	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2186	IRTemp t0 = newTemp(ty);
				2187	assign(t0, getIRegOrZR(is64, regNo));
				2188	IRTemp t1 = newTemp(ty);
				2189	switch (sh_how) {
				2190	case BITS2(0,0):
				2191	assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
				2192	break;
				2193	case BITS2(0,1):
				2194	assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
				2195	break;
				2196	case BITS2(1,0):
				2197	assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
				2198	break;
				2199	case BITS2(1,1):
				2200	assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
				2201	break;
				2202	default:
				2203	vassert(0);
				2204	}
				2205	if (invert) {
				2206	IRTemp t2 = newTemp(ty);
				2207	assign(t2, unop(mkNOT(ty), mkexpr(t1)));
				2208	return t2;
				2209	} else {
				2210	return t1;
				2211	}
				2212	}
				2213
				2214
				2215	static
				2216	Bool dis_ARM64_data_processing_register(/MB_OUT/DisResult* dres,
				2217	UInt insn)
				2218	{
				2219	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				2220
				2221	/* ------------------- ADD/SUB(reg) ------------------- */
				2222	/* x==0 => 32 bit op x==1 => 64 bit op
				2223	sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
				2224
				2225	31 30 29 28 23 21 20 15 9 4
				2226	\| \| \| \| \| \| \| \| \| \|
				2227	x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
				2228	x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
				2229	x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
				2230	x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
				2231	*/
				2232	if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
				2233	UInt bX = INSN(31,31);
				2234	UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
				2235	UInt bS = INSN(29, 29); /* set flags? */
				2236	UInt sh = INSN(23,22);
				2237	UInt rM = INSN(20,16);
				2238	UInt imm6 = INSN(15,10);
				2239	UInt rN = INSN(9,5);
				2240	UInt rD = INSN(4,0);
				2241	Bool isSUB = bOP == 1;
				2242	Bool is64 = bX == 1;
				2243	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2244	if ((!is64 && imm6 > 31) \|\| sh == BITS2(1,1)) {
				2245	/* invalid; fall through */
				2246	} else {
				2247	IRTemp argL = newTemp(ty);
				2248	assign(argL, getIRegOrZR(is64, rN));
				2249	IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
				2250	IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
				2251	IRTemp res = newTemp(ty);
				2252	assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
				2253	if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
				2254	if (bS) {
				2255	setFlags_ADD_SUB(is64, isSUB, argL, argR);
				2256	}
				2257	DIP("%s%s %s, %s, %s, %s #%u\n",
				2258	bOP ? "sub" : "add", bS ? "s" : "",
				2259	nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
				2260	nameIRegOrZR(is64, rM), nameSH(sh), imm6);
				2261	return True;
				2262	}
				2263	}
				2264
				2265	/* -------------------- LOGIC(reg) -------------------- */
				2266	/* x==0 => 32 bit op x==1 => 64 bit op
				2267	N==0 => inv? is no-op (no inversion)
				2268	N==1 => inv? is NOT
				2269	sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
				2270
				2271	31 30 28 23 21 20 15 9 4
				2272	\| \| \| \| \| \| \| \| \|
				2273	x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
				2274	x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
				2275	x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
				2276	x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
				2277	With N=1, the names are: BIC ORN EON BICS
				2278	*/
				2279	if (INSN(28,24) == BITS5(0,1,0,1,0)) {
				2280	UInt bX = INSN(31,31);
				2281	UInt sh = INSN(23,22);
				2282	UInt bN = INSN(21,21);
				2283	UInt rM = INSN(20,16);
				2284	UInt imm6 = INSN(15,10);
				2285	UInt rN = INSN(9,5);
				2286	UInt rD = INSN(4,0);
				2287	Bool is64 = bX == 1;
				2288	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2289	if (!is64 && imm6 > 31) {
				2290	/* invalid; fall though */
				2291	} else {
				2292	IRTemp argL = newTemp(ty);
				2293	assign(argL, getIRegOrZR(is64, rN));
				2294	IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
				2295	IROp op = Iop_INVALID;
				2296	switch (INSN(30,29)) {
				2297	case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
				2298	case BITS2(0,1): op = mkOR(ty); break;
				2299	case BITS2(1,0): op = mkXOR(ty); break;
				2300	default: vassert(0);
				2301	}
				2302	IRTemp res = newTemp(ty);
				2303	assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
				2304	if (INSN(30,29) == BITS2(1,1)) {
				2305	setFlags_LOGIC(is64, res);
				2306	}
				2307	putIRegOrZR(is64, rD, mkexpr(res));
				2308
				2309	static const HChar* names_op[8]
				2310	= { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
				2311	vassert(((bN << 2) \| INSN(30,29)) < 8);
				2312	const HChar* nm_op = names_op[(bN << 2) \| INSN(30,29)];
				2313	/* Special-case the printing of "MOV" */
				2314	if (rN == 31/zr/ && sh == 0/LSL/ && imm6 == 0 && bN == 0) {
				2315	DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
				2316	nameIRegOrZR(is64, rM));
				2317	} else {
				2318	DIP("%s %s, %s, %s, %s #%u\n", nm_op,
				2319	nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
				2320	nameIRegOrZR(is64, rM), nameSH(sh), imm6);
				2321	}
				2322	return True;
				2323	}
				2324	}
				2325
				2326	/* -------------------- {U,S}MULH -------------------- */
				2327	/* 31 23 22 20 15 9 4
				2328	10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
				2329	10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
				2330	*/
				2331	if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
				2332	&& INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)
				2333	&& INSN(23,23) == 1/ATC/) {
				2334	Bool isU = INSN(23,23) == 1;
				2335	UInt mm = INSN(20,16);
				2336	UInt nn = INSN(9,5);
				2337	UInt dd = INSN(4,0);
				2338	putIReg64orZR(dd, unop(Iop_128HIto64,
				2339	binop(isU ? Iop_MullU64 : Iop_MullS64,
				2340	getIReg64orZR(nn), getIReg64orZR(mm))));
				2341	DIP("%cmulh %s, %s, %s\n",
				2342	isU ? 'u' : 's',
				2343	nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
				2344	return True;
				2345	}
				2346
				2347	/* -------------------- M{ADD,SUB} -------------------- */
				2348	/* 31 30 20 15 14 9 4
				2349	sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
				2350	sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
				2351	*/
				2352	if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
				2353	Bool is64 = INSN(31,31) == 1;
				2354	UInt mm = INSN(20,16);
				2355	Bool isAdd = INSN(15,15) == 0;
				2356	UInt aa = INSN(14,10);
				2357	UInt nn = INSN(9,5);
				2358	UInt dd = INSN(4,0);
				2359	if (is64) {
				2360	putIReg64orZR(
				2361	dd,
				2362	binop(isAdd ? Iop_Add64 : Iop_Sub64,
				2363	getIReg64orZR(aa),
				2364	binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
				2365	} else {
				2366	putIReg32orZR(
				2367	dd,
				2368	binop(isAdd ? Iop_Add32 : Iop_Sub32,
				2369	getIReg32orZR(aa),
				2370	binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
				2371	}
				2372	DIP("%s %s, %s, %s, %s\n",
				2373	isAdd ? "madd" : "msub",
				2374	nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
				2375	nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
				2376	return True;
				2377	}
				2378
				2379	/* ---------------- CS{EL,INC,INV,NEG} ---------------- */
				2380	/* 31 30 28 20 15 11 9 4
				2381	sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
				2382	sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
				2383	sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
				2384	sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
				2385	In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
				2386	*/
				2387	if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
				2388	Bool is64 = INSN(31,31) == 1;
				2389	UInt b30 = INSN(30,30);
				2390	UInt mm = INSN(20,16);
				2391	UInt cond = INSN(15,12);
				2392	UInt b10 = INSN(10,10);
				2393	UInt nn = INSN(9,5);
				2394	UInt dd = INSN(4,0);
				2395	UInt op = (b30 << 1) \| b10; /* 00=id 01=inc 10=inv 11=neg */
				2396	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2397	IRExpr* argL = getIRegOrZR(is64, nn);
				2398	IRExpr* argR = getIRegOrZR(is64, mm);
				2399	switch (op) {
				2400	case BITS2(0,0):
				2401	break;
				2402	case BITS2(0,1):
				2403	argR = binop(mkADD(ty), argR, mkU(ty,1));
				2404	break;
				2405	case BITS2(1,0):
				2406	argR = unop(mkNOT(ty), argR);
				2407	break;
				2408	case BITS2(1,1):
				2409	argR = binop(mkSUB(ty), mkU(ty,0), argR);
				2410	break;
				2411	default:
				2412	vassert(0);
				2413	}
				2414	putIRegOrZR(
				2415	is64, dd,
				2416	IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
				2417	argL, argR)
				2418	);
				2419	const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
				2420	DIP("%s %s, %s, %s, %s\n", op_nm[op],
				2421	nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
				2422	nameIRegOrZR(is64, mm), nameCC(cond));
				2423	return True;
				2424	}
				2425
				2426	/* -------------- ADD/SUB(extended reg) -------------- */
				2427	/* 28 20 15 12 9 4
				2428	000 01011 00 1 m opt imm3 n d ADD Wd\|SP, Wn\|SP, Wm ext&lsld
				2429	100 01011 00 1 m opt imm3 n d ADD Xd\|SP, Xn\|SP, Rm ext&lsld
				2430
				2431	001 01011 00 1 m opt imm3 n d ADDS Wd, Wn\|SP, Wm ext&lsld
				2432	101 01011 00 1 m opt imm3 n d ADDS Xd, Xn\|SP, Rm ext&lsld
				2433
				2434	010 01011 00 1 m opt imm3 n d SUB Wd\|SP, Wn\|SP, Wm ext&lsld
				2435	110 01011 00 1 m opt imm3 n d SUB Xd\|SP, Xn\|SP, Rm ext&lsld
				2436
				2437	011 01011 00 1 m opt imm3 n d SUBS Wd, Wn\|SP, Wm ext&lsld
				2438	111 01011 00 1 m opt imm3 n d SUBS Xd, Xn\|SP, Rm ext&lsld
				2439
				2440	The 'm' operand is extended per opt, thusly:
				2441
				2442	000 Xm & 0xFF UXTB
				2443	001 Xm & 0xFFFF UXTH
				2444	010 Xm & (2^32)-1 UXTW
				2445	011 Xm UXTX
				2446
				2447	100 Xm sx from bit 7 SXTB
				2448	101 Xm sx from bit 15 SXTH
				2449	110 Xm sx from bit 31 SXTW
				2450	111 Xm SXTX
				2451
				2452	In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
				2453	operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
				2454	are the identity operation on Wm.
				2455
				2456	After extension, the value is shifted left by imm3 bits, which
				2457	may only be in the range 0 .. 4 inclusive.
				2458	*/
				2459	if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
				2460	Bool is64 = INSN(31,31) == 1;
				2461	Bool isSub = INSN(30,30) == 1;
				2462	Bool setCC = INSN(29,29) == 1;
				2463	UInt mm = INSN(20,16);
				2464	UInt opt = INSN(15,13);
				2465	UInt imm3 = INSN(12,10);
				2466	UInt nn = INSN(9,5);
				2467	UInt dd = INSN(4,0);
				2468	const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
				2469	"sxtb", "sxth", "sxtw", "sxtx" };
				2470	/* Do almost the same thing in the 32- and 64-bit cases. */
				2471	IRTemp xN = newTemp(Ity_I64);
				2472	IRTemp xM = newTemp(Ity_I64);
				2473	assign(xN, getIReg64orSP(nn));
				2474	assign(xM, getIReg64orZR(mm));
				2475	IRExpr* xMw = mkexpr(xM); /* "xM widened" */
				2476	Int shSX = 0;
				2477	/* widen Xm .. */
				2478	switch (opt) {
				2479	case BITS3(0,0,0): // UXTB
				2480	xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
				2481	case BITS3(0,0,1): // UXTH
				2482	xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
				2483	case BITS3(0,1,0): // UXTW -- noop for the 32bit case
				2484	if (is64) {
				2485	xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
				2486	}
				2487	break;
				2488	case BITS3(0,1,1): // UXTX -- always a noop
				2489	break;
				2490	case BITS3(1,0,0): // SXTB
				2491	shSX = 56; goto sxTo64;
				2492	case BITS3(1,0,1): // SXTH
				2493	shSX = 48; goto sxTo64;
				2494	case BITS3(1,1,0): // SXTW -- noop for the 32bit case
				2495	if (is64) {
				2496	shSX = 32; goto sxTo64;
				2497	}
				2498	break;
				2499	case BITS3(1,1,1): // SXTX -- always a noop
				2500	break;
				2501	sxTo64:
				2502	vassert(shSX >= 32);
				2503	xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
				2504	mkU8(shSX));
				2505	break;
				2506	default:
				2507	vassert(0);
				2508	}
				2509	/* and now shift */
				2510	IRTemp argL = xN;
				2511	IRTemp argR = newTemp(Ity_I64);
				2512	assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
				2513	IRTemp res = newTemp(Ity_I64);
				2514	assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
				2515	mkexpr(argL), mkexpr(argR)));
				2516	if (is64) {
				2517	if (setCC) {
				2518	putIReg64orZR(dd, mkexpr(res));
				2519	setFlags_ADD_SUB(True/is64/, isSub, argL, argR);
				2520	} else {
				2521	putIReg64orSP(dd, mkexpr(res));
				2522	}
				2523	} else {
				2524	if (setCC) {
				2525	IRTemp argL32 = newTemp(Ity_I32);
				2526	IRTemp argR32 = newTemp(Ity_I32);
				2527	putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
				2528	assign(argL32, unop(Iop_64to32, mkexpr(argL)));
				2529	assign(argR32, unop(Iop_64to32, mkexpr(argR)));
				2530	setFlags_ADD_SUB(False/!is64/, isSub, argL32, argR32);
				2531	} else {
				2532	putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
				2533	}
				2534	}
				2535	DIP("%s%s %s, %s, %s %s lsl %u\n",
				2536	isSub ? "sub" : "add", setCC ? "s" : "",
				2537	setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
				2538	nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
				2539	nameExt[opt], imm3);
				2540	return True;
				2541	}
				2542
				2543	/* ---------------- CCMP/CCMN(imm) ---------------- */
				2544	/* Bizarrely, these appear in the "data processing register"
				2545	category, even though they are operations against an
				2546	immediate. */
				2547	/* 31 29 20 15 11 9 3
				2548	sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
				2549	sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
				2550
				2551	Operation is:
				2552	(CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
				2553	(CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
				2554	*/
				2555	if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
				2556	&& INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
				2557	Bool is64 = INSN(31,31) == 1;
				2558	Bool isSUB = INSN(30,30) == 1;
				2559	UInt imm5 = INSN(20,16);
				2560	UInt cond = INSN(15,12);
				2561	UInt nn = INSN(9,5);
				2562	UInt nzcv = INSN(3,0);
				2563
				2564	IRTemp condT = newTemp(Ity_I1);
				2565	assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
				2566
				2567	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2568	IRTemp argL = newTemp(ty);
				2569	IRTemp argR = newTemp(ty);
				2570
				2571	if (is64) {
				2572	assign(argL, getIReg64orZR(nn));
				2573	assign(argR, mkU64(imm5));
				2574	} else {
				2575	assign(argL, getIReg32orZR(nn));
				2576	assign(argR, mkU32(imm5));
				2577	}
				2578	setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
				2579
				2580	DIP("ccm%c %s, #%u, #%u, %s\n",
				2581	isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
				2582	imm5, nzcv, nameCC(cond));
				2583	return True;
				2584	}
				2585
				2586	/* ---------------- CCMP/CCMN(reg) ---------------- */
				2587	/* 31 29 20 15 11 9 3
				2588	sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
				2589	sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
				2590	Operation is:
				2591	(CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
				2592	(CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
				2593	*/
				2594	if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
				2595	&& INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
				2596	Bool is64 = INSN(31,31) == 1;
				2597	Bool isSUB = INSN(30,30) == 1;
				2598	UInt mm = INSN(20,16);
				2599	UInt cond = INSN(15,12);
				2600	UInt nn = INSN(9,5);
				2601	UInt nzcv = INSN(3,0);
				2602
				2603	IRTemp condT = newTemp(Ity_I1);
				2604	assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
				2605
				2606	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2607	IRTemp argL = newTemp(ty);
				2608	IRTemp argR = newTemp(ty);
				2609
				2610	if (is64) {
				2611	assign(argL, getIReg64orZR(nn));
				2612	assign(argR, getIReg64orZR(mm));
				2613	} else {
				2614	assign(argL, getIReg32orZR(nn));
				2615	assign(argR, getIReg32orZR(mm));
				2616	}
				2617	setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
				2618
				2619	DIP("ccm%c %s, %s, #%u, %s\n",
				2620	isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
				2621	nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
				2622	return True;
				2623	}
				2624
				2625
				2626	/* -------------- REV/REV16/REV32/RBIT -------------- */
				2627	/* 31 30 28 20 15 11 9 4
				2628
				2629	1 10 11010110 00000 0000 11 n d REV Xd, Xn
				2630	0 10 11010110 00000 0000 10 n d REV Wd, Wn
				2631
				2632	1 10 11010110 00000 0000 01 n d REV16 Xd, Xn
				2633	0 10 11010110 00000 0000 01 n d REV16 Wd, Wn
				2634
				2635	1 10 11010110 00000 0000 10 n d REV32 Xd, Xn
				2636
				2637	1 10 11010110 00000 0000 00 n d RBIT Xd, Xn
				2638	0 10 11010110 00000 0000 00 n d RBIT Wd, Wn
				2639	*/
				2640	/* Only REV is currently implemented. */
				2641	if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
				2642	&& INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,0,1)
				2643	&& INSN(31,31) == INSN(10,10)) {
				2644	Bool is64 = INSN(31,31) == 1;
				2645	UInt nn = INSN(9,5);
				2646	UInt dd = INSN(4,0);
				2647	IRTemp src = newTemp(Ity_I64);
				2648	IRTemp dst = IRTemp_INVALID;
				2649	if (is64) {
				2650	assign(src, getIReg64orZR(nn));
				2651	dst = math_BSWAP64(src);
				2652	putIReg64orZR(dd, mkexpr(dst));
				2653	} else {
				2654	assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
				2655	dst = math_BSWAP64(src);
				2656	putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
				2657	}
				2658	DIP("rev %s, %s\n", nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
				2659	return True;
				2660	}
				2661
				2662	/* -------------------- CLZ/CLS -------------------- */
				2663	/* 30 28 24 20 15 9 4
				2664	sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
				2665	sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
				2666	*/
				2667	if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
				2668	&& INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
				2669	Bool is64 = INSN(31,31) == 1;
				2670	Bool isCLS = INSN(10,10) == 1;
				2671	UInt nn = INSN(9,5);
				2672	UInt dd = INSN(4,0);
				2673	IRTemp src = newTemp(Ity_I64);
				2674	IRTemp dst = newTemp(Ity_I64);
				2675	if (!isCLS) { // CLS not yet supported
				2676	if (is64) {
				2677	assign(src, getIReg64orZR(nn));
				2678	assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
				2679	mkU64(64),
				2680	unop(Iop_Clz64, mkexpr(src))));
				2681	putIReg64orZR(dd, mkexpr(dst));
				2682	} else {
				2683	assign(src, binop(Iop_Shl64,
				2684	unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
				2685	assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
				2686	mkU64(32),
				2687	unop(Iop_Clz64, mkexpr(src))));
				2688	putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
				2689	}
				2690	DIP("cl%c %s, %s\n",
				2691	isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
				2692	return True;
				2693	}
				2694	}
				2695
				2696	/* -------------------- LSLV/LSRV/ASRV -------------------- */
				2697	/* 30 28 20 15 11 9 4
				2698	sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
				2699	sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
				2700	sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
				2701	*/
				2702	if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
				2703	&& INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
				2704	Bool is64 = INSN(31,31) == 1;
				2705	UInt mm = INSN(20,16);
				2706	UInt op = INSN(11,10);
				2707	UInt nn = INSN(9,5);
				2708	UInt dd = INSN(4,0);
				2709	IRType ty = is64 ? Ity_I64 : Ity_I32;
				2710	IRTemp srcL = newTemp(ty);
				2711	IRTemp srcR = newTemp(Ity_I8);
				2712	IRTemp res = newTemp(ty);
				2713	IROp iop = Iop_INVALID;
				2714	assign(srcL, getIRegOrZR(is64, nn));
				2715	assign(srcR,
				2716	unop(Iop_64to8,
				2717	binop(Iop_And64,
				2718	getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
				2719	switch (op) {
				2720	case BITS2(0,0): iop = mkSHL(ty); break;
				2721	case BITS2(0,1): iop = mkSHR(ty); break;
				2722	case BITS2(1,0): iop = mkSAR(ty); break;
				2723	default: vassert(0);
				2724	}
				2725	assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
				2726	putIRegOrZR(is64, dd, mkexpr(res));
				2727	vassert(op < 3);
				2728	const HChar* names[3] = { "lslv", "lsrv", "asrv" };
				2729	DIP("%s %s, %s, %s\n",
				2730	names[op], nameIRegOrZR(is64,dd),
				2731	nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
				2732	return True;
				2733	}
				2734
				2735	/* -------------------- SDIV/UDIV -------------------- */
				2736	/* 30 28 20 15 10 9 4
				2737	sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
				2738	sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
				2739	*/
				2740	if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
				2741	&& INSN(15,11) == BITS5(0,0,0,0,1)) {
				2742	Bool is64 = INSN(31,31) == 1;
				2743	UInt mm = INSN(20,16);
				2744	Bool isS = INSN(10,10) == 1;
				2745	UInt nn = INSN(9,5);
				2746	UInt dd = INSN(4,0);
				2747	if (isS) {
				2748	putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
				2749	getIRegOrZR(is64, nn),
				2750	getIRegOrZR(is64, mm)));
				2751	} else {
				2752	putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
				2753	getIRegOrZR(is64, nn),
				2754	getIRegOrZR(is64, mm)));
				2755	}
				2756	DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
				2757	nameIRegOrZR(is64, dd),
				2758	nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
				2759	return True;
				2760	}
				2761
				2762	/* ------------------ {S,U}M{ADD,SUB}L ------------------ */
				2763	/* 31 23 20 15 14 9 4
				2764	1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
				2765	1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
				2766	1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
				2767	1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
				2768	with operation
				2769	Xd = Xa +/- (Wn *u/s Wm)
				2770	*/
				2771	if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
				2772	Bool isU = INSN(23,23) == 1;
				2773	UInt mm = INSN(20,16);
				2774	Bool isAdd = INSN(15,15) == 0;
				2775	UInt aa = INSN(14,10);
				2776	UInt nn = INSN(9,5);
				2777	UInt dd = INSN(4,0);
				2778	IRTemp wN = newTemp(Ity_I32);
				2779	IRTemp wM = newTemp(Ity_I32);
				2780	IRTemp xA = newTemp(Ity_I64);
				2781	IRTemp muld = newTemp(Ity_I64);
				2782	IRTemp res = newTemp(Ity_I64);
				2783	assign(wN, getIReg32orZR(nn));
				2784	assign(wM, getIReg32orZR(mm));
				2785	assign(xA, getIReg64orZR(aa));
				2786	assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
				2787	mkexpr(wN), mkexpr(wM)));
				2788	assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
				2789	mkexpr(xA), mkexpr(muld)));
				2790	putIReg64orZR(dd, mkexpr(res));
				2791	DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
				2792	nameIReg64orZR(dd), nameIReg32orZR(nn),
				2793	nameIReg32orZR(mm), nameIReg64orZR(aa));
				2794	return True;
				2795	}
				2796	vex_printf("ARM64 front end: data_processing_register\n");
				2797	return False;
				2798	# undef INSN
				2799	}
				2800
				2801
				2802	/*------------------------------------------------------------*/
				2803	/*--- Load and Store instructions                          ---*/
				2804	/*------------------------------------------------------------*/
				2805
				2806	/* Generate the EA for a "reg + reg" style amode. This is done from
				2807	parts of the insn, but for sanity checking sake it takes the whole
				2808	insn. This appears to depend on insn[15:12], with opt=insn[15:13]
				2809	and S=insn[12]:
				2810
				2811	The possible forms, along with their opt:S values, are:
				2812	011:0 Xn\|SP + Xm
				2813	111:0 Xn\|SP + Xm
				2814	011:1 Xn\|SP + Xm * transfer_szB
				2815	111:1 Xn\|SP + Xm * transfer_szB
				2816	010:0 Xn\|SP + 32Uto64(Wm)
				2817	010:1 Xn\|SP + 32Uto64(Wm) * transfer_szB
				2818	110:0 Xn\|SP + 32Sto64(Wm)
				2819	110:1 Xn\|SP + 32Sto64(Wm) * transfer_szB
				2820
				2821	Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
				2822	the transfer size is insn[23,31,30]. For integer loads/stores,
				2823	insn[23] is zero, hence szLg2 can be at most 3 in such cases.
				2824
				2825	If the decoding fails, it returns IRTemp_INVALID.
				2826
				2827	isInt is True iff this is decoding is for transfers to/from integer
				2828	registers. If False it is for transfers to/from vector registers.
				2829	*/
				2830	static IRTemp gen_indexed_EA ( /OUT/HChar* buf, UInt insn, Bool isInt )
				2831	{
				2832	UInt optS = SLICE_UInt(insn, 15, 12);
				2833	UInt mm = SLICE_UInt(insn, 20, 16);
				2834	UInt nn = SLICE_UInt(insn, 9, 5);
				2835	UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
				2836	\| SLICE_UInt(insn, 31, 30); // Log2 of the size
				2837
				2838	buf[0] = 0;
				2839
				2840	/* Sanity checks, that this really is a load/store insn. */
				2841	if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
				2842	goto fail;
				2843
				2844	if (isInt
				2845	&& SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/LDR/
				2846	&& SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/STR/
				2847	&& SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/LDRSbhw Xt/
				2848	&& SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/LDRSbhw Wt/
				2849	goto fail;
				2850
				2851	if (!isInt
				2852	&& SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /LDR/STR/
				2853	goto fail;
				2854
				2855	/* Throw out non-verified but possibly valid cases. */
				2856	switch (szLg2) {
				2857	case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
				2858	case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
				2859	case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
				2860	case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
				2861	case BITS3(1,0,0): // can only ever be valid for the vector case
				2862	if (isInt) goto fail; else goto fail;
				2863	case BITS3(1,0,1): // these sizes are never valid
				2864	case BITS3(1,1,0):
				2865	case BITS3(1,1,1): goto fail;
				2866
				2867	default: vassert(0);
				2868	}
				2869
				2870	IRExpr* rhs = NULL;
				2871	switch (optS) {
				2872	case BITS4(1,1,1,0): goto fail; //ATC
				2873	case BITS4(0,1,1,0):
				2874	rhs = getIReg64orZR(mm);
				2875	vex_sprintf(buf, "[%s, %s]",
				2876	nameIReg64orZR(nn), nameIReg64orZR(mm));
				2877	break;
				2878	case BITS4(1,1,1,1): goto fail; //ATC
				2879	case BITS4(0,1,1,1):
				2880	rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
				2881	vex_sprintf(buf, "[%s, %s lsl %u]",
				2882	nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
				2883	break;
				2884	case BITS4(0,1,0,0):
				2885	rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
				2886	vex_sprintf(buf, "[%s, %s uxtx]",
				2887	nameIReg64orZR(nn), nameIReg32orZR(mm));
				2888	break;
				2889	case BITS4(0,1,0,1):
				2890	rhs = binop(Iop_Shl64,
				2891	unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
				2892	vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
				2893	nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
				2894	break;
				2895	case BITS4(1,1,0,0):
				2896	rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
				2897	vex_sprintf(buf, "[%s, %s sxtx]",
				2898	nameIReg64orZR(nn), nameIReg32orZR(mm));
				2899	break;
				2900	case BITS4(1,1,0,1):
				2901	rhs = binop(Iop_Shl64,
				2902	unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
				2903	vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
				2904	nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
				2905	break;
				2906	default:
				2907	/* The rest appear to be genuinely invalid */
				2908	goto fail;
				2909	}
				2910
				2911	vassert(rhs);
				2912	IRTemp res = newTemp(Ity_I64);
				2913	assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
				2914	return res;
				2915
				2916	fail:
				2917	vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
				2918	return IRTemp_INVALID;
				2919	}
				2920
				2921
				2922	/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
				2923	bits of DATAE :: Ity_I64. */
				2924	static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
				2925	{
				2926	IRExpr* addrE = mkexpr(addr);
				2927	switch (szB) {
				2928	case 8:
				2929	storeLE(addrE, dataE);
				2930	break;
				2931	case 4:
				2932	storeLE(addrE, unop(Iop_64to32, dataE));
				2933	break;
				2934	case 2:
				2935	storeLE(addrE, unop(Iop_64to16, dataE));
				2936	break;
				2937	case 1:
				2938	storeLE(addrE, unop(Iop_64to8, dataE));
				2939	break;
				2940	default:
				2941	vassert(0);
				2942	}
				2943	}
				2944
				2945
				2946	/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
				2947	placing the result in an Ity_I64 temporary. */
				2948	static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
				2949	{
				2950	IRTemp res = newTemp(Ity_I64);
				2951	IRExpr* addrE = mkexpr(addr);
				2952	switch (szB) {
				2953	case 8:
				2954	assign(res, loadLE(Ity_I64,addrE));
				2955	break;
				2956	case 4:
				2957	assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
				2958	break;
				2959	case 2:
				2960	assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
				2961	break;
				2962	case 1:
				2963	assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
				2964	break;
				2965	default:
				2966	vassert(0);
				2967	}
				2968	return res;
				2969	}
				2970
				2971
				2972	static
				2973	Bool dis_ARM64_load_store(/MB_OUT/DisResult* dres, UInt insn)
				2974	{
				2975	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				2976
				2977	/* ------------ LDR,STR (immediate, uimm12) ----------- */
				2978	/* uimm12 is scaled by the transfer size
				2979
				2980	31 29 26 21 9 4
				2981	\| \| \| \| \| \|
				2982	11 111 00100 imm12 nn tt STR Xt, [Xn\|SP, #imm12 * 8]
				2983	11 111 00101 imm12 nn tt LDR Xt, [Xn\|SP, #imm12 * 8]
				2984
				2985	10 111 00100 imm12 nn tt STR Wt, [Xn\|SP, #imm12 * 4]
				2986	10 111 00101 imm12 nn tt LDR Wt, [Xn\|SP, #imm12 * 4]
				2987
				2988	01 111 00100 imm12 nn tt STRH Wt, [Xn\|SP, #imm12 * 2]
				2989	01 111 00101 imm12 nn tt LDRH Wt, [Xn\|SP, #imm12 * 2]
				2990
				2991	00 111 00100 imm12 nn tt STRB Wt, [Xn\|SP, #imm12 * 1]
				2992	00 111 00101 imm12 nn tt LDRB Wt, [Xn\|SP, #imm12 * 1]
				2993	*/
				2994	if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
				2995	UInt szLg2 = INSN(31,30);
				2996	UInt szB = 1 << szLg2;
				2997	Bool isLD = INSN(22,22) == 1;
				2998	UInt offs = INSN(21,10) * szB;
				2999	UInt nn = INSN(9,5);
				3000	UInt tt = INSN(4,0);
				3001	IRTemp ta = newTemp(Ity_I64);
				3002	assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
				3003	if (nn == 31) { /* FIXME generate stack alignment check */ }
				3004	vassert(szLg2 < 4);
				3005	if (isLD) {
				3006	putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
				3007	} else {
				3008	gen_narrowing_store(szB, ta, getIReg64orZR(tt));
				3009	}
				3010	const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
				3011	const HChar* st_name[4] = { "strb", "strh", "str", "str" };
				3012	DIP("%s %s, [%s, #%u]\n",
				3013	(isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
				3014	nameIReg64orSP(nn), offs);
				3015	return True;
				3016	}
				3017
				3018	/* ------------ LDUR,STUR (immediate, simm9) ----------- */
				3019	/*
				3020	31 29 26 20 11 9 4
				3021	\| \| \| \| \| \| \|
				3022	(at-Rn-then-Rn=EA) \| \| \|
				3023	sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn\|SP], #simm9
				3024	sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn\|SP], #simm9
				3025
				3026	(at-EA-then-Rn=EA)
				3027	sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn\|SP, #simm9]!
				3028	sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn\|SP, #simm9]!
				3029
				3030	(at-EA)
				3031	sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn\|SP, #simm9]
				3032	sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn\|SP, #simm9]
				3033
				3034	simm9 is unscaled.
				3035
				3036	The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
				3037	load case this is because would create two competing values for
				3038	Rt. In the store case the reason is unclear, but the spec
				3039	disallows it anyway.
				3040
				3041	Stores are narrowing, loads are unsigned widening. sz encodes
				3042	the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
				3043	*/
				3044	if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
				3045	== BITS9(1,1,1, 0,0,0,0,0, 0)) {
				3046	UInt szLg2 = INSN(31,30);
				3047	UInt szB = 1 << szLg2;
				3048	Bool isLoad = INSN(22,22) == 1;
				3049	UInt imm9 = INSN(20,12);
				3050	UInt nn = INSN(9,5);
				3051	UInt tt = INSN(4,0);
				3052	Bool wBack = INSN(10,10) == 1;
				3053	UInt how = INSN(11,10);
				3054	if (how == BITS2(1,0) \|\| (wBack && nn == tt && tt != 31)) {
				3055	/* undecodable; fall through */
				3056	} else {
				3057	if (nn == 31) { /* FIXME generate stack alignment check */ }
				3058
				3059	// Compute the transfer address TA and the writeback address WA.
				3060	IRTemp tRN = newTemp(Ity_I64);
				3061	assign(tRN, getIReg64orSP(nn));
				3062	IRTemp tEA = newTemp(Ity_I64);
				3063	Long simm9 = (Long)sx_to_64(imm9, 9);
				3064	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
				3065
				3066	IRTemp tTA = newTemp(Ity_I64);
				3067	IRTemp tWA = newTemp(Ity_I64);
				3068	switch (how) {
				3069	case BITS2(0,1):
				3070	assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
				3071	case BITS2(1,1):
				3072	assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
				3073	case BITS2(0,0):
				3074	assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
				3075	default:
				3076	vassert(0); /* NOTREACHED */
				3077	}
				3078
				3079	if (isLoad) {
				3080	putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
				3081	} else {
				3082	gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
				3083	}
				3084
				3085	if (wBack)
				3086	putIReg64orSP(nn, mkexpr(tEA));
				3087
				3088	const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
				3089	const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
				3090	const HChar* fmt_str = NULL;
				3091	switch (how) {
				3092	case BITS2(0,1):
				3093	fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
				3094	break;
				3095	case BITS2(1,1):
				3096	fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
				3097	break;
				3098	case BITS2(0,0):
				3099	fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
				3100	break;
				3101	default:
				3102	vassert(0);
				3103	}
				3104	DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
				3105	nameIRegOrZR(szB == 8, tt),
				3106	nameIReg64orSP(nn), simm9);
				3107	return True;
				3108	}
				3109	}
				3110
				3111	/* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
				3112	/* L==1 => mm==LD
				3113	L==0 => mm==ST
				3114	x==0 => 32 bit transfers, and zero extended loads
				3115	x==1 => 64 bit transfers
				3116	simm7 is scaled by the (single-register) transfer size
				3117
				3118	(at-Rn-then-Rn=EA)
				3119	x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn\|SP], #imm
				3120
				3121	(at-EA-then-Rn=EA)
				3122	x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn\|SP, #imm]!
				3123
				3124	(at-EA)
				3125	x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn\|SP, #imm]
				3126	*/
				3127
				3128	UInt insn_30_23 = INSN(30,23);
				3129	if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
				3130	\|\| insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
				3131	\|\| insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
				3132	UInt bL = INSN(22,22);
				3133	UInt bX = INSN(31,31);
				3134	UInt bWBack = INSN(23,23);
				3135	UInt rT1 = INSN(4,0);
				3136	UInt rN = INSN(9,5);
				3137	UInt rT2 = INSN(14,10);
				3138	Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
				3139	if ((bWBack && (rT1 == rN \|\| rT2 == rN) && rN != 31)
				3140	\|\| (bL && rT1 == rT2)) {
				3141	/* undecodable; fall through */
				3142	} else {
				3143	if (rN == 31) { /* FIXME generate stack alignment check */ }
				3144
				3145	// Compute the transfer address TA and the writeback address WA.
				3146	IRTemp tRN = newTemp(Ity_I64);
				3147	assign(tRN, getIReg64orSP(rN));
				3148	IRTemp tEA = newTemp(Ity_I64);
				3149	simm7 = (bX ? 8 : 4) * simm7;
				3150	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
				3151
				3152	IRTemp tTA = newTemp(Ity_I64);
				3153	IRTemp tWA = newTemp(Ity_I64);
				3154	switch (INSN(24,23)) {
				3155	case BITS2(0,1):
				3156	assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
				3157	case BITS2(1,1):
				3158	assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
				3159	case BITS2(1,0):
				3160	assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
				3161	default:
				3162	vassert(0); /* NOTREACHED */
				3163	}
				3164
				3165	/* Normally rN would be updated after the transfer. However, in
				3166	the special case typifed by
				3167	stp x29, x30, [sp,#-112]!
				3168	it is necessary to update SP before the transfer, (1)
				3169	because Memcheck will otherwise complain about a write
				3170	below the stack pointer, and (2) because the segfault
				3171	stack extension mechanism will otherwise extend the stack
				3172	only down to SP before the instruction, which might not be
				3173	far enough, if the -112 bit takes the actual access
				3174	address to the next page.
				3175	*/
				3176	Bool earlyWBack
				3177	= bWBack && simm7 < 0
				3178	&& INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
				3179
				3180	if (bWBack && earlyWBack)
				3181	putIReg64orSP(rN, mkexpr(tEA));
				3182
				3183	/**/ if (bL == 1 && bX == 1) {
				3184	// 64 bit load
				3185	putIReg64orZR(rT1, loadLE(Ity_I64,
				3186	binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
				3187	putIReg64orZR(rT2, loadLE(Ity_I64,
				3188	binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
				3189	} else if (bL == 1 && bX == 0) {
				3190	vassert(0); //ATC
				3191	// 32 bit load
				3192	putIReg32orZR(rT1, loadLE(Ity_I32,
				3193	binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
				3194	putIReg32orZR(rT2, loadLE(Ity_I32,
				3195	binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
				3196	} else if (bL == 0 && bX == 1) {
				3197	// 64 bit store
				3198	storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
				3199	getIReg64orZR(rT1));
				3200	storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
				3201	getIReg64orZR(rT2));
				3202	} else {
				3203	vassert(bL == 0 && bX == 0);
				3204	vassert(0); //ATC
				3205	// 32 bit store
				3206	storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
				3207	getIReg32orZR(rT1));
				3208	storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
				3209	getIReg32orZR(rT2));
				3210	}
				3211
				3212	if (bWBack && !earlyWBack)
				3213	putIReg64orSP(rN, mkexpr(tEA));
				3214
				3215	const HChar* fmt_str = NULL;
				3216	switch (INSN(24,23)) {
				3217	case BITS2(0,1):
				3218	fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
				3219	break;
				3220	case BITS2(1,1):
				3221	fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
				3222	break;
				3223	case BITS2(1,0):
				3224	fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
				3225	break;
				3226	default:
				3227	vassert(0);
				3228	}
				3229	DIP(fmt_str, bL == 0 ? "st" : "ld",
				3230	nameIRegOrZR(bX == 1, rT1),
				3231	nameIRegOrZR(bX == 1, rT2),
				3232	nameIReg64orSP(rN), simm7);
				3233	return True;
				3234	}
				3235	}
				3236
				3237	/* ---------------- LDR (literal, int reg) ---------------- */
				3238	/* 31 29 23 4
				3239	00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
				3240	01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
				3241	10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
				3242	11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
				3243	Just handles the first two cases for now.
				3244	*/
				3245	if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
				3246	UInt imm19 = INSN(23,5);
				3247	UInt rT = INSN(4,0);
				3248	UInt bX = INSN(30,30);
				3249	ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
				3250	if (bX) {
				3251	putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
				3252	} else {
				3253	putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
				3254	}
				3255	DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
				3256	return True;
				3257	}
				3258
				3259	/* -------------- {LD,ST}R (integer register) --------------- */
				3260	/* 31 29 20 15 12 11 9 4
				3261	\| \| \| \| \| \| \| \|
				3262	11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn\|SP, R<m>{ext/sh}]
				3263	10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn\|SP, R<m>{ext/sh}]
				3264	01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn\|SP, R<m>{ext/sh}]
				3265	00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn\|SP, R<m>{ext/sh}]
				3266
				3267	11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn\|SP, R<m>{ext/sh}]
				3268	10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn\|SP, R<m>{ext/sh}]
				3269	01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn\|SP, R<m>{ext/sh}]
				3270	00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn\|SP, R<m>{ext/sh}]
				3271	*/
				3272	if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
				3273	&& INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
				3274	HChar dis_buf[64];
				3275	UInt szLg2 = INSN(31,30);
				3276	Bool isLD = INSN(22,22) == 1;
				3277	UInt tt = INSN(4,0);
				3278	IRTemp ea = gen_indexed_EA(dis_buf, insn, True/to/from int regs/);
				3279	if (ea != IRTemp_INVALID) {
				3280	switch (szLg2) {
				3281	case 3: /* 64 bit */
				3282	if (isLD) {
				3283	putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
				3284	DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
				3285	} else {
				3286	storeLE(mkexpr(ea), getIReg64orZR(tt));
				3287	DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
				3288	}
				3289	break;
				3290	case 2: /* 32 bit */
				3291	if (isLD) {
				3292	putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
				3293	DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3294	} else {
				3295	storeLE(mkexpr(ea), getIReg32orZR(tt));
				3296	DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3297	}
				3298	break;
				3299	case 1: /* 16 bit */
				3300	if (isLD) {
				3301	putIReg64orZR(tt, unop(Iop_16Uto64,
				3302	loadLE(Ity_I16, mkexpr(ea))));
				3303	DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3304	} else {
				3305	storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
				3306	DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3307	}
				3308	break;
				3309	case 0: /* 8 bit */
				3310	if (isLD) {
				3311	putIReg64orZR(tt, unop(Iop_8Uto64,
				3312	loadLE(Ity_I8, mkexpr(ea))));
				3313	DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3314	} else {
				3315	storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
				3316	DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3317	}
				3318	break;
				3319	default:
				3320	vassert(0);
				3321	}
				3322	return True;
				3323	}
				3324	}
				3325
				3326	/* -------------- LDRS{B,H,W} (uimm12) -------------- */
				3327	/* 31 29 26 23 21 9 4
				3328	10 111 001 10 imm12 n t LDRSW Xt, [Xn\|SP, #pimm12 * 4]
				3329	01 111 001 1x imm12 n t LDRSH Rt, [Xn\|SP, #pimm12 * 2]
				3330	00 111 001 1x imm12 n t LDRSB Rt, [Xn\|SP, #pimm12 * 1]
				3331	where
				3332	Rt is Wt when x==1, Xt when x==0
				3333	*/
				3334	if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
				3335	/* Further checks on bits 31:30 and 22 */
				3336	Bool valid = False;
				3337	switch ((INSN(31,30) << 1) \| INSN(22,22)) {
				3338	case BITS3(1,0,0):
				3339	case BITS3(0,1,0): case BITS3(0,1,1):
				3340	case BITS3(0,0,0): case BITS3(0,0,1):
				3341	valid = True;
				3342	break;
				3343	}
				3344	if (valid) {
				3345	UInt szLg2 = INSN(31,30);
				3346	UInt bitX = INSN(22,22);
				3347	UInt imm12 = INSN(21,10);
				3348	UInt nn = INSN(9,5);
				3349	UInt tt = INSN(4,0);
				3350	UInt szB = 1 << szLg2;
				3351	IRExpr* ea = binop(Iop_Add64,
				3352	getIReg64orSP(nn), mkU64(imm12 * szB));
				3353	switch (szB) {
				3354	case 4:
				3355	vassert(bitX == 0);
				3356	putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
				3357	DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
				3358	nameIReg64orSP(nn), imm12 * szB);
				3359	break;
				3360	case 2:
				3361	if (bitX == 1) {
				3362	putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
				3363	} else {
				3364	putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
				3365	}
				3366	DIP("ldrsh %s, [%s, #%u]\n",
				3367	nameIRegOrZR(bitX == 0, tt),
				3368	nameIReg64orSP(nn), imm12 * szB);
				3369	break;
				3370	case 1:
				3371	if (bitX == 1) {
				3372	putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
				3373	} else {
				3374	putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
				3375	}
				3376	DIP("ldrsb %s, [%s, #%u]\n",
				3377	nameIRegOrZR(bitX == 0, tt),
				3378	nameIReg64orSP(nn), imm12 * szB);
				3379	break;
				3380	default:
				3381	vassert(0);
				3382	}
				3383	return True;
				3384	}
				3385	/* else fall through */
				3386	}
				3387
				3388	/* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
				3389	/* (at-Rn-then-Rn=EA)
				3390	31 29 23 21 20 11 9 4
				3391	00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn\|SP], #simm9
				3392	01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn\|SP], #simm9
				3393	10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn\|SP], #simm9
				3394
				3395	(at-EA-then-Rn=EA)
				3396	00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn\|SP, #simm9]!
				3397	01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn\|SP, #simm9]!
				3398	10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn\|SP, #simm9]!
				3399	where
				3400	Rt is Wt when x==1, Xt when x==0
				3401	transfer-at-Rn when [11]==0, at EA when [11]==1
				3402	*/
				3403	if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
				3404	&& INSN(21,21) == 0 && INSN(10,10) == 1) {
				3405	/* Further checks on bits 31:30 and 22 */
				3406	Bool valid = False;
				3407	switch ((INSN(31,30) << 1) \| INSN(22,22)) {
				3408	case BITS3(1,0,0): // LDRSW Xt
				3409	case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
				3410	case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
				3411	valid = True;
				3412	break;
				3413	}
				3414	if (valid) {
				3415	UInt szLg2 = INSN(31,30);
				3416	UInt imm9 = INSN(20,12);
				3417	Bool atRN = INSN(11,11) == 0;
				3418	UInt nn = INSN(9,5);
				3419	UInt tt = INSN(4,0);
				3420	IRTemp tRN = newTemp(Ity_I64);
				3421	IRTemp tEA = newTemp(Ity_I64);
				3422	IRTemp tTA = IRTemp_INVALID;
				3423	ULong simm9 = sx_to_64(imm9, 9);
				3424	Bool is64 = INSN(22,22) == 0;
				3425	assign(tRN, getIReg64orSP(nn));
				3426	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
				3427	tTA = atRN ? tRN : tEA;
				3428	HChar ch = '?';
				3429	/* There are 5 cases:
				3430	byte load, SX to 64
				3431	byte load, SX to 32, ZX to 64
				3432	halfword load, SX to 64
				3433	halfword load, SX to 32, ZX to 64
				3434	word load, SX to 64
				3435	The ifs below handle them in the listed order.
				3436	*/
				3437	if (szLg2 == 0) {
				3438	ch = 'b';
				3439	if (is64) {
				3440	putIReg64orZR(tt, unop(Iop_8Sto64,
				3441	loadLE(Ity_I8, mkexpr(tTA))));
				3442	} else {
				3443	putIReg32orZR(tt, unop(Iop_8Sto32,
				3444	loadLE(Ity_I8, mkexpr(tTA))));
				3445	}
				3446	}
				3447	else if (szLg2 == 1) {
				3448	ch = 'h';
				3449	if (is64) {
				3450	putIReg64orZR(tt, unop(Iop_16Sto64,
				3451	loadLE(Ity_I16, mkexpr(tTA))));
				3452	} else {
				3453	putIReg32orZR(tt, unop(Iop_16Sto32,
				3454	loadLE(Ity_I16, mkexpr(tTA))));
				3455	}
				3456	}
				3457	else if (szLg2 == 2 && is64) {
				3458	ch = 'w';
				3459	putIReg64orZR(tt, unop(Iop_32Sto64,
				3460	loadLE(Ity_I32, mkexpr(tTA))));
				3461	}
				3462	else {
				3463	vassert(0);
				3464	}
				3465	putIReg64orSP(nn, mkexpr(tEA));
				3466	DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
				3467	ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
				3468	return True;
				3469	}
				3470	/* else fall through */
				3471	}
				3472
				3473	/* -------------- LDURS{B,H,W} (simm9, noUpd) -------------- */
				3474	/* 31 29 23 21 20 11 9 4
				3475	00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn\|SP, #simm9]
				3476	01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn\|SP, #simm9]
				3477	10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn\|SP, #simm9]
				3478	where
				3479	Rt is Wt when x==1, Xt when x==0
				3480	*/
				3481	if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
				3482	&& INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
				3483	/* Further checks on bits 31:30 and 22 */
				3484	Bool valid = False;
				3485	switch ((INSN(31,30) << 1) \| INSN(22,22)) {
				3486	case BITS3(1,0,0): // LDURSW Xt
				3487	case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
				3488	case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
				3489	valid = True;
				3490	break;
				3491	}
				3492	if (valid) {
				3493	UInt szLg2 = INSN(31,30);
				3494	UInt imm9 = INSN(20,12);
				3495	UInt nn = INSN(9,5);
				3496	UInt tt = INSN(4,0);
				3497	IRTemp tRN = newTemp(Ity_I64);
				3498	IRTemp tEA = newTemp(Ity_I64);
				3499	ULong simm9 = sx_to_64(imm9, 9);
				3500	Bool is64 = INSN(22,22) == 0;
				3501	assign(tRN, getIReg64orSP(nn));
				3502	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
				3503	HChar ch = '?';
				3504	/* There are 5 cases:
				3505	byte load, SX to 64
				3506	byte load, SX to 32, ZX to 64
				3507	halfword load, SX to 64
				3508	halfword load, SX to 32, ZX to 64
				3509	word load, SX to 64
				3510	The ifs below handle them in the listed order.
				3511	*/
				3512	if (szLg2 == 0) {
				3513	ch = 'b';
				3514	if (is64) {
				3515	putIReg64orZR(tt, unop(Iop_8Sto64,
				3516	loadLE(Ity_I8, mkexpr(tEA))));
				3517	} else {
				3518	putIReg32orZR(tt, unop(Iop_8Sto32,
				3519	loadLE(Ity_I8, mkexpr(tEA))));
				3520	}
				3521	}
				3522	else if (szLg2 == 1) {
				3523	ch = 'h';
				3524	if (is64) {
				3525	putIReg64orZR(tt, unop(Iop_16Sto64,
				3526	loadLE(Ity_I16, mkexpr(tEA))));
				3527	} else {
				3528	putIReg32orZR(tt, unop(Iop_16Sto32,
				3529	loadLE(Ity_I16, mkexpr(tEA))));
				3530	}
				3531	}
				3532	else if (szLg2 == 2 && is64) {
				3533	ch = 'w';
				3534	putIReg64orZR(tt, unop(Iop_32Sto64,
				3535	loadLE(Ity_I32, mkexpr(tEA))));
				3536	}
				3537	else {
				3538	vassert(0);
				3539	}
				3540	DIP("ldurs%c %s, [%s, #%lld]",
				3541	ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
				3542	return True;
				3543	}
				3544	/* else fall through */
				3545	}
				3546
				3547	/* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
				3548	/* L==1 => mm==LD
				3549	L==0 => mm==ST
				3550	sz==00 => 32 bit (S) transfers
				3551	sz==01 => 64 bit (D) transfers
				3552	sz==10 => 128 bit (Q) transfers
				3553	sz==11 isn't allowed
				3554	simm7 is scaled by the (single-register) transfer size
				3555
				3556	31 29 22 21 14 9 4
				3557	sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn\|SP], #imm
				3558	(at-Rn-then-Rn=EA)
				3559
				3560	sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn\|SP, #imm]!
				3561	(at-EA-then-Rn=EA)
				3562
				3563	sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn\|SP, #imm]
				3564	(at-EA)
				3565	*/
				3566
				3567	UInt insn_29_23 = INSN(29,23);
				3568	if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
				3569	\|\| insn_29_23 == BITS7(1,0,1,1,0,1,1)
				3570	\|\| insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
				3571	UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
				3572	Bool isLD = INSN(22,22) == 1;
				3573	Bool wBack = INSN(23,23) == 1;
				3574	Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
				3575	UInt tt2 = INSN(14,10);
				3576	UInt nn = INSN(9,5);
				3577	UInt tt1 = INSN(4,0);
				3578	if (szSlg2 == BITS2(1,1) \|\| (isLD && tt1 == tt2)) {
				3579	/* undecodable; fall through */
				3580	} else {
				3581	if (nn == 31) { /* FIXME generate stack alignment check */ }
				3582
				3583	// Compute the transfer address TA and the writeback address WA.
				3584	UInt szB = 4 << szSlg2; /* szB is the per-register size */
				3585	IRTemp tRN = newTemp(Ity_I64);
				3586	assign(tRN, getIReg64orSP(nn));
				3587	IRTemp tEA = newTemp(Ity_I64);
				3588	simm7 = szB * simm7;
				3589	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
				3590
				3591	IRTemp tTA = newTemp(Ity_I64);
				3592	IRTemp tWA = newTemp(Ity_I64);
				3593	switch (INSN(24,23)) {
				3594	case BITS2(0,1):
				3595	assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
				3596	case BITS2(1,1):
				3597	assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
				3598	case BITS2(1,0):
				3599	assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
				3600	default:
				3601	vassert(0); /* NOTREACHED */
				3602	}
				3603
				3604	IRType ty = Ity_INVALID;
				3605	switch (szB) {
				3606	case 4: ty = Ity_F32; break;
				3607	case 8: ty = Ity_F64; break;
				3608	case 16: ty = Ity_V128; break;
				3609	default: vassert(0);
				3610	}
				3611
				3612	if (isLD) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3613	putQRegLO(tt1,
				3614	loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
				3615	putQRegLO(tt2,
				3616	loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3617	} else {
				3618	storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3619	getQRegLO(tt1, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3620	storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3621	getQRegLO(tt2, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3622	}
				3623
				3624	if (wBack)
				3625	putIReg64orSP(nn, mkexpr(tEA));
				3626
				3627	const HChar* fmt_str = NULL;
				3628	switch (INSN(24,23)) {
				3629	case BITS2(0,1):
				3630	fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
				3631	break;
				3632	case BITS2(1,1):
				3633	fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
				3634	break;
				3635	case BITS2(1,0):
				3636	fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
				3637	break;
				3638	default:
				3639	vassert(0);
				3640	}
				3641	DIP(fmt_str, isLD ? "ld" : "st",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3642	nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3643	nameIReg64orSP(nn), simm7);
				3644	return True;
				3645	}
				3646	}
				3647
				3648	/* -------------- {LD,ST}R (vector register) --------------- */
				3649	/* 31 29 23 20 15 12 11 9 4
				3650	\| \| \| \| \| \| \| \| \|
				3651	00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn\|SP, R<m>{ext/sh}]
				3652	01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn\|SP, R<m>{ext/sh}]
				3653	10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn\|SP, R<m>{ext/sh}]
				3654	11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn\|SP, R<m>{ext/sh}]
				3655	00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn\|SP, R<m>{ext/sh}]
				3656
				3657	00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn\|SP, R<m>{ext/sh}]
				3658	01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn\|SP, R<m>{ext/sh}]
				3659	10 111100 001 Rm option S 10 Rn Rt STR St, [Xn\|SP, R<m>{ext/sh}]
				3660	11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn\|SP, R<m>{ext/sh}]
				3661	00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn\|SP, R<m>{ext/sh}]
				3662	*/
				3663	if (INSN(29,24) == BITS6(1,1,1,1,0,0)
				3664	&& INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
				3665	HChar dis_buf[64];
				3666	UInt szLg2 = (INSN(23,23) << 2) \| INSN(31,30);
				3667	Bool isLD = INSN(22,22) == 1;
				3668	UInt tt = INSN(4,0);
				3669	if (szLg2 >= 4) goto after_LDR_STR_vector_register;
				3670	IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
				3671	if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
				3672	switch (szLg2) {
				3673	case 0: /* 8 bit */
				3674	if (isLD) {
				3675	putQReg128(tt, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3676	putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
				3677	DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3678	} else {
				3679	vassert(0); //ATC
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3680	storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
				3681	DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3682	}
				3683	break;
				3684	case 1:
				3685	if (isLD) {
				3686	putQReg128(tt, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3687	putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
				3688	DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3689	} else {
				3690	vassert(0); //ATC
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3691	storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
				3692	DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3693	}
				3694	break;
				3695	case 2: /* 32 bit */
				3696	if (isLD) {
				3697	putQReg128(tt, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3698	putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
				3699	DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3700	} else {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3701	storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
				3702	DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3703	}
				3704	break;
				3705	case 3: /* 64 bit */
				3706	if (isLD) {
				3707	putQReg128(tt, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3708	putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
				3709	DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3710	} else {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3711	storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
				3712	DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3713	}
				3714	break;
				3715	case 4: return False; //ATC
				3716	default: vassert(0);
				3717	}
				3718	return True;
				3719	}
				3720	after_LDR_STR_vector_register:
				3721
				3722	/* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
				3723	/* 31 29 22 20 15 12 11 9 4
				3724	\| \| \| \| \| \| \| \| \|
				3725	10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn\|SP, R<m>{ext/sh}]
				3726
				3727	01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn\|SP, R<m>{ext/sh}]
				3728	01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn\|SP, R<m>{ext/sh}]
				3729
				3730	00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn\|SP, R<m>{ext/sh}]
				3731	00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn\|SP, R<m>{ext/sh}]
				3732	*/
				3733	if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
				3734	&& INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
				3735	HChar dis_buf[64];
				3736	UInt szLg2 = INSN(31,30);
				3737	Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
				3738	UInt tt = INSN(4,0);
				3739	if (szLg2 == 3) goto after_LDRS_integer_register;
				3740	IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
				3741	if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
				3742	/* Enumerate the 5 variants explicitly. */
				3743	if (szLg2 == 2/*32 bit*/ && sxTo64) {
				3744	putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
				3745	DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
				3746	return True;
				3747	}
				3748	else
				3749	if (szLg2 == 1/*16 bit*/) {
				3750	if (sxTo64) {
				3751	putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
				3752	DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
				3753	} else {
				3754	putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
				3755	DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3756	}
				3757	return True;
				3758	}
				3759	else
				3760	if (szLg2 == 0/*8 bit*/) {
				3761	if (sxTo64) {
				3762	putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
				3763	DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
				3764	} else {
				3765	putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
				3766	DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
				3767	}
				3768	return True;
				3769	}
				3770	/* else it's an invalid combination */
				3771	}
				3772	after_LDRS_integer_register:
				3773
				3774	/* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
				3775	/* This is the Unsigned offset variant only. The Post-Index and
				3776	Pre-Index variants are below.
				3777
				3778	31 29 23 21 9 4
				3779	00 111 101 01 imm12 n t LDR Bt, [Xn\|SP + imm12 * 1]
				3780	01 111 101 01 imm12 n t LDR Ht, [Xn\|SP + imm12 * 2]
				3781	10 111 101 01 imm12 n t LDR St, [Xn\|SP + imm12 * 4]
				3782	11 111 101 01 imm12 n t LDR Dt, [Xn\|SP + imm12 * 8]
				3783	00 111 101 11 imm12 n t LDR Qt, [Xn\|SP + imm12 * 16]
				3784
				3785	00 111 101 00 imm12 n t STR Bt, [Xn\|SP + imm12 * 1]
				3786	01 111 101 00 imm12 n t STR Ht, [Xn\|SP + imm12 * 2]
				3787	10 111 101 00 imm12 n t STR St, [Xn\|SP + imm12 * 4]
				3788	11 111 101 00 imm12 n t STR Dt, [Xn\|SP + imm12 * 8]
				3789	00 111 101 10 imm12 n t STR Qt, [Xn\|SP + imm12 * 16]
				3790	*/
				3791	if (INSN(29,24) == BITS6(1,1,1,1,0,1)
				3792	&& ((INSN(23,23) << 2) \| INSN(31,30)) <= 4) {
				3793	UInt szLg2 = (INSN(23,23) << 2) \| INSN(31,30);
				3794	Bool isLD = INSN(22,22) == 1;
				3795	UInt pimm12 = INSN(21,10) << szLg2;
				3796	UInt nn = INSN(9,5);
				3797	UInt tt = INSN(4,0);
				3798	IRTemp tEA = newTemp(Ity_I64);
				3799	IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
				3800	assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
				3801	if (isLD) {
				3802	if (szLg2 < 4) {
				3803	putQReg128(tt, mkV128(0x0000));
				3804	}
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3805	putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3806	} else {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3807	storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3808	}
				3809	DIP("%s %s, [%s, #%u]\n",
				3810	isLD ? "ldr" : "str",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3811	nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3812	return True;
				3813	}
				3814
				3815	/* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
				3816	/* These are the Post-Index and Pre-Index variants.
				3817
				3818	31 29 23 20 11 9 4
				3819	(at-Rn-then-Rn=EA)
				3820	00 111 100 01 0 imm9 01 n t LDR Bt, [Xn\|SP], #simm
				3821	01 111 100 01 0 imm9 01 n t LDR Ht, [Xn\|SP], #simm
				3822	10 111 100 01 0 imm9 01 n t LDR St, [Xn\|SP], #simm
				3823	11 111 100 01 0 imm9 01 n t LDR Dt, [Xn\|SP], #simm
				3824	00 111 100 11 0 imm9 01 n t LDR Qt, [Xn\|SP], #simm
				3825
				3826	(at-EA-then-Rn=EA)
				3827	00 111 100 01 0 imm9 11 n t LDR Bt, [Xn\|SP, #simm]!
				3828	01 111 100 01 0 imm9 11 n t LDR Ht, [Xn\|SP, #simm]!
				3829	10 111 100 01 0 imm9 11 n t LDR St, [Xn\|SP, #simm]!
				3830	11 111 100 01 0 imm9 11 n t LDR Dt, [Xn\|SP, #simm]!
				3831	00 111 100 11 0 imm9 11 n t LDR Qt, [Xn\|SP, #simm]!
				3832
				3833	Stores are the same except with bit 22 set to 0.
				3834	*/
				3835	if (INSN(29,24) == BITS6(1,1,1,1,0,0)
				3836	&& ((INSN(23,23) << 2) \| INSN(31,30)) <= 4
				3837	&& INSN(21,21) == 0 && INSN(10,10) == 1) {
				3838	UInt szLg2 = (INSN(23,23) << 2) \| INSN(31,30);
				3839	Bool isLD = INSN(22,22) == 1;
				3840	UInt imm9 = INSN(20,12);
				3841	Bool atRN = INSN(11,11) == 0;
				3842	UInt nn = INSN(9,5);
				3843	UInt tt = INSN(4,0);
				3844	IRTemp tRN = newTemp(Ity_I64);
				3845	IRTemp tEA = newTemp(Ity_I64);
				3846	IRTemp tTA = IRTemp_INVALID;
				3847	IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
				3848	ULong simm9 = sx_to_64(imm9, 9);
				3849	assign(tRN, getIReg64orSP(nn));
				3850	assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
				3851	tTA = atRN ? tRN : tEA;
				3852	if (isLD) {
				3853	if (szLg2 < 4) {
				3854	putQReg128(tt, mkV128(0x0000));
				3855	}
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3856	putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3857	} else {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3858	storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3859	}
				3860	putIReg64orSP(nn, mkexpr(tEA));
				3861	DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
				3862	isLD ? "ldr" : "str",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3863	nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3864	return True;
				3865	}
				3866
				3867	/* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
				3868	/* 31 29 23 20 11 9 4
				3869	00 111 100 01 0 imm9 00 n t LDUR Bt, [Xn\|SP, #simm]
				3870	01 111 100 01 0 imm9 00 n t LDUR Ht, [Xn\|SP, #simm]
				3871	10 111 100 01 0 imm9 00 n t LDUR St, [Xn\|SP, #simm]
				3872	11 111 100 01 0 imm9 00 n t LDUR Dt, [Xn\|SP, #simm]
				3873	00 111 100 11 0 imm9 00 n t LDUR Qt, [Xn\|SP, #simm]
				3874
				3875	00 111 100 00 0 imm9 00 n t STUR Bt, [Xn\|SP, #simm]
				3876	01 111 100 00 0 imm9 00 n t STUR Ht, [Xn\|SP, #simm]
				3877	10 111 100 00 0 imm9 00 n t STUR St, [Xn\|SP, #simm]
				3878	11 111 100 00 0 imm9 00 n t STUR Dt, [Xn\|SP, #simm]
				3879	00 111 100 10 0 imm9 00 n t STUR Qt, [Xn\|SP, #simm]
				3880	*/
				3881	if (INSN(29,24) == BITS6(1,1,1,1,0,0)
				3882	&& ((INSN(23,23) << 2) \| INSN(31,30)) <= 4
				3883	&& INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
				3884	UInt szLg2 = (INSN(23,23) << 2) \| INSN(31,30);
				3885	Bool isLD = INSN(22,22) == 1;
				3886	UInt imm9 = INSN(20,12);
				3887	UInt nn = INSN(9,5);
				3888	UInt tt = INSN(4,0);
				3889	ULong simm9 = sx_to_64(imm9, 9);
				3890	IRTemp tEA = newTemp(Ity_I64);
				3891	IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
				3892	assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
				3893	if (isLD) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3894	if (szLg2 < 4) {
				3895	putQReg128(tt, mkV128(0x0000));
				3896	}
				3897	putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3898	} else {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3899	storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3900	}
				3901	DIP("%s %s, [%s, #%lld]\n",
				3902	isLD ? "ldur" : "stur",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3903	nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3904	return True;
				3905	}
				3906
				3907	/* ---------------- LDR (literal, SIMD&FP) ---------------- */
				3908	/* 31 29 23 4
				3909	00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
				3910	01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
				3911	10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
				3912	*/
				3913	if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
				3914	UInt szB = 4 << INSN(31,30);
				3915	UInt imm19 = INSN(23,5);
				3916	UInt tt = INSN(4,0);
				3917	ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
				3918	IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3919	putQReg128(tt, mkV128(0x0000));
				3920	putQRegLO(tt, loadLE(ty, mkU64(ea)));
				3921	DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3922	return True;
				3923	}
				3924
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3925	/* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3926	/* 31 23
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3927	0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn\|SP]
				3928	0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn\|SP]
				3929	0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn\|SP]
				3930	0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn\|SP]
				3931	0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn\|SP]
				3932	0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn\|SP]
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3933	0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn\|SP]
				3934	0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn\|SP]
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3935	FIXME does this assume that the host is little endian?
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3936	*/
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3937	if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
				3938	\|\| (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3939	) {
				3940	Bool isLD = INSN(22,22) == 1;
				3941	UInt rN = INSN(9,5);
				3942	UInt vT = INSN(4,0);
				3943	IRTemp tEA = newTemp(Ity_I64);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3944	const HChar* names[4] = { "2d", "4s", "8h", "16b" };
				3945	const HChar* name = names[INSN(11,10)];
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3946	assign(tEA, getIReg64orSP(rN));
				3947	if (rN == 31) { /* FIXME generate stack alignment check */ }
				3948	if (isLD) {
				3949	putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
				3950	} else {
				3951	storeLE(mkexpr(tEA), getQReg128(vT));
				3952	}
				3953	DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3954	vT, name, nameIReg64orSP(rN));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	3955	return True;
				3956	}
				3957
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3958	/* 31 23
				3959	0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn\|SP]
				3960	0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn\|SP]
				3961	0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn\|SP]
				3962	0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn\|SP]
				3963	0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn\|SP]
				3964	0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn\|SP]
				3965	0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn\|SP]
				3966	0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn\|SP]
				3967	FIXME does this assume that the host is little endian?
				3968	*/
				3969	if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
				3970	\|\| (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
				3971	) {
				3972	Bool isLD = INSN(22,22) == 1;
				3973	UInt rN = INSN(9,5);
				3974	UInt vT = INSN(4,0);
				3975	IRTemp tEA = newTemp(Ity_I64);
				3976	const HChar* names[4] = { "1d", "2s", "4h", "8b" };
				3977	const HChar* name = names[INSN(11,10)];
				3978	assign(tEA, getIReg64orSP(rN));
				3979	if (rN == 31) { /* FIXME generate stack alignment check */ }
				3980	if (isLD) {
				3981	putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
				3982	putQRegLane(vT, 1, mkU64(0));
				3983	} else {
				3984	storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
				3985	}
				3986	DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
				3987	vT, name, nameIReg64orSP(rN));
				3988	return True;
				3989	}
				3990
				3991	/* ---------- LD1/ST1 (single structure, post index) ---------- */
				3992	/* 31 23
				3993	0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN\|SP], #16
				3994	0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN\|SP], #16
				3995	0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN\|SP], #16
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	3996	0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN\|SP], #16
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	3997	0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN\|SP], #16
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	3998	0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN\|SP], #16
				3999	..
				4000	0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN\|SP], #16
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4001	Note that #16 is implied and cannot be any other value.
				4002	FIXME does this assume that the host is little endian?
				4003	*/
				4004	if ( (insn & 0xFFFFFC00) == 0x4C9F7C00 // ST1 {vT.2d}, [xN\|SP], #16
				4005	\|\| (insn & 0xFFFFFC00) == 0x4CDF7C00 // LD1 {vT.2d}, [xN\|SP], #16
				4006	\|\| (insn & 0xFFFFFC00) == 0x4C9F7800 // ST1 {vT.4s}, [xN\|SP], #16
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	4007	\|\| (insn & 0xFFFFFC00) == 0x4CDF7800 // LD1 {vT.4s}, [xN\|SP], #16
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4008	\|\| (insn & 0xFFFFFC00) == 0x4C9F7400 // ST1 {vT.8h}, [xN\|SP], #16
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	4009	\|\| (insn & 0xFFFFFC00) == 0x4CDF7400 // LD1 {vT.8h}, [xN\|SP], #16
				4010	/* */
				4011	\|\| (insn & 0xFFFFFC00) == 0x4CDF7000 // LD1 {vT.16b}, [xN\|SP], #16
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4012	) {
				4013	Bool isLD = INSN(22,22) == 1;
				4014	UInt rN = INSN(9,5);
				4015	UInt vT = INSN(4,0);
				4016	IRTemp tEA = newTemp(Ity_I64);
				4017	const HChar* names[4] = { "2d", "4s", "8h", "16b" };
				4018	const HChar* name = names[INSN(11,10)];
				4019	assign(tEA, getIReg64orSP(rN));
				4020	if (rN == 31) { /* FIXME generate stack alignment check */ }
				4021	if (isLD) {
				4022	putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
				4023	} else {
				4024	storeLE(mkexpr(tEA), getQReg128(vT));
				4025	}
				4026	putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
				4027	DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
				4028	vT, name, nameIReg64orSP(rN));
				4029	return True;
				4030	}
				4031
				4032	/*
				4033	0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN\|SP], #8
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	4034	0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN\|SP], #8
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4035	Note that #8 is implied and cannot be any other value.
				4036	FIXME does this assume that the host is little endian?
				4037	*/
				4038	if ( (insn & 0xFFFFFC00) == 0x0C9F7800 // st1 {vT.2s}, [xN\|SP], #8
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	4039	\|\| (insn & 0xFFFFFC00) == 0x0C9F7400 // st1 {vT.4h}, [xN\|SP], #8
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4040	) {
				4041	UInt rN = INSN(9,5);
				4042	UInt vT = INSN(4,0);
				4043	IRTemp tEA = newTemp(Ity_I64);
				4044	const HChar* names[4] = { "1d", "2s", "4h", "8b" };
				4045	const HChar* name = names[INSN(11,10)];
				4046	assign(tEA, getIReg64orSP(rN));
				4047	if (rN == 31) { /* FIXME generate stack alignment check */ }
				4048	storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
				4049	putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
				4050	DIP("st1 {v%u.%s}, [%s], #8\n", vT, name, nameIReg64orSP(rN));
				4051	return True;
				4052	}
				4053
				4054	/* FIXME Temporary hacks to get through ld.so FIXME */
				4055
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4056	/* -------------------- LD{A}XR -------------------- */
				4057	/* FIXME: this is a hack; needs real atomicity stuff. */
				4058	/* 31 29 20 19 9 4
				4059	1x(size) 001000010 1 1111 1 11111 n t LDAXR Rt, [Xn\|SP]
				4060	1x(size) 001000010 1 1111 0 11111 n t LDXR Rt, [Xn\|SP]
				4061	*/
				4062	if (INSN(29,20) == BITS10(0,0,1,0,0,0,0,1,0,1)
				4063	&& (INSN(19,10) == BITS10(1,1,1,1,1,1,1,1,1,1)
				4064	\|\| INSN(19,10) == BITS10(1,1,1,1,0,1,1,1,1,1))
				4065	&& INSN(31,31) == 1) {
				4066	Bool is64 = INSN(30,30) == 1;
				4067	Bool isA = INSN(15,15) == 1;
				4068	UInt nn = INSN(9,5);
				4069	UInt tt = INSN(4,0);
				4070	if (is64) {
				4071	putIReg64orZR(tt, loadLE(Ity_I64, getIReg64orSP(nn)));
				4072	} else {
				4073	putIReg32orZR(tt, loadLE(Ity_I32, getIReg64orSP(nn)));
				4074	}
				4075	DIP("ld%sxr %s, [%s]\n",
				4076	isA ? "s" : "", nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
				4077	return True;
				4078	}
				4079
				4080	/* -------------------- ST{L}XR -------------------- */
				4081	/* FIXME: this is a hack; needs real atomicity stuff. */
				4082	/* 31 29 20 15 14 9 4
				4083	1x(size) 001000000 s 0 11111 n t STXR Ws, Rt, [Xn\|SP]
				4084	1x(size) 001000000 s 1 11111 n t STLXR Ws, Rt, [Xn\|SP]
				4085	with the result coding that Ws == 0 iff the store succeeded
				4086	*/
				4087	if (INSN(29,21) == BITS9(0,0,1,0,0,0,0,0,0)
				4088	&& INSN(14,10) == BITS5(1,1,1,1,1) && INSN(31,31) == 1) {
				4089	Bool is64 = INSN(30,30) == 1;
				4090	UInt ss = INSN(20,16);
				4091	Bool isL = INSN(15,15) == 1;
				4092	UInt nn = INSN(9,5);
				4093	UInt tt = INSN(4,0);
				4094	if (is64) {
				4095	storeLE(getIReg64orSP(nn), getIReg64orZR(tt));
				4096	} else {
				4097	storeLE(getIReg64orSP(nn), getIReg32orZR(tt));
				4098	}
				4099	putIReg32orZR(ss, mkU32(0));
				4100	DIP("st%sxr %s, %s, [%s]\n",
				4101	isL ? "s" : "",
				4102	nameIReg32orZR(ss), nameIRegOrZR(is64, tt), nameIReg64orSP(nn));
				4103	return True;
				4104	}
				4105
				4106	vex_printf("ARM64 front end: load_store\n");
				4107	return False;
				4108	# undef INSN
				4109	}
				4110
				4111
				4112	/*------------------------------------------------------------*/
				4113	/*--- Control flow and misc instructions ---*/
				4114	/*------------------------------------------------------------*/
				4115
				4116	static
				4117	Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn)
				4118	{
				4119	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				4120
				4121	/* ---------------------- B cond ----------------------- */
				4122	/* 31 24 4 3
				4123	0101010 0 imm19 0 cond */
				4124	if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
				4125	UInt cond = INSN(3,0);
				4126	ULong uimm64 = INSN(23,5) << 2;
				4127	Long simm64 = (Long)sx_to_64(uimm64, 21);
				4128	vassert(dres->whatNext == Dis_Continue);
				4129	vassert(dres->len == 4);
				4130	vassert(dres->continueAt == 0);
				4131	vassert(dres->jk_StopHere == Ijk_INVALID);
				4132	stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
				4133	Ijk_Boring,
				4134	IRConst_U64(guest_PC_curr_instr + simm64),
				4135	OFFB_PC) );
				4136	putPC(mkU64(guest_PC_curr_instr + 4));
				4137	dres->whatNext = Dis_StopHere;
				4138	dres->jk_StopHere = Ijk_Boring;
				4139	DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
				4140	return True;
				4141	}
				4142
				4143	/* -------------------- B{L} uncond -------------------- */
				4144	if (INSN(30,26) == BITS5(0,0,1,0,1)) {
				4145	/* 000101 imm26 B (PC + sxTo64(imm26 << 2))
				4146	100101 imm26 B (PC + sxTo64(imm26 << 2))
				4147	*/
				4148	UInt bLink = INSN(31,31);
				4149	ULong uimm64 = INSN(25,0) << 2;
				4150	Long simm64 = (Long)sx_to_64(uimm64, 28);
				4151	if (bLink) {
				4152	putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
				4153	}
				4154	putPC(mkU64(guest_PC_curr_instr + simm64));
				4155	dres->whatNext = Dis_StopHere;
				4156	dres->jk_StopHere = Ijk_Call;
				4157	DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
				4158	guest_PC_curr_instr + simm64);
				4159	return True;
				4160	}
				4161
				4162	/* --------------------- B{L} reg --------------------- */
				4163	/* 31 24 22 20 15 9 4
				4164	1101011 00 10 11111 000000 nn 00000 RET Rn
				4165	1101011 00 01 11111 000000 nn 00000 CALL Rn
				4166	1101011 00 00 11111 000000 nn 00000 JMP Rn
				4167	*/
				4168	if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
				4169	&& INSN(20,16) == BITS5(1,1,1,1,1)
				4170	&& INSN(15,10) == BITS6(0,0,0,0,0,0)
				4171	&& INSN(4,0) == BITS5(0,0,0,0,0)) {
				4172	UInt branch_type = INSN(22,21);
				4173	UInt nn = INSN(9,5);
				4174	if (branch_type == BITS2(1,0) /* RET */) {
				4175	putPC(getIReg64orZR(nn));
				4176	dres->whatNext = Dis_StopHere;
				4177	dres->jk_StopHere = Ijk_Ret;
				4178	DIP("ret %s\n", nameIReg64orZR(nn));
				4179	return True;
				4180	}
				4181	if (branch_type == BITS2(0,1) /* CALL */) {
				4182	putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
				4183	putPC(getIReg64orZR(nn));
				4184	dres->whatNext = Dis_StopHere;
				4185	dres->jk_StopHere = Ijk_Call;
				4186	DIP("blr %s\n", nameIReg64orZR(nn));
				4187	return True;
				4188	}
				4189	if (branch_type == BITS2(0,0) /* JMP */) {
				4190	putPC(getIReg64orZR(nn));
				4191	dres->whatNext = Dis_StopHere;
				4192	dres->jk_StopHere = Ijk_Boring;
				4193	DIP("jmp %s\n", nameIReg64orZR(nn));
				4194	return True;
				4195	}
				4196	}
				4197
				4198	/* -------------------- CB{N}Z -------------------- */
				4199	/* sf 011 010 1 imm19 Rt CBNZ Xt\|Wt, (PC + sxTo64(imm19 << 2))
				4200	sf 011 010 0 imm19 Rt CBZ Xt\|Wt, (PC + sxTo64(imm19 << 2))
				4201	*/
				4202	if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
				4203	Bool is64 = INSN(31,31) == 1;
				4204	Bool bIfZ = INSN(24,24) == 0;
				4205	ULong uimm64 = INSN(23,5) << 2;
				4206	UInt rT = INSN(4,0);
				4207	Long simm64 = (Long)sx_to_64(uimm64, 21);
				4208	IRExpr* cond = NULL;
				4209	if (is64) {
				4210	cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
				4211	getIReg64orZR(rT), mkU64(0));
				4212	} else {
				4213	cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
				4214	getIReg32orZR(rT), mkU32(0));
				4215	}
				4216	stmt( IRStmt_Exit(cond,
				4217	Ijk_Boring,
				4218	IRConst_U64(guest_PC_curr_instr + simm64),
				4219	OFFB_PC) );
				4220	putPC(mkU64(guest_PC_curr_instr + 4));
				4221	dres->whatNext = Dis_StopHere;
				4222	dres->jk_StopHere = Ijk_Boring;
				4223	DIP("cb%sz %s, 0x%llx\n",
				4224	bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
				4225	guest_PC_curr_instr + simm64);
				4226	return True;
				4227	}
				4228
				4229	/* -------------------- TB{N}Z -------------------- */
				4230	/* 31 30 24 23 18 5 4
				4231	b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
				4232	b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
				4233	*/
				4234	if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
				4235	UInt b5 = INSN(31,31);
				4236	Bool bIfZ = INSN(24,24) == 0;
				4237	UInt b40 = INSN(23,19);
				4238	UInt imm14 = INSN(18,5);
				4239	UInt tt = INSN(4,0);
				4240	UInt bitNo = (b5 << 5) \| b40;
				4241	ULong uimm64 = imm14 << 2;
				4242	Long simm64 = sx_to_64(uimm64, 16);
				4243	IRExpr* cond
				4244	= binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
				4245	binop(Iop_And64,
				4246	binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
				4247	mkU64(1)),
				4248	mkU64(0));
				4249	stmt( IRStmt_Exit(cond,
				4250	Ijk_Boring,
				4251	IRConst_U64(guest_PC_curr_instr + simm64),
				4252	OFFB_PC) );
				4253	putPC(mkU64(guest_PC_curr_instr + 4));
				4254	dres->whatNext = Dis_StopHere;
				4255	dres->jk_StopHere = Ijk_Boring;
				4256	DIP("tb%sz %s, #%u, 0x%llx\n",
				4257	bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
				4258	guest_PC_curr_instr + simm64);
				4259	return True;
				4260	}
				4261
				4262	/* -------------------- SVC -------------------- */
				4263	/* 11010100 000 imm16 000 01
				4264	Don't bother with anything except the imm16==0 case.
				4265	*/
				4266	if (INSN(31,0) == 0xD4000001) {
				4267	putPC(mkU64(guest_PC_curr_instr + 4));
				4268	dres->whatNext = Dis_StopHere;
				4269	dres->jk_StopHere = Ijk_Sys_syscall;
				4270	DIP("svc #0\n");
				4271	return True;
				4272	}
				4273
				4274	/* ------------------ M{SR,RS} ------------------ */
				4275	/* Only handles the case where the system register is TPIDR_EL0.
				4276	0xD51BD0 010 Rt MSR tpidr_el0, rT
				4277	0xD53BD0 010 Rt MRS rT, tpidr_el0
				4278	*/
				4279	if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /MSR/
				4280	\|\| (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /MRS/) {
				4281	Bool toSys = INSN(21,21) == 0;
				4282	UInt tt = INSN(4,0);
				4283	if (toSys) {
				4284	stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
				4285	DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
				4286	} else {
				4287	putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
				4288	DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
				4289	}
				4290	return True;
				4291	}
				4292	/* Cases for FPCR
				4293	0xD51B44 000 Rt MSR fpcr, rT
				4294	0xD53B44 000 Rt MSR rT, fpcr
				4295	*/
				4296	if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /MSR/
				4297	\|\| (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /MRS/) {
				4298	Bool toSys = INSN(21,21) == 0;
				4299	UInt tt = INSN(4,0);
				4300	if (toSys) {
				4301	stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
				4302	DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
				4303	} else {
				4304	putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
				4305	DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
				4306	}
				4307	return True;
				4308	}
				4309	/* Cases for FPSR
				4310	0xD51B44 001 Rt MSR fpcr, rT
				4311	0xD53B44 001 Rt MSR rT, fpcr
				4312	*/
				4313	if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /MSR/
				4314	\|\| (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /MRS/) {
				4315	Bool toSys = INSN(21,21) == 0;
				4316	UInt tt = INSN(4,0);
				4317	if (toSys) {
				4318	stmt( IRStmt_Put( OFFB_FPSR, getIReg32orZR(tt)) );
				4319	DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
				4320	} else {
				4321	putIReg32orZR(tt, IRExpr_Get(OFFB_FPSR, Ity_I32));
				4322	DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
				4323	}
				4324	return True;
				4325	}
				4326	/* Cases for NZCV
				4327	D51B42 000 Rt MSR nzcv, rT
				4328	D53B42 000 Rt MRS rT, nzcv
				4329	*/
				4330	if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /MSR/
				4331	\|\| (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /MRS/) {
				4332	Bool toSys = INSN(21,21) == 0;
				4333	UInt tt = INSN(4,0);
				4334	if (toSys) {
				4335	IRTemp t = newTemp(Ity_I64);
				4336	assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
				4337	setFlags_COPY(t);
				4338	DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
				4339	} else {
				4340	IRTemp res = newTemp(Ity_I64);
				4341	assign(res, mk_arm64g_calculate_flags_nzcv());
				4342	putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
				4343	DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
				4344	}
				4345	return True;
				4346	}
				4347
				4348	/* FIXME Temporary hacks to get through ld.so FIXME */
				4349	/* ------------------ ISB ------------------ */
				4350	if (INSN(31,0) == 0xD5033FDF) {
				4351	/* FIXME: not really a nop */
				4352	DIP("isb\n");
				4353	return True;
				4354	}
				4355	if (INSN(31,0) == 0xD5033BBF) {
				4356	/* FIXME: not really a nop */
				4357	DIP("dmb ish\n");
				4358	return True;
				4359	}
				4360
				4361	//fail:
				4362	vex_printf("ARM64 front end: branch_etc\n");
				4363	return False;
				4364	# undef INSN
				4365	}
				4366
				4367
				4368	/*------------------------------------------------------------*/
				4369	/*--- SIMD and FP instructions ---*/
				4370	/*------------------------------------------------------------*/
				4371
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	4372	/* begin FIXME -- rm temp scaffolding */
				4373	static IRExpr* mk_CatEvenLanes64x2 ( IRTemp, IRTemp );
				4374	static IRExpr* mk_CatOddLanes64x2 ( IRTemp, IRTemp );
				4375	static IRExpr* mk_CatEvenLanes32x4 ( IRTemp, IRTemp );
				4376	static IRExpr* mk_CatOddLanes32x4 ( IRTemp, IRTemp );
				4377	static IRExpr* mk_CatEvenLanes16x8 ( IRTemp, IRTemp );
				4378	static IRExpr* mk_CatOddLanes16x8 ( IRTemp, IRTemp );
				4379	/* end FIXME -- rm temp scaffolding */
				4380
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4381	/* Generate N copies of \|bit\| in the bottom of a ULong. */
				4382	static ULong Replicate ( ULong bit, Int N )
				4383	{
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4384	vassert(bit <= 1 && N >= 1 && N < 64);
				4385	if (bit == 0) {
				4386	return 0;
				4387	} else {
				4388	/* Careful. This won't work for N == 64. */
				4389	return (1ULL << N) - 1;
				4390	}
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4391	}
				4392
				4393	static ULong VFPExpandImm ( ULong imm8, Int N )
				4394	{
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4395	vassert(imm8 <= 0xFF);
				4396	vassert(N == 32 \|\| N == 64);
				4397	Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
				4398	Int F = N - E - 1;
				4399	ULong imm8_6 = (imm8 >> 6) & 1;
				4400	/* sign: 1 bit */
				4401	/* exp: E bits */
				4402	/* frac: F bits */
				4403	ULong sign = (imm8 >> 7) & 1;
				4404	ULong exp = ((imm8_6 ^ 1) << (E-1)) \| Replicate(imm8_6, E-1);
				4405	ULong frac = ((imm8 & 63) << (F-6)) \| Replicate(0, F-6);
				4406	vassert(sign < (1ULL << 1));
				4407	vassert(exp < (1ULL << E));
				4408	vassert(frac < (1ULL << F));
				4409	vassert(1 + E + F == N);
				4410	ULong res = (sign << (E+F)) \| (exp << F) \| frac;
				4411	return res;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4412	}
				4413
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4414	/* Help a bit for decoding laneage for vector operations that can be
				4415	of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
				4416	and SZ bits, typically for vector floating point. */
				4417	static Bool getLaneInfo_Q_SZ ( /OUT/IRType* tyI, /OUT/IRType* tyF,
				4418	/OUT/UInt* nLanes, /OUT/Bool* zeroUpper,
				4419	/OUT/const HChar** arrSpec,
				4420	Bool bitQ, Bool bitSZ )
				4421	{
				4422	vassert(bitQ == True \|\| bitQ == False);
				4423	vassert(bitSZ == True \|\| bitSZ == False);
				4424	if (bitQ && bitSZ) { // 2x64
				4425	if (tyI) *tyI = Ity_I64;
				4426	if (tyF) *tyF = Ity_F64;
				4427	if (nLanes) *nLanes = 2;
				4428	if (zeroUpper) *zeroUpper = False;
				4429	if (arrSpec) *arrSpec = "2d";
				4430	return True;
				4431	}
				4432	if (bitQ && !bitSZ) { // 4x32
				4433	if (tyI) *tyI = Ity_I32;
				4434	if (tyF) *tyF = Ity_F32;
				4435	if (nLanes) *nLanes = 4;
				4436	if (zeroUpper) *zeroUpper = False;
				4437	if (arrSpec) *arrSpec = "4s";
				4438	return True;
				4439	}
				4440	if (!bitQ && !bitSZ) { // 2x32
				4441	if (tyI) *tyI = Ity_I32;
				4442	if (tyF) *tyF = Ity_F32;
				4443	if (nLanes) *nLanes = 2;
				4444	if (zeroUpper) *zeroUpper = True;
				4445	if (arrSpec) *arrSpec = "2s";
				4446	return True;
				4447	}
				4448	// Else impliedly 1x64, which isn't allowed.
				4449	return False;
				4450	}
				4451
				4452	/* Helper for decoding laneage for simple vector operations,
				4453	eg integer add. */
				4454	static Bool getLaneInfo_SIMPLE ( /OUT/Bool* zeroUpper,
				4455	/OUT/const HChar** arrSpec,
				4456	Bool bitQ, UInt szBlg2 )
				4457	{
				4458	vassert(bitQ == True \|\| bitQ == False);
				4459	vassert(szBlg2 < 4);
				4460	Bool zu = False;
				4461	const HChar* as = NULL;
				4462	switch ((szBlg2 << 1) \| (bitQ ? 1 : 0)) {
				4463	case 0: zu = True; as = "8b"; break;
				4464	case 1: zu = False; as = "16b"; break;
				4465	case 2: zu = True; as = "4h"; break;
				4466	case 3: zu = False; as = "8h"; break;
				4467	case 4: zu = True; as = "2s"; break;
				4468	case 5: zu = False; as = "4s"; break;
				4469	case 6: return False; // impliedly 1x64
				4470	case 7: zu = False; as = "2d"; break;
				4471	default: vassert(0);
				4472	}
				4473	vassert(as);
				4474	if (arrSpec) *arrSpec = as;
				4475	if (zeroUpper) *zeroUpper = zu;
				4476	return True;
				4477	}
				4478
				4479
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	4480	/* Generate IR to fold all lanes of the V128 value in 'src' as
				4481	characterised by the operator 'op', and return the result in the
				4482	bottom bits of a V128, with all other bits set to zero. */
				4483	static IRTemp math_MINMAXV ( IRTemp src, IROp op )
				4484	{
				4485	/* The basic idea is to use repeated applications of Iop_CatEven*
				4486	and Iop_CatOdd* operators to 'src' so as to clone each lane into
				4487	a complete vector. Then fold all those vectors with 'op' and
				4488	zero out all but the least significant lane. */
				4489	switch (op) {
				4490	case Iop_Min8Sx16: case Iop_Min8Ux16:
				4491	case Iop_Max8Sx16: case Iop_Max8Ux16: {
				4492	return IRTemp_INVALID; // ATC
				4493	}
				4494	case Iop_Min16Sx8: case Iop_Min16Ux8:
				4495	case Iop_Max16Sx8: case Iop_Max16Ux8: {
				4496	IRTemp x76543210 = src;
				4497	IRTemp x76547654 = newTemp(Ity_V128);
				4498	IRTemp x32103210 = newTemp(Ity_V128);
				4499	assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
				4500	assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
				4501	IRTemp x76767676 = newTemp(Ity_V128);
				4502	IRTemp x54545454 = newTemp(Ity_V128);
				4503	IRTemp x32323232 = newTemp(Ity_V128);
				4504	IRTemp x10101010 = newTemp(Ity_V128);
				4505	assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
				4506	assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
				4507	assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
				4508	assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
				4509	IRTemp x77777777 = newTemp(Ity_V128);
				4510	IRTemp x66666666 = newTemp(Ity_V128);
				4511	IRTemp x55555555 = newTemp(Ity_V128);
				4512	IRTemp x44444444 = newTemp(Ity_V128);
				4513	IRTemp x33333333 = newTemp(Ity_V128);
				4514	IRTemp x22222222 = newTemp(Ity_V128);
				4515	IRTemp x11111111 = newTemp(Ity_V128);
				4516	IRTemp x00000000 = newTemp(Ity_V128);
				4517	assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
				4518	assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
				4519	assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
				4520	assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
				4521	assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
				4522	assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
				4523	assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
				4524	assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
				4525	IRTemp max76 = newTemp(Ity_V128);
				4526	IRTemp max54 = newTemp(Ity_V128);
				4527	IRTemp max32 = newTemp(Ity_V128);
				4528	IRTemp max10 = newTemp(Ity_V128);
				4529	assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
				4530	assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
				4531	assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
				4532	assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
				4533	IRTemp max7654 = newTemp(Ity_V128);
				4534	IRTemp max3210 = newTemp(Ity_V128);
				4535	assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
				4536	assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
				4537	IRTemp max76543210 = newTemp(Ity_V128);
				4538	assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
				4539	IRTemp res = newTemp(Ity_V128);
				4540	assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
				4541	return res;
				4542	}
				4543	case Iop_Min32Sx4: case Iop_Min32Ux4:
				4544	case Iop_Max32Sx4: case Iop_Max32Ux4: {
				4545	IRTemp x3210 = src;
				4546	IRTemp x3232 = newTemp(Ity_V128);
				4547	IRTemp x1010 = newTemp(Ity_V128);
				4548	assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
				4549	assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
				4550	IRTemp x3333 = newTemp(Ity_V128);
				4551	IRTemp x2222 = newTemp(Ity_V128);
				4552	IRTemp x1111 = newTemp(Ity_V128);
				4553	IRTemp x0000 = newTemp(Ity_V128);
				4554	assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
				4555	assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
				4556	assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
				4557	assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
				4558	IRTemp max32 = newTemp(Ity_V128);
				4559	IRTemp max10 = newTemp(Ity_V128);
				4560	assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
				4561	assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
				4562	IRTemp max3210 = newTemp(Ity_V128);
				4563	assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
				4564	IRTemp res = newTemp(Ity_V128);
				4565	assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
				4566	return res;
				4567	}
				4568	default:
				4569	vassert(0);
				4570	}
				4571	}
				4572
				4573
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4574	static
				4575	Bool dis_ARM64_simd_and_fp(/MB_OUT/DisResult* dres, UInt insn)
				4576	{
				4577	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				4578
				4579	/* ---------------- FMOV (general) ---------------- */
				4580	/* case 30 23 20 18 15 9 4
				4581	(1) 0 00 11110 00 1 00 111 000000 n d FMOV Sd, Wn
				4582	(2) 1 00 11110 01 1 00 111 000000 n d FMOV Dd, Xn
				4583	(3) 1 00 11110 10 1 01 111 000000 n d FMOV Vd.D[1], Xn
				4584
				4585	(4) 0 00 11110 00 1 00 110 000000 n d FMOV Wd, Sn
				4586	(5) 1 00 11110 01 1 00 110 000000 n d FMOV Xd, Dn
				4587	(6) 1 00 11110 10 1 01 110 000000 n d FMOV Xd, Vn.D[1]
				4588	*/
				4589	if (INSN(30,24) == BITS7(0,0,1,1,1,1,0)
				4590	&& INSN(21,21) == 1 && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
				4591	UInt sf = INSN(31,31);
				4592	UInt ty = INSN(23,22); // type
				4593	UInt rm = INSN(20,19); // rmode
				4594	UInt op = INSN(18,16); // opcode
				4595	UInt nn = INSN(9,5);
				4596	UInt dd = INSN(4,0);
				4597	UInt ix = 0; // case
				4598	if (sf == 0) {
				4599	if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
				4600	ix = 1;
				4601	else
				4602	if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
				4603	ix = 4;
				4604	} else {
				4605	vassert(sf == 1);
				4606	if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
				4607	ix = 2;
				4608	else
				4609	if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
				4610	ix = 5;
				4611	else
				4612	if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
				4613	ix = 3;
				4614	else
				4615	if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
				4616	ix = 6;
				4617	}
				4618	if (ix > 0) {
				4619	switch (ix) {
				4620	case 1:
				4621	putQReg128(dd, mkV128(0));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4622	putQRegLO(dd, getIReg32orZR(nn));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4623	DIP("fmov s%u, w%u\n", dd, nn);
				4624	break;
				4625	case 2:
				4626	putQReg128(dd, mkV128(0));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4627	putQRegLO(dd, getIReg64orZR(nn));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4628	DIP("fmov d%u, x%u\n", dd, nn);
				4629	break;
				4630	case 3:
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4631	putQRegHI64(dd, getIReg64orZR(nn));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4632	DIP("fmov v%u.d[1], x%u\n", dd, nn);
				4633	break;
				4634	case 4:
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4635	putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4636	DIP("fmov w%u, s%u\n", dd, nn);
				4637	break;
				4638	case 5:
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4639	putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4640	DIP("fmov x%u, d%u\n", dd, nn);
				4641	break;
				4642	case 6:
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4643	putIReg64orZR(dd, getQRegHI64(nn));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4644	DIP("fmov x%u, v%u.d[1]\n", dd, nn);
				4645	break;
				4646	default:
				4647	vassert(0);
				4648	}
				4649	return True;
				4650	}
				4651	/* undecodable; fall through */
				4652	}
				4653
				4654	/* -------------- FMOV (scalar, immediate) -------------- */
				4655	/* 31 28 23 20 12 9 4
				4656	000 11110 00 1 imm8 100 00000 d FMOV Sd, #imm
				4657	000 11110 01 1 imm8 100 00000 d FMOV Dd, #imm
				4658	*/
				4659	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
				4660	&& INSN(21,21) == 1 && INSN(12,5) == BITS8(1,0,0,0,0,0,0,0)) {
				4661	Bool isD = INSN(22,22) == 1;
				4662	UInt imm8 = INSN(20,13);
				4663	UInt dd = INSN(4,0);
				4664	ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
				4665	if (!isD) {
sewardj	aeeb31d	2014-01-12 18:23:45 +0000	[diff] [blame]	4666	vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4667	}
				4668	putQReg128(dd, mkV128(0));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4669	putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
				4670	DIP("fmov %s, #0x%llx\n",
				4671	nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4672	return True;
				4673	}
				4674
				4675	/* -------------- {S,U}CVTF (scalar, integer) -------------- */
				4676	/* 31 28 23 21 20 18 15 9 4 ix
				4677	000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn 0
				4678	000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn 1
				4679	100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn 2
				4680	100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn 3
				4681
				4682	000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn 4
				4683	000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn 5
				4684	100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn 6
				4685	100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn 7
				4686
				4687	These are signed/unsigned conversion from integer registers to
				4688	FP registers, all 4 32/64-bit combinations, rounded per FPCR.
				4689	*/
				4690	if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,17) == BITS5(1,0,0,0,1)
				4691	&& INSN(15,10) == BITS6(0,0,0,0,0,0)) {
				4692	Bool isI64 = INSN(31,31) == 1;
				4693	Bool isF64 = INSN(22,22) == 1;
				4694	Bool isU = INSN(16,16) == 1;
				4695	UInt nn = INSN(9,5);
				4696	UInt dd = INSN(4,0);
				4697	UInt ix = (isU ? 4 : 0) \| (isI64 ? 2 : 0) \| (isF64 ? 1 : 0);
				4698	const IROp ops[8]
				4699	= { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
				4700	Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
				4701	IRExpr* src = getIRegOrZR(isI64, nn);
				4702	IRExpr* res = (isF64 && !isI64)
				4703	? unop(ops[ix], src)
				4704	: binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
				4705	putQReg128(dd, mkV128(0));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4706	putQRegLO(dd, res);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4707	DIP("%ccvtf %s, %s\n",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4708	isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4709	nameIRegOrZR(isI64, nn));
				4710	return True;
				4711	}
				4712
				4713	/* -------------- F{ADD,SUB,MUL,DIV} (scalar) -------------- */
				4714	/* 31 23 20 15 11 9 4
				4715	---------------- 0000 ------ FMUL --------
				4716	000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
				4717	000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
				4718	---------------- 0010 ------ FADD --------
				4719	---------------- 0011 ------ FSUB --------
				4720	---------------- 1000 ------ FNMUL --------
				4721	*/
				4722	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
				4723	&& INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
				4724	Bool isD = INSN(22,22) == 1;
				4725	UInt mm = INSN(20,16);
				4726	UInt op = INSN(15,12);
				4727	UInt nn = INSN(9,5);
				4728	UInt dd = INSN(4,0);
				4729	IROp iop = Iop_INVALID;
				4730	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4731	Bool neg = False;
				4732	const HChar* nm = "???";
				4733	switch (op) {
				4734	case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ty); break;
				4735	case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ty); break;
				4736	case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ty); break;
				4737	case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ty); break;
				4738	case BITS4(1,0,0,0): nm = "fnmul"; iop = mkMULF(ty);
				4739	neg = True; break;
				4740	default: return False;
				4741	}
				4742	vassert(iop != Iop_INVALID);
				4743	IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4744	getQRegLO(nn, ty), getQRegLO(mm, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4745	IRTemp res = newTemp(ty);
				4746	assign(res, neg ? unop(mkNEGF(ty),resE) : resE);
				4747	putQReg128(dd, mkV128(0));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4748	putQRegLO(dd, mkexpr(res));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4749	DIP("%s %s, %s, %s\n",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4750	nm, nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4751	return True;
				4752	}
				4753
				4754	/* ------------ F{MOV,ABS,NEG,SQRT} D/D or S/S ------------ */
				4755	/* 31 23 21 16 14 9 4
				4756	000 11110 00 10000 00 10000 n d FMOV Sd, Sn
				4757	000 11110 01 10000 00 10000 n d FMOV Dd, Dn
				4758	------------------ 01 --------- FABS ------
				4759	------------------ 10 --------- FNEG ------
				4760	------------------ 11 --------- FSQRT -----
				4761	*/
				4762	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
				4763	&& INSN(21,17) == BITS5(1,0,0,0,0)
				4764	&& INSN(14,10) == BITS5(1,0,0,0,0)) {
				4765	Bool isD = INSN(22,22) == 1;
				4766	UInt opc = INSN(16,15);
				4767	UInt nn = INSN(9,5);
				4768	UInt dd = INSN(4,0);
				4769	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4770	IRTemp res = newTemp(ty);
				4771	if (opc == BITS2(0,0)) {
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4772	assign(res, getQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4773	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4774	putQRegLO(dd, mkexpr(res));
				4775	DIP("fmov %s, %s\n",
				4776	nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4777	return True;
				4778	}
				4779	if (opc == BITS2(1,0) \|\| opc == BITS2(0,1)) {
				4780	Bool isAbs = opc == BITS2(0,1);
				4781	IROp op = isAbs ? mkABSF(ty) : mkNEGF(ty);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4782	assign(res, unop(op, getQRegLO(nn, ty)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4783	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4784	putQRegLO(dd, mkexpr(res));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4785	DIP("%s %s, %s\n", isAbs ? "fabs" : "fneg",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4786	nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4787	return True;
				4788	}
				4789	if (opc == BITS2(1,1)) {
				4790	assign(res,
				4791	binop(mkSQRTF(ty),
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4792	mkexpr(mk_get_IR_rounding_mode()), getQRegLO(nn, ty)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4793	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4794	putQRegLO(dd, mkexpr(res));
				4795	DIP("fsqrt %s, %s\n", nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4796	return True;
				4797	}
				4798	/* else fall through; other cases are ATC */
				4799	}
				4800
				4801	/* -------------------- FCMP,FCMPE -------------------- */
				4802	/* 31 23 20 15 9 4
				4803	000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
				4804	000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
				4805	000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
				4806	000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
				4807
				4808	000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
				4809	000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
				4810	000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
				4811	000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
				4812
				4813	FCMPE generates Invalid Operation exn if either arg is any kind
				4814	of NaN. FCMP generates Invalid Operation exn if either arg is a
				4815	signalling NaN. We ignore this detail here and produce the same
				4816	IR for both.
				4817	*/
				4818	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0) && INSN(21,21) == 1
				4819	&& INSN(15,10) == BITS6(0,0,1,0,0,0) && INSN(2,0) == BITS3(0,0,0)) {
				4820	Bool isD = INSN(22,22) == 1;
				4821	UInt mm = INSN(20,16);
				4822	UInt nn = INSN(9,5);
				4823	Bool isCMPE = INSN(4,4) == 1;
				4824	Bool cmpZero = INSN(3,3) == 1;
				4825	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4826	Bool valid = True;
				4827	if (cmpZero && mm != 0) valid = False;
				4828	if (valid) {
				4829	IRTemp argL = newTemp(ty);
				4830	IRTemp argR = newTemp(ty);
				4831	IRTemp irRes = newTemp(Ity_I32);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4832	assign(argL, getQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4833	assign(argR,
				4834	cmpZero
				4835	? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4836	: getQRegLO(mm, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4837	assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
				4838	mkexpr(argL), mkexpr(argR)));
				4839	IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
				4840	IRTemp nzcv_28x0 = newTemp(Ity_I64);
				4841	assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
				4842	setFlags_COPY(nzcv_28x0);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4843	DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ty),
				4844	cmpZero ? "#0.0" : nameQRegLO(mm, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4845	return True;
				4846	}
				4847	}
				4848
				4849	/* -------------------- F{N}M{ADD,SUB} -------------------- */
				4850	/* 31 22 20 15 14 9 4 ix
				4851	000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
				4852	000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
				4853	000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
				4854	000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
				4855	where Fx=Dx when sz=1, Fx=Sx when sz=0
				4856
				4857	-----SPEC------ ----IMPL----
				4858	fmadd a + n * m a + n * m
				4859	fmsub a + (-n) * m a - n * m
				4860	fnmadd (-a) + (-n) * m -(a + n * m)
				4861	fnmsub (-a) + n * m -(a - n * m)
				4862	*/
				4863	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,1,0)) {
				4864	Bool isD = INSN(22,22) == 1;
				4865	UInt mm = INSN(20,16);
				4866	UInt aa = INSN(14,10);
				4867	UInt nn = INSN(9,5);
				4868	UInt dd = INSN(4,0);
				4869	UInt ix = (INSN(21,21) << 1) \| INSN(15,15);
				4870	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4871	IROp opADD = mkADDF(ty);
				4872	IROp opSUB = mkSUBF(ty);
				4873	IROp opMUL = mkMULF(ty);
				4874	IROp opNEG = mkNEGF(ty);
				4875	IRTemp res = newTemp(ty);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4876	IRExpr* eA = getQRegLO(aa, ty);
				4877	IRExpr* eN = getQRegLO(nn, ty);
				4878	IRExpr* eM = getQRegLO(mm, ty);
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4879	IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
				4880	IRExpr* eNxM = triop(opMUL, rm, eN, eM);
				4881	switch (ix) {
				4882	case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
				4883	case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
				4884	case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
				4885	case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
				4886	default: vassert(0);
				4887	}
				4888	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4889	putQRegLO(dd, mkexpr(res));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4890	const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
				4891	DIP("%s %s, %s, %s, %s\n",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4892	names[ix], nameQRegLO(dd, ty), nameQRegLO(nn, ty),
				4893	nameQRegLO(mm, ty), nameQRegLO(aa, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4894	return True;
				4895	}
				4896
				4897	/* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
				4898	/* 30 23 20 18 15 9 4
				4899	sf 00 11110 0x 1 00 000 000000 n d FCVTNS Rd, Fn (round to
				4900	sf 00 11110 0x 1 00 001 000000 n d FCVTNU Rd, Fn nearest)
				4901	---------------- 01 -------------- FCVTP-------- (round to +inf)
				4902	---------------- 10 -------------- FCVTM-------- (round to -inf)
				4903	---------------- 11 -------------- FCVTZ-------- (round to zero)
				4904
				4905	Rd is Xd when sf==1, Wd when sf==0
				4906	Fn is Dn when x==1, Sn when x==0
				4907	20:19 carry the rounding mode, using the same encoding as FPCR
				4908	*/
				4909	if (INSN(30,23) == BITS8(0,0,1,1,1,1,0,0) && INSN(21,21) == 1
				4910	&& INSN(18,17) == BITS2(0,0) && INSN(15,10) == BITS6(0,0,0,0,0,0)) {
				4911	Bool isI64 = INSN(31,31) == 1;
				4912	Bool isF64 = INSN(22,22) == 1;
				4913	UInt rm = INSN(20,19);
				4914	Bool isU = INSN(16,16) == 1;
				4915	UInt nn = INSN(9,5);
				4916	UInt dd = INSN(4,0);
				4917	/* Decide on the IR rounding mode to use. */
				4918	IRRoundingMode irrm = 8; /* impossible */
				4919	HChar ch = '?';
				4920	switch (rm) {
				4921	case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
				4922	case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
				4923	case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
				4924	case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
				4925	default: vassert(0);
				4926	}
				4927	vassert(irrm != 8);
				4928	/* Decide on the conversion primop, based on the source size,
				4929	dest size and signedness (8 possibilities). Case coding:
				4930	F32 ->s I32 0
				4931	F32 ->u I32 1
				4932	F32 ->s I64 2
				4933	F32 ->u I64 3
				4934	F64 ->s I32 4
				4935	F64 ->u I32 5
				4936	F64 ->s I64 6
				4937	F64 ->u I64 7
				4938	*/
				4939	UInt ix = (isF64 ? 4 : 0) \| (isI64 ? 2 : 0) \| (isU ? 1 : 0);
				4940	vassert(ix < 8);
				4941	const IROp ops[8]
				4942	= { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
				4943	Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
				4944	IROp op = ops[ix];
				4945	// A bit of ATCery: bounce all cases we haven't seen an example of.
				4946	if (/* F32toI32S */
				4947	(op == Iop_F32toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Sn */
				4948	/* F32toI32U */
				4949	/* F32toI64S */
				4950	/* F32toI64U */
				4951	\|\| (op == Iop_F32toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Sn */
				4952	/* F64toI32S */
				4953	\|\| (op == Iop_F64toI32S && irrm == Irrm_ZERO) /* FCVTZS Wd,Dn */
				4954	\|\| (op == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
				4955	\|\| (op == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
				4956	/* F64toI32U */
				4957	\|\| (op == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
				4958	\|\| (op == Iop_F64toI32U && irrm == Irrm_ZERO) /* FCVTZU Wd,Dn */
				4959	/* F64toI64S */
				4960	\|\| (op == Iop_F64toI64S && irrm == Irrm_ZERO) /* FCVTZS Xd,Dn */
				4961	/* F64toI64U */
				4962	\|\| (op == Iop_F64toI64U && irrm == Irrm_ZERO) /* FCVTZU Xd,Dn */
				4963	) {
				4964	/* validated */
				4965	} else {
				4966	return False;
				4967	}
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4968	IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
				4969	IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
				4970	IRTemp src = newTemp(srcTy);
				4971	IRTemp dst = newTemp(dstTy);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4972	assign(src, getQRegLO(nn, srcTy));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4973	assign(dst, binop(op, mkU32(irrm), mkexpr(src)));
				4974	putIRegOrZR(isI64, dd, mkexpr(dst));
				4975	DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	4976	nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	4977	return True;
				4978	}
				4979
				4980	/* ---------------- FRINT{I,M,P,Z} (scalar) ---------------- */
				4981	/* 31 23 21 17 14 9 4
				4982	000 11110 0x 1001 111 10000 n d FRINTI Fd, Fn (round per FPCR)
				4983	rm
				4984	x==0 => S-registers, x==1 => D-registers
				4985	rm (17:15) encodings:
				4986	111 per FPCR (FRINTI)
				4987	001 +inf (FRINTP)
				4988	010 -inf (FRINTM)
				4989	011 zero (FRINTZ)
				4990	000 tieeven
				4991	100 tieaway
				4992	110 per FPCR + "exact = TRUE"
				4993	101 unallocated
				4994	*/
				4995	if (INSN(31,23) == BITS9(0,0,0,1,1,1,1,0,0)
				4996	&& INSN(21,18) == BITS4(1,0,0,1) && INSN(14,10) == BITS5(1,0,0,0,0)) {
				4997	Bool isD = INSN(22,22) == 1;
				4998	UInt rm = INSN(17,15);
				4999	UInt nn = INSN(9,5);
				5000	UInt dd = INSN(4,0);
				5001	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5002	IRExpr* irrmE = NULL;
				5003	UChar ch = '?';
				5004	switch (rm) {
				5005	case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
				5006	case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
				5007	case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
				5008	default: break;
				5009	}
				5010	if (irrmE) {
				5011	IRTemp src = newTemp(ty);
				5012	IRTemp dst = newTemp(ty);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5013	assign(src, getQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5014	assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
				5015	irrmE, mkexpr(src)));
				5016	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5017	putQRegLO(dd, mkexpr(dst));
				5018	DIP("frint%c %s, %s\n",
				5019	ch, nameQRegLO(dd, ty), nameQRegLO(nn, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5020	return True;
				5021	}
				5022	/* else unhandled rounding mode case -- fall through */
				5023	}
				5024
				5025	/* ------------------ FCVT (scalar) ------------------ */
				5026	/* 31 23 21 16 14 9 4
				5027	000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
				5028	--------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
				5029	--------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
				5030	--------- 00 ----- 01 --------- FCVT Dd, Sn
				5031	--------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
				5032	--------- 01 ----- 00 --------- FCVT Sd, Dn
				5033	Rounding, when dst is smaller than src, is per the FPCR.
				5034	*/
				5035	if (INSN(31,24) == BITS8(0,0,0,1,1,1,1,0)
				5036	&& INSN(21,17) == BITS5(1,0,0,0,1)
				5037	&& INSN(14,10) == BITS5(1,0,0,0,0)) {
				5038	UInt b2322 = INSN(23,22);
				5039	UInt b1615 = INSN(16,15);
				5040	UInt nn = INSN(9,5);
				5041	UInt dd = INSN(4,0);
				5042	if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
				5043	/* Convert S to D */
				5044	IRTemp res = newTemp(Ity_F64);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5045	assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5046	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5047	putQRegLO(dd, mkexpr(res));
				5048	DIP("fcvt %s, %s\n",
				5049	nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5050	return True;
				5051	}
				5052	if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
				5053	/* Convert D to S */
				5054	IRTemp res = newTemp(Ity_F32);
				5055	assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5056	getQRegLO(nn, Ity_F64)));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5057	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5058	putQRegLO(dd, mkexpr(res));
				5059	DIP("fcvt %s, %s\n",
				5060	nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5061	return True;
				5062	}
				5063	/* else unhandled */
				5064	}
				5065
				5066	/* ------------------ FABD (scalar) ------------------ */
				5067	/* 31 23 20 15 9 4
				5068	011 11110 111 m 110101 n d FABD Dd, Dn, Dm
				5069	011 11110 101 m 110101 n d FABD Sd, Sn, Sm
				5070	*/
				5071	if (INSN(31,23) == BITS9(0,1,1,1,1,1,1,0,1) && INSN(21,21) == 1
				5072	&& INSN(15,10) == BITS6(1,1,0,1,0,1)) {
				5073	Bool isD = INSN(22,22) == 1;
				5074	UInt mm = INSN(20,16);
				5075	UInt nn = INSN(9,5);
				5076	UInt dd = INSN(4,0);
				5077	IRType ty = isD ? Ity_F64 : Ity_F32;
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5078	IRTemp res = newTemp(ty);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5079	assign(res, unop(mkABSF(ty),
				5080	triop(mkSUBF(ty),
				5081	mkexpr(mk_get_IR_rounding_mode()),
				5082	getQRegLO(nn,ty), getQRegLO(mm,ty))));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5083	putQReg128(dd, mkV128(0x0000));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5084	putQRegLO(dd, mkexpr(res));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5085	DIP("fabd %s, %s, %s\n",
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5086	nameQRegLO(dd, ty), nameQRegLO(nn, ty), nameQRegLO(mm, ty));
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5087	return True;
				5088	}
				5089
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5090	/* -------------- {S,U}CVTF (vector, integer) -------------- */
				5091	/* 31 28 22 21 15 9 4
				5092	0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
				5093	0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
				5094	with laneage:
				5095	case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
				5096	*/
				5097	if (INSN(31,31) == 0 && INSN(28,23) == BITS6(0,1,1,1,0,0)
				5098	&& INSN(21,16) == BITS6(1,0,0,0,0,1)
				5099	&& INSN(15,10) == BITS6(1,1,0,1,1,0)) {
				5100	Bool isQ = INSN(30,30) == 1;
				5101	Bool isU = INSN(29,29) == 1;
				5102	Bool isF64 = INSN(22,22) == 1;
				5103	UInt nn = INSN(9,5);
				5104	UInt dd = INSN(4,0);
				5105	if (isQ \|\| !isF64) {
				5106	IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
				5107	UInt nLanes = 0;
				5108	Bool zeroHI = False;
				5109	const HChar* arrSpec = NULL;
				5110	Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
				5111	isQ, isF64 );
				5112	IROp op = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
				5113	: (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
				5114	IRTemp rm = mk_get_IR_rounding_mode();
				5115	UInt i;
				5116	vassert(ok); /* the 'if' above should ensure this */
				5117	for (i = 0; i < nLanes; i++) {
				5118	putQRegLane(dd, i,
				5119	binop(op, mkexpr(rm), getQRegLane(nn, i, tyI)));
				5120	}
				5121	if (zeroHI) {
				5122	putQRegLane(dd, 1, mkU64(0));
				5123	}
				5124	DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
				5125	nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
				5126	return True;
				5127	}
				5128	/* else fall through */
				5129	}
				5130
				5131	/* ---------- F{ADD,SUB,MUL,DIV,MLA,MLS} (vector) ---------- */
				5132	/* 31 28 22 21 20 15 9 4 case
				5133	0q0 01110 0 sz 1 m 110101 n d FADD Vd,Vn,Vm 1
				5134	0q0 01110 1 sz 1 m 110101 n d FSUB Vd,Vn,Vm 2
				5135	0q1 01110 0 sz 1 m 110111 n d FMUL Vd,Vn,Vm 3
				5136	0q1 01110 0 sz 1 m 111111 n d FDIV Vd,Vn,Vm 4
				5137	0q0 01110 0 sz 1 m 110011 n d FMLA Vd,Vn,Vm 5
				5138	0q0 01110 1 sz 1 m 110011 n d FMLS Vd,Vn,Vm 6
				5139	*/
				5140	if (INSN(31,31) == 0
				5141	&& INSN(28,24) == BITS5(0,1,1,1,0) && INSN(21,21) == 1) {
				5142	Bool isQ = INSN(30,30) == 1;
				5143	UInt b29 = INSN(29,29);
				5144	UInt b23 = INSN(23,23);
				5145	Bool isF64 = INSN(22,22) == 1;
				5146	UInt mm = INSN(20,16);
				5147	UInt b1510 = INSN(15,10);
				5148	UInt nn = INSN(9,5);
				5149	UInt dd = INSN(4,0);
				5150	UInt ix = 0;
				5151	/**/ if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,1,0,1)) ix = 1;
				5152	else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,1,0,1)) ix = 2;
				5153	else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,0,1,1,1)) ix = 3;
				5154	else if (b29 == 1 && b23 == 0 && b1510 == BITS6(1,1,1,1,1,1)) ix = 4;
				5155	else if (b29 == 0 && b23 == 0 && b1510 == BITS6(1,1,0,0,1,1)) ix = 5;
				5156	else if (b29 == 0 && b23 == 1 && b1510 == BITS6(1,1,0,0,1,1)) ix = 6;
				5157	IRType laneTy = Ity_INVALID;
				5158	Bool zeroHI = False;
				5159	const HChar* arr = "??";
				5160	Bool ok
				5161	= getLaneInfo_Q_SZ(NULL, &laneTy, NULL, &zeroHI, &arr, isQ, isF64);
				5162	/* Skip MLA/MLS for the time being */
				5163	if (ok && ix >= 1 && ix <= 4) {
				5164	const IROp ops64[4]
				5165	= { Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2 };
				5166	const IROp ops32[4]
				5167	= { Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4 };
				5168	const HChar* names[4]
				5169	= { "fadd", "fsub", "fmul", "fdiv" };
				5170	IROp op = laneTy==Ity_F64 ? ops64[ix-1] : ops32[ix-1];
				5171	IRTemp rm = mk_get_IR_rounding_mode();
				5172	IRTemp t1 = newTemp(Ity_V128);
				5173	IRTemp t2 = newTemp(Ity_V128);
				5174	assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5175	assign(t2, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t1))
				5176	: mkexpr(t1));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5177	putQReg128(dd, mkexpr(t2));
				5178	DIP("%s %s.%s, %s.%s, %s.%s\n", names[ix-1],
				5179	nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
				5180	return True;
				5181	}
				5182	}
				5183
				5184	/* ---------------- ADD/SUB (vector) ---------------- */
				5185	/* 31 28 23 21 20 15 9 4
				5186	0q0 01110 size 1 m 100001 n d ADD Vd.T, Vn.T, Vm.T
				5187	0q1 01110 size 1 m 100001 n d SUB Vd.T, Vn.T, Vm.T
				5188	*/
				5189	if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
				5190	&& INSN(21,21) == 1 && INSN(15,10) == BITS6(1,0,0,0,0,1)) {
				5191	Bool isQ = INSN(30,30) == 1;
				5192	UInt szBlg2 = INSN(23,22);
				5193	Bool isSUB = INSN(29,29) == 1;
				5194	UInt mm = INSN(20,16);
				5195	UInt nn = INSN(9,5);
				5196	UInt dd = INSN(4,0);
				5197	Bool zeroHI = False;
				5198	const HChar* arrSpec = "";
				5199	Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
				5200	if (ok) {
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	5201	const IROp opsADD[4]
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5202	= { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	5203	const IROp opsSUB[4]
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5204	= { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
				5205	vassert(szBlg2 < 4);
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	5206	IROp op = isSUB ? opsSUB[szBlg2] : opsADD[szBlg2];
				5207	IRTemp t = newTemp(Ity_V128);
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5208	assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5209	putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
				5210	: mkexpr(t));
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5211	const HChar* nm = isSUB ? "sub" : "add";
				5212	DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
				5213	nameQReg128(dd), arrSpec,
				5214	nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
				5215	return True;
				5216	}
				5217	/* else fall through */
				5218	}
				5219
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5220	/* ---------------- ADD/SUB (scalar) ---------------- */
				5221	/* 31 28 23 21 20 15 9 4
				5222	010 11110 11 1 m 100001 n d ADD Dd, Dn, Dm
				5223	011 11110 11 1 m 100001 n d SUB Dd, Dn, Dm
				5224	*/
				5225	if (INSN(31,30) == BITS2(0,1) && INSN(28,21) == BITS8(1,1,1,1,0,1,1,1)
				5226	&& INSN(15,10) == BITS6(1,0,0,0,0,1)) {
				5227	Bool isSUB = INSN(29,29) == 1;
				5228	UInt mm = INSN(20,16);
				5229	UInt nn = INSN(9,5);
				5230	UInt dd = INSN(4,0);
				5231	IRTemp res = newTemp(Ity_I64);
				5232	assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
				5233	getQRegLane(nn, 0, Ity_I64),
				5234	getQRegLane(mm, 0, Ity_I64)));
				5235	putQRegLane(dd, 0, mkexpr(res));
				5236	putQRegLane(dd, 1, mkU64(0));
				5237	DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
				5238	nameQRegLO(dd, Ity_I64),
				5239	nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
				5240	return True;
				5241	}
				5242
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	5243	/* ------------ MUL/PMUL/MLA/MLS (vector) ------------ */
				5244	/* 31 28 23 21 20 15 9 4
				5245	0q0 01110 size 1 m 100111 n d MUL Vd.T, Vn.T, Vm.T B/H/S only
				5246	0q1 01110 size 1 m 100111 n d PMUL Vd.T, Vn.T, Vm.T B only
				5247	0q0 01110 size 1 m 100101 n d MLA Vd.T, Vn.T, Vm.T B/H/S only
				5248	0q1 01110 size 1 m 100101 n d MLS Vd.T, Vn.T, Vm.T B/H/S only
				5249	*/
				5250	if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
				5251	&& INSN(21,21) == 1
				5252	&& (INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(1,0,0,1,0,1)) {
				5253	Bool isQ = INSN(30,30) == 1;
				5254	UInt szBlg2 = INSN(23,22);
				5255	UInt bit29 = INSN(29,29);
				5256	UInt mm = INSN(20,16);
				5257	UInt nn = INSN(9,5);
				5258	UInt dd = INSN(4,0);
				5259	Bool isMLAS = INSN(11,11) == 0;
				5260	const IROp opsADD[4]
				5261	= { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_INVALID };
				5262	const IROp opsSUB[4]
				5263	= { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_INVALID };
				5264	const IROp opsMUL[4]
				5265	= { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
				5266	const IROp opsPMUL[4]
				5267	= { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
				5268	/* Set opMUL and, if necessary, opACC. A result value of
				5269	Iop_INVALID for opMUL indicates that the instruction is
				5270	invalid. */
				5271	Bool zeroHI = False;
				5272	const HChar* arrSpec = "";
				5273	Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
				5274	vassert(szBlg2 < 4);
				5275	IROp opACC = Iop_INVALID;
				5276	IROp opMUL = Iop_INVALID;
				5277	if (ok) {
				5278	opMUL = (bit29 == 1 && !isMLAS) ? opsPMUL[szBlg2]
				5279	: opsMUL[szBlg2];
				5280	opACC = isMLAS ? (bit29 == 1 ? opsSUB[szBlg2] : opsADD[szBlg2])
				5281	: Iop_INVALID;
				5282	}
				5283	if (ok && opMUL != Iop_INVALID) {
				5284	IRTemp t1 = newTemp(Ity_V128);
				5285	assign(t1, binop(opMUL, getQReg128(nn), getQReg128(mm)));
				5286	IRTemp t2 = newTemp(Ity_V128);
				5287	assign(t2, opACC == Iop_INVALID
				5288	? mkexpr(t1)
				5289	: binop(opACC, getQReg128(dd), mkexpr(t1)));
				5290	putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t2))
				5291	: mkexpr(t2));
				5292	const HChar* nm = isMLAS ? (bit29 == 1 ? "mls" : "mla")
				5293	: (bit29 == 1 ? "pmul" : "mul");
				5294	DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
				5295	nameQReg128(dd), arrSpec,
				5296	nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
				5297	return True;
				5298	}
				5299	/* else fall through */
				5300	}
				5301
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5302	/* ---------------- {S,U}{MIN,MAX} (vector) ---------------- */
				5303	/* 31 28 23 21 20 15 9 4
				5304	0q0 01110 size 1 m 011011 n d SMIN Vd.T, Vn.T, Vm.T
				5305	0q1 01110 size 1 m 011011 n d UMIN Vd.T, Vn.T, Vm.T
				5306	0q0 01110 size 1 m 011001 n d SMAX Vd.T, Vn.T, Vm.T
				5307	0q1 01110 size 1 m 011001 n d UMAX Vd.T, Vn.T, Vm.T
				5308	*/
				5309	if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
				5310	&& INSN(21,21) == 1
				5311	&& ((INSN(15,10) & BITS6(1,1,1,1,0,1)) == BITS6(0,1,1,0,0,1))) {
				5312	Bool isQ = INSN(30,30) == 1;
				5313	Bool isU = INSN(29,29) == 1;
				5314	UInt szBlg2 = INSN(23,22);
				5315	Bool isMAX = INSN(12,12) == 0;
				5316	UInt mm = INSN(20,16);
				5317	UInt nn = INSN(9,5);
				5318	UInt dd = INSN(4,0);
				5319	Bool zeroHI = False;
				5320	const HChar* arrSpec = "";
				5321	Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2 );
				5322	if (ok) {
				5323	const IROp opMINS[4]
				5324	= { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
				5325	const IROp opMINU[4]
				5326	= { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
				5327	const IROp opMAXS[4]
				5328	= { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
				5329	const IROp opMAXU[4]
				5330	= { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
				5331	vassert(szBlg2 < 4);
				5332	IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
				5333	: (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
				5334	IRTemp t = newTemp(Ity_V128);
				5335	assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
				5336	putQReg128(dd, zeroHI ? unop(Iop_ZeroHI64ofV128, mkexpr(t))
				5337	: mkexpr(t));
				5338	const HChar* nm = isMAX ? (isU ? "umax" : "smax")
				5339	: (isU ? "umin" : "smin");
				5340	DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
				5341	nameQReg128(dd), arrSpec,
				5342	nameQReg128(nn), arrSpec, nameQReg128(mm), arrSpec);
				5343	return True;
				5344	}
				5345	/* else fall through */
				5346	}
				5347
				5348	/* -------------------- {S,U}{MIN,MAX}V -------------------- */
				5349	/* 31 28 23 21 16 15 9 4
				5350	0q0 01110 size 11000 1 101010 n d SMINV Vd, Vn.T
				5351	0q1 01110 size 11000 1 101010 n d UMINV Vd, Vn.T
				5352	0q0 01110 size 11000 0 101010 n d SMAXV Vd, Vn.T
				5353	0q1 01110 size 11000 0 101010 n d UMAXV Vd, Vn.T
				5354	*/
				5355	if (INSN(31,31) == 0 && INSN(28,24) == BITS5(0,1,1,1,0)
				5356	&& INSN(21,17) == BITS5(1,1,0,0,0)
				5357	&& INSN(15,10) == BITS6(1,0,1,0,1,0)) {
				5358	Bool isQ = INSN(30,30) == 1;
				5359	Bool isU = INSN(29,29) == 1;
				5360	UInt szBlg2 = INSN(23,22);
				5361	Bool isMAX = INSN(16,16) == 0;
				5362	UInt nn = INSN(9,5);
				5363	UInt dd = INSN(4,0);
				5364	Bool zeroHI = False;
				5365	const HChar* arrSpec = "";
				5366	Bool ok = getLaneInfo_SIMPLE(&zeroHI, &arrSpec, isQ, szBlg2);
				5367	if (ok) {
				5368	if (szBlg2 == 3) ok = False;
				5369	if (szBlg2 == 2 && !isQ) ok = False;
				5370	}
				5371	if (ok) {
				5372	const IROp opMINS[3]
				5373	= { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
				5374	const IROp opMINU[3]
				5375	= { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
				5376	const IROp opMAXS[3]
				5377	= { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
				5378	const IROp opMAXU[3]
				5379	= { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
				5380	vassert(szBlg2 < 3);
				5381	IROp op = isMAX ? (isU ? opMAXU[szBlg2] : opMAXS[szBlg2])
				5382	: (isU ? opMINU[szBlg2] : opMINS[szBlg2]);
				5383	IRTemp tN1 = newTemp(Ity_V128);
				5384	assign(tN1, getQReg128(nn));
				5385	/* If Q == 0, we're just folding lanes in the lower half of
				5386	the value. In which case, copy the lower half of the
				5387	source into the upper half, so we can then treat it the
				5388	same as the full width case. */
				5389	IRTemp tN2 = newTemp(Ity_V128);
				5390	assign(tN2, zeroHI ? mk_CatOddLanes64x2(tN1,tN1) : mkexpr(tN1));
				5391	IRTemp res = math_MINMAXV(tN2, op);
				5392	if (res == IRTemp_INVALID)
				5393	return False; /* means math_MINMAXV
				5394	doesn't handle this case yet */
				5395	putQReg128(dd, mkexpr(res));
				5396	const HChar* nm = isMAX ? (isU ? "umaxv" : "smaxv")
				5397	: (isU ? "uminv" : "sminv");
				5398	const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
				5399	IRType laneTy = tys[szBlg2];
				5400	DIP("%s %s, %s.%s\n", nm,
				5401	nameQRegLO(dd, laneTy), nameQReg128(nn), arrSpec);
				5402	return True;
				5403	}
				5404	/* else fall through */
				5405	}
				5406
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5407	/* -------------------- XTN{,2} -------------------- */
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5408	/* 31 28 23 21 15 9 4 XTN{,2} Vd.Tb, Vn.Ta
sewardj	606c4ba	2014-01-26 19:11:14 +0000	[diff] [blame]	5409	0q0 01110 size 100001 001010 n d
				5410	*/
				5411	if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,1,0)
				5412	&& INSN(21,16) == BITS6(1,0,0,0,0,1)
				5413	&& INSN(15,10) == BITS6(0,0,1,0,1,0)) {
				5414	Bool isQ = INSN(30,30) == 1;
				5415	UInt size = INSN(23,22);
				5416	UInt nn = INSN(9,5);
				5417	UInt dd = INSN(4,0);
				5418	IROp op = Iop_INVALID;
				5419	const HChar* tb = NULL;
				5420	const HChar* ta = NULL;
				5421	switch ((size << 1) \| (isQ ? 1 : 0)) {
				5422	case 0: tb = "8b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
				5423	case 1: tb = "16b"; ta = "8h"; op = Iop_NarrowUn16to8x8; break;
				5424	case 2: tb = "4h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
				5425	case 3: tb = "8h"; ta = "4s"; op = Iop_NarrowUn32to16x4; break;
				5426	case 4: tb = "2s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
				5427	case 5: tb = "4s"; ta = "2d"; op = Iop_NarrowUn64to32x2; break;
				5428	case 6: break;
				5429	case 7: break;
				5430	default: vassert(0);
				5431	}
				5432	if (op != Iop_INVALID) {
				5433	if (!isQ) {
				5434	putQRegLane(dd, 1, mkU64(0));
				5435	}
				5436	putQRegLane(dd, isQ ? 1 : 0, unop(op, getQReg128(nn)));
				5437	DIP("xtn%s %s.%s, %s.%s\n", isQ ? "2" : "",
				5438	nameQReg128(dd), tb, nameQReg128(nn), ta);
				5439	return True;
				5440	}
				5441	/* else fall through */
				5442	}
				5443
				5444	/* ---------------- DUP (element, vector) ---------------- */
				5445	/* 31 28 20 15 9 4
				5446	0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
				5447	*/
				5448	if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
				5449	&& INSN(15,10) == BITS6(0,0,0,0,0,1)) {
				5450	Bool isQ = INSN(30,30) == 1;
				5451	UInt imm5 = INSN(20,16);
				5452	UInt nn = INSN(9,5);
				5453	UInt dd = INSN(4,0);
				5454	IRTemp w0 = newTemp(Ity_I64);
				5455	const HChar* arT = "??";
				5456	const HChar* arTs = "??";
				5457	IRType laneTy = Ity_INVALID;
				5458	UInt laneNo = 16; /* invalid */
				5459	if (imm5 & 1) {
				5460	arT = isQ ? "16b" : "8b";
				5461	arTs = "b";
				5462	laneNo = (imm5 >> 1) & 15;
				5463	laneTy = Ity_I8;
				5464	assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
				5465	}
				5466	else if (imm5 & 2) {
				5467	arT = isQ ? "8h" : "4h";
				5468	arTs = "h";
				5469	laneNo = (imm5 >> 2) & 7;
				5470	laneTy = Ity_I16;
				5471	assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
				5472	}
				5473	else if (imm5 & 4) {
				5474	arT = isQ ? "4s" : "2s";
				5475	arTs = "s";
				5476	laneNo = (imm5 >> 3) & 3;
				5477	laneTy = Ity_I32;
				5478	assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
				5479	}
				5480	else if ((imm5 & 8) && isQ) {
				5481	arT = "2d";
				5482	arTs = "d";
				5483	laneNo = (imm5 >> 4) & 1;
				5484	laneTy = Ity_I64;
				5485	assign(w0, getQRegLane(nn, laneNo, laneTy));
				5486	}
				5487	else {
				5488	/* invalid; leave laneTy unchanged. */
				5489	}
				5490	/* */
				5491	if (laneTy != Ity_INVALID) {
				5492	vassert(laneNo < 16);
				5493	IRTemp w1 = math_DUP_TO_64(w0, laneTy);
				5494	putQReg128(dd, binop(Iop_64HLtoV128,
				5495	isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
				5496	DIP("dup %s.%s, %s.%s[%u]\n",
				5497	nameQReg128(dd), arT, nameQReg128(nn), arTs, laneNo);
				5498	return True;
				5499	}
				5500	/* else fall through */
				5501	}
				5502
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5503	/* ---------------- DUP (general, vector) ---------------- */
				5504	/* 31 28 23 20 15 9 4
				5505	0q0 01110 000 imm5 000011 n d DUP Vd.T, Rn
				5506	Q=0 writes 64, Q=1 writes 128
				5507	imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
				5508	xxx10 4H(q=0) or 8H(q=1), R=W
				5509	xx100 2S(q=0) or 4S(q=1), R=W
				5510	x1000 Invalid(q=0) or 2D(q=1), R=X
				5511	x0000 Invalid(q=0) or Invalid(q=1)
				5512	*/
				5513	if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
				5514	&& INSN(15,10) == BITS6(0,0,0,0,1,1)) {
				5515	Bool isQ = INSN(30,30) == 1;
				5516	UInt imm5 = INSN(20,16);
				5517	UInt nn = INSN(9,5);
				5518	UInt dd = INSN(4,0);
				5519	IRTemp w0 = newTemp(Ity_I64);
				5520	const HChar* arT = "??";
				5521	IRType laneTy = Ity_INVALID;
				5522	if (imm5 & 1) {
				5523	arT = isQ ? "16b" : "8b";
				5524	laneTy = Ity_I8;
				5525	assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
				5526	}
				5527	else if (imm5 & 2) {
				5528	arT = isQ ? "8h" : "4h";
				5529	laneTy = Ity_I16;
				5530	assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
				5531	}
				5532	else if (imm5 & 4) {
				5533	arT = isQ ? "4s" : "2s";
				5534	laneTy = Ity_I32;
				5535	assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
				5536	}
				5537	else if ((imm5 & 8) && isQ) {
				5538	arT = "2d";
				5539	laneTy = Ity_I64;
				5540	assign(w0, getIReg64orZR(nn));
				5541	}
				5542	else {
				5543	/* invalid; leave laneTy unchanged. */
				5544	}
				5545	/* */
				5546	if (laneTy != Ity_INVALID) {
				5547	IRTemp w1 = math_DUP_TO_64(w0, laneTy);
				5548	putQReg128(dd, binop(Iop_64HLtoV128,
				5549	isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
				5550	DIP("dup %s.%s, %s\n",
				5551	nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
				5552	return True;
				5553	}
				5554	/* else fall through */
				5555	}
				5556
sewardj	f5b0891	2014-02-06 12:57:58 +0000	[diff] [blame^]	5557	/* ---------------------- {S,U}MOV ---------------------- */
				5558	/* 31 28 20 15 9 4
				5559	0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
				5560	0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
				5561	dest is Xd when q==1, Wd when q==0
				5562	UMOV:
				5563	Ts,index,ops = case q:imm5 of
				5564	0:xxxx1 -> B, xxxx, 8Uto64
				5565	1:xxxx1 -> invalid
				5566	0:xxx10 -> H, xxx, 16Uto64
				5567	1:xxx10 -> invalid
				5568	0:xx100 -> S, xx, 32Uto64
				5569	1:xx100 -> invalid
				5570	1:x1000 -> D, x, copy64
				5571	other -> invalid
				5572	SMOV:
				5573	Ts,index,ops = case q:imm5 of
				5574	0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
				5575	1:xxxx1 -> B, xxxx, 8Sto64
				5576	0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
				5577	1:xxx10 -> H, xxx, 16Sto64
				5578	0:xx100 -> invalid
				5579	1:xx100 -> S, xx, 32Sto64
				5580	1:x1000 -> invalid
				5581	other -> invalid
				5582	*/
				5583	if (INSN(31,31) == 0 && INSN(29,21) == BITS9(0,0,1,1,1,0,0,0,0)
				5584	&& (INSN(15,10) & BITS6(1,1,1,0,1,1)) == BITS6(0,0,1,0,1,1)) {
				5585	UInt bitQ = INSN(30,30) == 1;
				5586	UInt imm5 = INSN(20,16);
				5587	UInt nn = INSN(9,5);
				5588	UInt dd = INSN(4,0);
				5589	Bool isU = INSN(12,12) == 1;
				5590	const HChar* arTs = "??";
				5591	UInt laneNo = 16; /* invalid */
				5592	// Setting 'res' to non-NULL determines valid/invalid
				5593	IRExpr* res = NULL;
				5594	if (!bitQ && (imm5 & 1)) { // 0:xxxx1
				5595	laneNo = (imm5 >> 1) & 15;
				5596	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
				5597	res = isU ? unop(Iop_8Uto64, lane)
				5598	: unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
				5599	arTs = "b";
				5600	}
				5601	else if (bitQ && (imm5 & 1)) { // 1:xxxx1
				5602	laneNo = (imm5 >> 1) & 15;
				5603	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
				5604	res = isU ? NULL
				5605	: unop(Iop_8Sto64, lane);
				5606	arTs = "b";
				5607	}
				5608	else if (!bitQ && (imm5 & 2)) { // 0:xxx10
				5609	laneNo = (imm5 >> 2) & 7;
				5610	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
				5611	res = isU ? unop(Iop_16Uto64, lane)
				5612	: unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
				5613	arTs = "h";
				5614	}
				5615	else if (bitQ && (imm5 & 2)) { // 1:xxx10
				5616	laneNo = (imm5 >> 2) & 7;
				5617	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
				5618	res = isU ? NULL
				5619	: unop(Iop_16Sto64, lane);
				5620	arTs = "h";
				5621	}
				5622	else if (!bitQ && (imm5 & 4)) { // 0:xx100
				5623	laneNo = (imm5 >> 3) & 3;
				5624	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
				5625	res = isU ? unop(Iop_32Uto64, lane)
				5626	: NULL;
				5627	arTs = "s";
				5628	}
				5629	else if (bitQ && (imm5 & 4)) { // 1:xxx10
				5630	laneNo = (imm5 >> 3) & 3;
				5631	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
				5632	res = isU ? NULL
				5633	: unop(Iop_32Sto64, lane);
				5634	arTs = "s";
				5635	}
				5636	else if (bitQ && (imm5 & 8)) { // 1:x1000
				5637	laneNo = (imm5 >> 4) & 1;
				5638	IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
				5639	res = isU ? lane
				5640	: NULL;
				5641	arTs = "d";
				5642	}
				5643	/* */
				5644	if (res) {
				5645	vassert(laneNo < 16);
				5646	putIReg64orZR(dd, res);
				5647	DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
				5648	nameIRegOrZR(bitQ == 1, dd),
				5649	nameQReg128(nn), arTs, laneNo);
				5650	return True;
				5651	}
				5652	/* else fall through */
				5653	}
				5654
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5655	/* FIXME Temporary hacks to get through ld.so FIXME */
				5656
				5657	/* ------------------ movi vD.4s, #0x0 ------------------ */
				5658	/* 0x4F 0x00 0x04 000 vD */
				5659	if ((insn & 0xFFFFFFE0) == 0x4F000400) {
				5660	UInt vD = INSN(4,0);
				5661	putQReg128(vD, mkV128(0x0000));
				5662	DIP("movi v%u.4s, #0x0\n", vD);
				5663	return True;
				5664	}
				5665
sewardj	bbcf188	2014-01-12 12:49:10 +0000	[diff] [blame]	5666	/* ---------------- MOV vD.16b, vN.16b ---------------- */
				5667	/* 31 23 20 15 9 4
				5668	010 01110 101 m 000111 n d ORR vD.16b, vN.16b, vM.16b
				5669	This only handles the N == M case.
				5670	*/
				5671	if (INSN(31,24) == BITS8(0,1,0,0,1,1,1,0)
				5672	&& INSN(23,21) == BITS3(1,0,1) && INSN(15,10) == BITS6(0,0,0,1,1,1)) {
				5673	UInt mm = INSN(20,16);
				5674	UInt nn = INSN(9,5);
				5675	UInt dd = INSN(4,0);
				5676	if (mm == nn) {
				5677	putQReg128(dd, getQReg128(nn));
				5678	DIP("mov v%u.16b, v%u.16b\n", dd, nn);
				5679	return True;
				5680	}
				5681	/* else it's really an ORR; fall through. */
				5682	}
				5683
				5684	vex_printf("ARM64 front end: simd_and_fp\n");
				5685	return False;
				5686	# undef INSN
				5687	}
				5688
				5689
/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/
				5693
				5694	/* Disassemble a single ARM64 instruction into IR. The instruction
				5695	has is located at \|guest_instr\| and has guest IP of
				5696	\|guest_PC_curr_instr\|, which will have been set before the call
				5697	here. Returns True iff the instruction was decoded, in which case
				5698	dres will be set accordingly, or False, in which case dres should
				5699	be ignored by the caller. */
				5700
				5701	static
				5702	Bool disInstr_ARM64_WRK (
				5703	/MB_OUT/DisResult* dres,
				5704	Bool (resteerOkFn) ( /opaque/void, Addr64 ),
				5705	Bool resteerCisOk,
				5706	void* callback_opaque,
				5707	UChar* guest_instr,
				5708	VexArchInfo* archinfo,
				5709	VexAbiInfo* abiinfo
				5710	)
				5711	{
				5712	// A macro to fish bits out of 'insn'.
				5713	# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
				5714
				5715	//ZZ DisResult dres;
				5716	//ZZ UInt insn;
				5717	//ZZ //Bool allow_VFP = False;
				5718	//ZZ //UInt hwcaps = archinfo->hwcaps;
				5719	//ZZ IRTemp condT; /* :: Ity_I32 */
				5720	//ZZ UInt summary;
				5721	//ZZ HChar dis_buf[128]; // big enough to hold LDMIA etc text
				5722	//ZZ
				5723	//ZZ /* What insn variants are we supporting today? */
				5724	//ZZ //allow_VFP = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
				5725	//ZZ // etc etc
				5726
				5727	/* Set result defaults. */
				5728	dres->whatNext = Dis_Continue;
				5729	dres->len = 4;
				5730	dres->continueAt = 0;
				5731	dres->jk_StopHere = Ijk_INVALID;
				5732
				5733	/* At least this is simple on ARM64: insns are all 4 bytes long, and
				5734	4-aligned. So just fish the whole thing out of memory right now
				5735	and have done. */
				5736	UInt insn = getUIntLittleEndianly( guest_instr );
				5737
				5738	if (0) vex_printf("insn: 0x%x\n", insn);
				5739
				5740	DIP("\t(arm64) 0x%llx: ", (ULong)guest_PC_curr_instr);
				5741
				5742	vassert(0 == (guest_PC_curr_instr & 3ULL));
				5743
				5744	/* ----------------------------------------------------------- */
				5745
				5746	/* Spot "Special" instructions (see comment at top of file). */
				5747	{
				5748	UChar* code = (UChar*)guest_instr;
				5749	/* Spot the 16-byte preamble:
				5750	93CC0D8C ror x12, x12, #3
				5751	93CC358C ror x12, x12, #13
				5752	93CCCD8C ror x12, x12, #51
				5753	93CCF58C ror x12, x12, #61
				5754	*/
				5755	UInt word1 = 0x93CC0D8C;
				5756	UInt word2 = 0x93CC358C;
				5757	UInt word3 = 0x93CCCD8C;
				5758	UInt word4 = 0x93CCF58C;
				5759	if (getUIntLittleEndianly(code+ 0) == word1 &&
				5760	getUIntLittleEndianly(code+ 4) == word2 &&
				5761	getUIntLittleEndianly(code+ 8) == word3 &&
				5762	getUIntLittleEndianly(code+12) == word4) {
				5763	/* Got a "Special" instruction preamble. Which one is it? */
				5764	if (getUIntLittleEndianly(code+16) == 0xAA0A014A
				5765	/* orr x10,x10,x10 */) {
				5766	/* X3 = client_request ( X4 ) */
				5767	DIP("x3 = client_request ( x4 )\n");
				5768	putPC(mkU64( guest_PC_curr_instr + 20 ));
				5769	dres->jk_StopHere = Ijk_ClientReq;
				5770	dres->whatNext = Dis_StopHere;
				5771	return True;
				5772	}
				5773	else
				5774	if (getUIntLittleEndianly(code+16) == 0xAA0B016B
				5775	/* orr x11,x11,x11 */) {
				5776	/* X3 = guest_NRADDR */
				5777	DIP("x3 = guest_NRADDR\n");
				5778	dres->len = 20;
				5779	putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
				5780	return True;
				5781	}
				5782	else
				5783	if (getUIntLittleEndianly(code+16) == 0xAA0C018C
				5784	/* orr x12,x12,x12 */) {
				5785	/* branch-and-link-to-noredir X8 */
				5786	DIP("branch-and-link-to-noredir x8\n");
				5787	putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
				5788	putPC(getIReg64orZR(8));
				5789	dres->jk_StopHere = Ijk_NoRedir;
				5790	dres->whatNext = Dis_StopHere;
				5791	return True;
				5792	}
				5793	else
				5794	if (getUIntLittleEndianly(code+16) == 0xAA090129
				5795	/* orr x9,x9,x9 */) {
				5796	/* IR injection */
				5797	DIP("IR injection\n");
				5798	vex_inject_ir(irsb, Iend_LE);
				5799	// Invalidate the current insn. The reason is that the IRop we're
				5800	// injecting here can change. In which case the translation has to
				5801	// be redone. For ease of handling, we simply invalidate all the
				5802	// time.
				5803	stmt(IRStmt_Put(OFFB_TISTART, mkU64(guest_PC_curr_instr)));
				5804	stmt(IRStmt_Put(OFFB_TILEN, mkU64(20)));
				5805	putPC(mkU64( guest_PC_curr_instr + 20 ));
				5806	dres->whatNext = Dis_StopHere;
				5807	dres->jk_StopHere = Ijk_TInval;
				5808	return True;
				5809	}
				5810	/* We don't know what it is. */
				5811	return False;
				5812	/NOTREACHED/
				5813	}
				5814	}
				5815
				5816	/* ----------------------------------------------------------- */
				5817
				5818	/* Main ARM64 instruction decoder starts here. */
				5819
				5820	Bool ok = False;
				5821
				5822	/* insn[28:25] determines the top-level grouping, so let's start
				5823	off with that.
				5824
				5825	For all of these dis_ARM64_ functions, we pass *dres with the
				5826	normal default results "insn OK, 4 bytes long, keep decoding" so
				5827	they don't need to change it. However, decodes of control-flow
				5828	insns may cause *dres to change.
				5829	*/
				5830	switch (INSN(28,25)) {
				5831	case BITS4(1,0,0,0): case BITS4(1,0,0,1):
				5832	// Data processing - immediate
				5833	ok = dis_ARM64_data_processing_immediate(dres, insn);
				5834	break;
				5835	case BITS4(1,0,1,0): case BITS4(1,0,1,1):
				5836	// Branch, exception generation and system instructions
				5837	ok = dis_ARM64_branch_etc(dres, insn);
				5838	break;
				5839	case BITS4(0,1,0,0): case BITS4(0,1,1,0):
				5840	case BITS4(1,1,0,0): case BITS4(1,1,1,0):
				5841	// Loads and stores
				5842	ok = dis_ARM64_load_store(dres, insn);
				5843	break;
				5844	case BITS4(0,1,0,1): case BITS4(1,1,0,1):
				5845	// Data processing - register
				5846	ok = dis_ARM64_data_processing_register(dres, insn);
				5847	break;
				5848	case BITS4(0,1,1,1): case BITS4(1,1,1,1):
				5849	// Data processing - SIMD and floating point
				5850	ok = dis_ARM64_simd_and_fp(dres, insn);
				5851	break;
				5852	case BITS4(0,0,0,0): case BITS4(0,0,0,1):
				5853	case BITS4(0,0,1,0): case BITS4(0,0,1,1):
				5854	// UNALLOCATED
				5855	break;
				5856	default:
				5857	vassert(0); /* Can't happen */
				5858	}
				5859
				5860	/* If the next-level down decoders failed, make sure \|dres\| didn't
				5861	get changed. */
				5862	if (!ok) {
				5863	vassert(dres->whatNext == Dis_Continue);
				5864	vassert(dres->len == 4);
				5865	vassert(dres->continueAt == 0);
				5866	vassert(dres->jk_StopHere == Ijk_INVALID);
				5867	}
				5868
				5869	return ok;
				5870
				5871	# undef INSN
				5872	}
				5873
				5874
/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/
				5878
				5879	/* Disassemble a single instruction into IR. The instruction
				5880	is located in host memory at &guest_code[delta]. */
				5881
				5882	DisResult disInstr_ARM64 ( IRSB* irsb_IN,
				5883	Bool (resteerOkFn) ( void, Addr64 ),
				5884	Bool resteerCisOk,
				5885	void* callback_opaque,
				5886	UChar* guest_code_IN,
				5887	Long delta_IN,
				5888	Addr64 guest_IP,
				5889	VexArch guest_arch,
				5890	VexArchInfo* archinfo,
				5891	VexAbiInfo* abiinfo,
				5892	Bool host_bigendian_IN,
				5893	Bool sigill_diag_IN )
				5894	{
				5895	DisResult dres;
				5896	vex_bzero(&dres, sizeof(dres));
				5897
				5898	/* Set globals (see top of this file) */
				5899	vassert(guest_arch == VexArchARM64);
				5900
				5901	irsb = irsb_IN;
				5902	host_is_bigendian = host_bigendian_IN;
				5903	guest_PC_curr_instr = (Addr64)guest_IP;
				5904
				5905	/* Try to decode */
				5906	Bool ok = disInstr_ARM64_WRK( &dres,
				5907	resteerOkFn, resteerCisOk, callback_opaque,
				5908	(UChar*)&guest_code_IN[delta_IN],
				5909	archinfo, abiinfo );
				5910	if (ok) {
				5911	/* All decode successes end up here. */
				5912	vassert(dres.len == 4 /\|\| dres.len == 20/);
				5913	switch (dres.whatNext) {
				5914	case Dis_Continue:
				5915	putPC( mkU64(dres.len + guest_PC_curr_instr) );
				5916	break;
				5917	case Dis_ResteerU:
				5918	case Dis_ResteerC:
				5919	putPC(mkU64(dres.continueAt));
				5920	break;
				5921	case Dis_StopHere:
				5922	break;
				5923	default:
				5924	vassert(0);
				5925	}
				5926	DIP("\n");
				5927	} else {
				5928	/* All decode failures end up here. */
				5929	if (sigill_diag_IN) {
				5930	Int i, j;
				5931	UChar buf[64];
				5932	UInt insn
				5933	= getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
				5934	vex_bzero(buf, sizeof(buf));
				5935	for (i = j = 0; i < 32; i++) {
				5936	if (i > 0) {
				5937	if ((i & 7) == 0) buf[j++] = ' ';
				5938	else if ((i & 3) == 0) buf[j++] = '\'';
				5939	}
				5940	buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
				5941	}
				5942	vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
				5943	vex_printf("disInstr(arm64): %s\n", buf);
				5944	}
				5945
				5946	/* Tell the dispatcher that this insn cannot be decoded, and so
				5947	has not been executed, and (is currently) the next to be
				5948	executed. PC should be up-to-date since it is made so at the
				5949	start of each insn, but nevertheless be paranoid and update
				5950	it again right now. */
				5951	putPC( mkU64(guest_PC_curr_instr) );
				5952	dres.whatNext = Dis_StopHere;
				5953	dres.len = 0;
				5954	dres.continueAt = 0;
				5955	dres.jk_StopHere = Ijk_NoDecode;
				5956	}
				5957	return dres;
				5958	}
				5959
sewardj	ecde697	2014-02-05 11:01:19 +0000	[diff] [blame]	5960	////////////////////////////////////////////////////////////////////////
				5961	////////////////////////////////////////////////////////////////////////
				5962
				5963	/* Spare code for doing reference implementations of various 128-bit
				5964	SIMD interleaves/deinterleaves/concatenation ops. For 64-bit
				5965	equivalents see the end of guest_arm_toIR.c. */
				5966
				5967	////////////////////////////////////////////////////////////////
				5968	// 64x2 operations
				5969	//
				5970	static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 )
				5971	{
				5972	// returns a0 b0
				5973	return binop(Iop_64HLtoV128, unop(Iop_V128to64, mkexpr(a10)),
				5974	unop(Iop_V128to64, mkexpr(b10)));
				5975	}
				5976
				5977	static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 )
				5978	{
				5979	// returns a1 b1
				5980	return binop(Iop_64HLtoV128, unop(Iop_V128HIto64, mkexpr(a10)),
				5981	unop(Iop_V128HIto64, mkexpr(b10)));
				5982	}
				5983
				5984
				5985	////////////////////////////////////////////////////////////////
				5986	// 32x4 operations
				5987	//
				5988
				5989	// Split a 128 bit value into 4 32 bit ones, in 64-bit IRTemps with
				5990	// the top halves guaranteed to be zero.
				5991	static void breakV128to32s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
				5992	IRTemp* out0, IRTemp v128 )
				5993	{
				5994	if (out3) *out3 = newTemp(Ity_I64);
				5995	if (out2) *out2 = newTemp(Ity_I64);
				5996	if (out1) *out1 = newTemp(Ity_I64);
				5997	if (out0) *out0 = newTemp(Ity_I64);
				5998	IRTemp hi64 = newTemp(Ity_I64);
				5999	IRTemp lo64 = newTemp(Ity_I64);
				6000	assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
				6001	assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
				6002	if (out3) assign(*out3, binop(Iop_Shr64, mkexpr(hi64), mkU8(32)));
				6003	if (out2) assign(*out2, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFFFFFF)));
				6004	if (out1) assign(*out1, binop(Iop_Shr64, mkexpr(lo64), mkU8(32)));
				6005	if (out0) assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFFFFFF)));
				6006	}
				6007
				6008	// Make a V128 bit value from 4 32 bit ones, each of which is in a 64 bit
				6009	// IRTemp.
				6010	static IRTemp mkV128from32s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
				6011	{
				6012	IRTemp hi64 = newTemp(Ity_I64);
				6013	IRTemp lo64 = newTemp(Ity_I64);
				6014	assign(hi64,
				6015	binop(Iop_Or64,
				6016	binop(Iop_Shl64, mkexpr(in3), mkU8(32)),
				6017	binop(Iop_And64, mkexpr(in2), mkU64(0xFFFFFFFF))));
				6018	assign(lo64,
				6019	binop(Iop_Or64,
				6020	binop(Iop_Shl64, mkexpr(in1), mkU8(32)),
				6021	binop(Iop_And64, mkexpr(in0), mkU64(0xFFFFFFFF))));
				6022	IRTemp res = newTemp(Ity_V128);
				6023	assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
				6024	return res;
				6025	}
				6026
				6027	static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 )
				6028	{
				6029	// returns a2 a0 b2 b0
				6030	IRTemp a2, a0, b2, b0;
				6031	breakV128to32s(NULL, &a2, NULL, &a0, a3210);
				6032	breakV128to32s(NULL, &b2, NULL, &b0, b3210);
				6033	return mkexpr(mkV128from32s(a2, a0, b2, b0));
				6034	}
				6035
				6036	static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 )
				6037	{
				6038	// returns a3 a1 b3 b1
				6039	IRTemp a3, a1, b3, b1;
				6040	breakV128to32s(&a3, NULL, &a1, NULL, a3210);
				6041	breakV128to32s(&b3, NULL, &b1, NULL, b3210);
				6042	return mkexpr(mkV128from32s(a3, a1, b3, b1));
				6043	}
				6044
				6045
				6046	////////////////////////////////////////////////////////////////
				6047	// 16x8 operations
				6048	//
				6049
				6050	static void breakV128to16s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
				6051	IRTemp* out4, IRTemp* out3, IRTemp* out2,
				6052	IRTemp* out1,IRTemp* out0, IRTemp v128 )
				6053	{
				6054	if (out7) *out7 = newTemp(Ity_I64);
				6055	if (out6) *out6 = newTemp(Ity_I64);
				6056	if (out5) *out5 = newTemp(Ity_I64);
				6057	if (out4) *out4 = newTemp(Ity_I64);
				6058	if (out3) *out3 = newTemp(Ity_I64);
				6059	if (out2) *out2 = newTemp(Ity_I64);
				6060	if (out1) *out1 = newTemp(Ity_I64);
				6061	if (out0) *out0 = newTemp(Ity_I64);
				6062	IRTemp hi64 = newTemp(Ity_I64);
				6063	IRTemp lo64 = newTemp(Ity_I64);
				6064	assign(hi64, unop(Iop_V128HIto64, mkexpr(v128)) );
				6065	assign(lo64, unop(Iop_V128to64, mkexpr(v128)) );
				6066	if (out7)
				6067	assign(*out7, binop(Iop_And64,
				6068	binop(Iop_Shr64, mkexpr(hi64), mkU8(48)),
				6069	mkU64(0xFFFF)));
				6070	if (out6)
				6071	assign(*out6, binop(Iop_And64,
				6072	binop(Iop_Shr64, mkexpr(hi64), mkU8(32)),
				6073	mkU64(0xFFFF)));
				6074	if (out5)
				6075	assign(*out5, binop(Iop_And64,
				6076	binop(Iop_Shr64, mkexpr(hi64), mkU8(16)),
				6077	mkU64(0xFFFF)));
				6078	if (out4)
				6079	assign(*out4, binop(Iop_And64, mkexpr(hi64), mkU64(0xFFFF)));
				6080	if (out3)
				6081	assign(*out3, binop(Iop_And64,
				6082	binop(Iop_Shr64, mkexpr(lo64), mkU8(48)),
				6083	mkU64(0xFFFF)));
				6084	if (out2)
				6085	assign(*out2, binop(Iop_And64,
				6086	binop(Iop_Shr64, mkexpr(lo64), mkU8(32)),
				6087	mkU64(0xFFFF)));
				6088	if (out1)
				6089	assign(*out1, binop(Iop_And64,
				6090	binop(Iop_Shr64, mkexpr(lo64), mkU8(16)),
				6091	mkU64(0xFFFF)));
				6092	if (out0)
				6093	assign(*out0, binop(Iop_And64, mkexpr(lo64), mkU64(0xFFFF)));
				6094	}
				6095
				6096	static IRTemp mkV128from16s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
				6097	IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
				6098	{
				6099	IRTemp hi64 = newTemp(Ity_I64);
				6100	IRTemp lo64 = newTemp(Ity_I64);
				6101	assign(hi64,
				6102	binop(Iop_Or64,
				6103	binop(Iop_Or64,
				6104	binop(Iop_Shl64,
				6105	binop(Iop_And64, mkexpr(in7), mkU64(0xFFFF)),
				6106	mkU8(48)),
				6107	binop(Iop_Shl64,
				6108	binop(Iop_And64, mkexpr(in6), mkU64(0xFFFF)),
				6109	mkU8(32))),
				6110	binop(Iop_Or64,
				6111	binop(Iop_Shl64,
				6112	binop(Iop_And64, mkexpr(in5), mkU64(0xFFFF)),
				6113	mkU8(16)),
				6114	binop(Iop_And64,
				6115	mkexpr(in4), mkU64(0xFFFF)))));
				6116	assign(lo64,
				6117	binop(Iop_Or64,
				6118	binop(Iop_Or64,
				6119	binop(Iop_Shl64,
				6120	binop(Iop_And64, mkexpr(in3), mkU64(0xFFFF)),
				6121	mkU8(48)),
				6122	binop(Iop_Shl64,
				6123	binop(Iop_And64, mkexpr(in2), mkU64(0xFFFF)),
				6124	mkU8(32))),
				6125	binop(Iop_Or64,
				6126	binop(Iop_Shl64,
				6127	binop(Iop_And64, mkexpr(in1), mkU64(0xFFFF)),
				6128	mkU8(16)),
				6129	binop(Iop_And64,
				6130	mkexpr(in0), mkU64(0xFFFF)))));
				6131	IRTemp res = newTemp(Ity_V128);
				6132	assign(res, binop(Iop_64HLtoV128, mkexpr(hi64), mkexpr(lo64)));
				6133	return res;
				6134	}
				6135
				6136	static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
				6137	{
				6138	// returns a6 a4 a2 a0 b6 b4 b2 b0
				6139	IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
				6140	breakV128to16s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
				6141	breakV128to16s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
				6142	return mkexpr(mkV128from16s(a6, a4, a2, a0, b6, b4, b2, b0));
				6143	}
				6144
				6145	static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 )
				6146	{
				6147	// returns a7 a5 a3 a1 b7 b5 b3 b1
				6148	IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
				6149	breakV128to16s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
				6150	breakV128to16s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
				6151	return mkexpr(mkV128from16s(a7, a5, a3, a1, b7, b5, b3, b1));
				6152	}
				6153
				6154
/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/