/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- begin guest_arm64_toIR.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2013-2013 OpenWorks
12 info@open-works.net
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
27 02110-1301, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30*/
31
32//ZZ /* XXXX thumb to check:
33//ZZ that all cases where putIRegT writes r15, we generate a jump.
34//ZZ
35//ZZ All uses of newTemp assign to an IRTemp and not a UInt
36//ZZ
37//ZZ For all thumb loads and stores, including VFP ones, new-ITSTATE is
38//ZZ backed out before the memory op, and restored afterwards. This
39//ZZ needs to happen even after we go uncond. (and for sure it doesn't
40//ZZ happen for VFP loads/stores right now).
41//ZZ
42//ZZ VFP on thumb: check that we exclude all r13/r15 cases that we
43//ZZ should.
44//ZZ
45//ZZ XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
46//ZZ taking into account the number of insns guarded by an IT.
47//ZZ
48//ZZ remove the nasty hack, in the spechelper, of looking for Or32(...,
49//ZZ 0xE0) in as the first arg to armg_calculate_condition, and instead
50//ZZ use Slice44 as specified in comments in the spechelper.
51//ZZ
52//ZZ add specialisations for armg_calculate_flag_c and _v, as they
53//ZZ are moderately often needed in Thumb code.
54//ZZ
55//ZZ Correctness: ITSTATE handling in Thumb SVCs is wrong.
56//ZZ
57//ZZ Correctness (obscure): in m_transtab, when invalidating code
58//ZZ address ranges, invalidate up to 18 bytes after the end of the
59//ZZ range. This is because the ITSTATE optimisation at the top of
60//ZZ _THUMB_WRK below analyses up to 18 bytes before the start of any
61//ZZ given instruction, and so might depend on the invalidated area.
62//ZZ */
63//ZZ
64//ZZ /* Limitations, etc
65//ZZ
66//ZZ - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
67//ZZ These instructions are non-restartable in the case where the
68//ZZ transfer(s) fault.
69//ZZ
70//ZZ - SWP: the restart jump back is Ijk_Boring; it should be
71//ZZ Ijk_NoRedir but that's expensive. See comments on casLE() in
72//ZZ guest_x86_toIR.c.
73//ZZ */
74
75/* "Special" instructions.
76
77 This instruction decoder can decode four special instructions
78 which mean nothing natively (are no-ops as far as regs/mem are
79 concerned) but have meaning for supporting Valgrind. A special
80 instruction is flagged by a 16-byte preamble:
81
82 93CC0D8C 93CC358C 93CCCD8C 93CCF58C
83 (ror x12, x12, #3; ror x12, x12, #13
84 ror x12, x12, #51; ror x12, x12, #61)
85
   Following that, one of the following 4 are allowed
   (standard interpretation in parentheses):
88
89 AA0A014A (orr x10,x10,x10) X3 = client_request ( X4 )
90 AA0B016B (orr x11,x11,x11) X3 = guest_NRADDR
91 AA0C018C (orr x12,x12,x12) branch-and-link-to-noredir X8
92 AA090129 (orr x9,x9,x9) IR injection
93
94 Any other bytes following the 16-byte preamble are illegal and
95 constitute a failure in instruction decoding. This all assumes
96 that the preamble will never occur except in specific code
97 fragments designed for Valgrind to catch.
98*/
99
100/* Translates ARM64 code to IR. */
101
102#include "libvex_basictypes.h"
103#include "libvex_ir.h"
104#include "libvex.h"
105#include "libvex_guest_arm64.h"
106
107#include "main_util.h"
108#include "main_globals.h"
109#include "guest_generic_bb_to_IR.h"
110#include "guest_arm64_defs.h"
111
112
113/*------------------------------------------------------------*/
114/*--- Globals ---*/
115/*------------------------------------------------------------*/
116
117/* These are set at the start of the translation of a instruction, so
118 that we don't have to pass them around endlessly. CONST means does
119 not change during translation of the instruction.
120*/
121
sewardj9b769162014-07-24 12:42:03 +0000122/* CONST: what is the host's endianness? We need to know this in
123 order to do sub-register accesses to the SIMD/FP registers
124 correctly. */
125static VexEndness host_endness;
sewardjbbcf1882014-01-12 12:49:10 +0000126
127/* CONST: The guest address for the instruction currently being
128 translated. */
129static Addr64 guest_PC_curr_instr;
130
131/* MOD: The IRSB* into which we're generating code. */
132static IRSB* irsb;
133
134
135/*------------------------------------------------------------*/
136/*--- Debugging output ---*/
137/*------------------------------------------------------------*/
138
/* Print a front-end trace line, but only when front-end tracing
   (VEX_TRACE_FE) is enabled.  Wrapped in do/while(0) so the macro is
   safe as the body of an unbraced if/else. */
#define DIP(format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_printf(format, ## args); \
   } while (0)

/* Like DIP, but formats into 'buf' instead of printing. */
#define DIS(buf, format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_sprintf(buf, format, ## args); \
   } while (0)
146
147
148/*------------------------------------------------------------*/
149/*--- Helper bits and pieces for deconstructing the ---*/
150/*--- arm insn stream. ---*/
151/*------------------------------------------------------------*/
152
153/* Do a little-endian load of a 32-bit word, regardless of the
154 endianness of the underlying host. */
155static inline UInt getUIntLittleEndianly ( UChar* p )
156{
157 UInt w = 0;
158 w = (w << 8) | p[3];
159 w = (w << 8) | p[2];
160 w = (w << 8) | p[1];
161 w = (w << 8) | p[0];
162 return w;
163}
164
165/* Sign extend a N-bit value up to 64 bits, by copying
166 bit N-1 into all higher positions. */
167static ULong sx_to_64 ( ULong x, UInt n )
168{
169 vassert(n > 1 && n < 64);
170 Long r = (Long)x;
171 r = (r << (64-n)) >> (64-n);
172 return (ULong)r;
173}
174
175//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
176//ZZ endianness of the underlying host. */
177//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
178//ZZ {
179//ZZ UShort w = 0;
180//ZZ w = (w << 8) | p[1];
181//ZZ w = (w << 8) | p[0];
182//ZZ return w;
183//ZZ }
184//ZZ
185//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
186//ZZ vassert(sh >= 0 && sh < 32);
187//ZZ if (sh == 0)
188//ZZ return x;
189//ZZ else
190//ZZ return (x << (32-sh)) | (x >> sh);
191//ZZ }
192//ZZ
193//ZZ static Int popcount32 ( UInt x )
194//ZZ {
195//ZZ Int res = 0, i;
196//ZZ for (i = 0; i < 32; i++) {
197//ZZ res += (x & 1);
198//ZZ x >>= 1;
199//ZZ }
200//ZZ return res;
201//ZZ }
202//ZZ
203//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
204//ZZ {
205//ZZ UInt mask = 1 << ix;
206//ZZ x &= ~mask;
207//ZZ x |= ((b << ix) & mask);
208//ZZ return x;
209//ZZ }
210
/* Build small bit-vector constants from individual bit arguments,
   most significant bit first.  E.g. BITS4(1,0,0,1) == 9. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8)  \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b10) << 10)  \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11, _b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11)  \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

/* Convenient names for all four 2-bit values. */
#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

/* Produces _uint[_bMax:_bMin] -- the bit-slice of _uint from bit
   _bMax down to bit _bMin inclusive, right-justified. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
256
257
258/*------------------------------------------------------------*/
259/*--- Helper bits and pieces for creating IR fragments. ---*/
260/*------------------------------------------------------------*/
261
262static IRExpr* mkV128 ( UShort w )
263{
264 return IRExpr_Const(IRConst_V128(w));
265}
266
267static IRExpr* mkU64 ( ULong i )
268{
269 return IRExpr_Const(IRConst_U64(i));
270}
271
272static IRExpr* mkU32 ( UInt i )
273{
274 return IRExpr_Const(IRConst_U32(i));
275}
276
sewardj25523c42014-06-15 19:36:29 +0000277static IRExpr* mkU16 ( UInt i )
278{
279 vassert(i < 65536);
280 return IRExpr_Const(IRConst_U16(i));
281}
282
sewardjbbcf1882014-01-12 12:49:10 +0000283static IRExpr* mkU8 ( UInt i )
284{
285 vassert(i < 256);
286 return IRExpr_Const(IRConst_U8( (UChar)i ));
287}
288
289static IRExpr* mkexpr ( IRTemp tmp )
290{
291 return IRExpr_RdTmp(tmp);
292}
293
294static IRExpr* unop ( IROp op, IRExpr* a )
295{
296 return IRExpr_Unop(op, a);
297}
298
299static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
300{
301 return IRExpr_Binop(op, a1, a2);
302}
303
304static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
305{
306 return IRExpr_Triop(op, a1, a2, a3);
307}
308
309static IRExpr* loadLE ( IRType ty, IRExpr* addr )
310{
311 return IRExpr_Load(Iend_LE, ty, addr);
312}
313
314/* Add a statement to the list held by "irbb". */
315static void stmt ( IRStmt* st )
316{
317 addStmtToIRSB( irsb, st );
318}
319
320static void assign ( IRTemp dst, IRExpr* e )
321{
322 stmt( IRStmt_WrTmp(dst, e) );
323}
324
325static void storeLE ( IRExpr* addr, IRExpr* data )
326{
327 stmt( IRStmt_Store(Iend_LE, addr, data) );
328}
329
330//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
331//ZZ {
332//ZZ if (guardT == IRTemp_INVALID) {
333//ZZ /* unconditional */
334//ZZ storeLE(addr, data);
335//ZZ } else {
336//ZZ stmt( IRStmt_StoreG(Iend_LE, addr, data,
337//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
338//ZZ }
339//ZZ }
340//ZZ
341//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
342//ZZ IRExpr* addr, IRExpr* alt,
343//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
344//ZZ {
345//ZZ if (guardT == IRTemp_INVALID) {
346//ZZ /* unconditional */
347//ZZ IRExpr* loaded = NULL;
348//ZZ switch (cvt) {
349//ZZ case ILGop_Ident32:
350//ZZ loaded = loadLE(Ity_I32, addr); break;
351//ZZ case ILGop_8Uto32:
352//ZZ loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
353//ZZ case ILGop_8Sto32:
354//ZZ loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
355//ZZ case ILGop_16Uto32:
356//ZZ loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
357//ZZ case ILGop_16Sto32:
358//ZZ loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
359//ZZ default:
360//ZZ vassert(0);
361//ZZ }
362//ZZ vassert(loaded != NULL);
363//ZZ assign(dst, loaded);
364//ZZ } else {
365//ZZ /* Generate a guarded load into 'dst', but apply 'cvt' to the
366//ZZ loaded data before putting the data in 'dst'. If the load
367//ZZ does not take place, 'alt' is placed directly in 'dst'. */
368//ZZ stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
369//ZZ binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
370//ZZ }
371//ZZ }
372
373/* Generate a new temporary of the given type. */
374static IRTemp newTemp ( IRType ty )
375{
376 vassert(isPlausibleIRType(ty));
377 return newIRTemp( irsb->tyenv, ty );
378}
379
sewardj8e91fd42014-07-11 12:05:47 +0000380/* This is used in many places, so the brevity is an advantage. */
381static IRTemp newTempV128(void)
382{
383 return newTemp(Ity_V128);
384}
385
386/* Initialise V128 temporaries en masse. */
387static
sewardj51d012a2014-07-21 09:19:50 +0000388void newTempsV128_2(IRTemp* t1, IRTemp* t2)
389{
390 vassert(t1 && *t1 == IRTemp_INVALID);
391 vassert(t2 && *t2 == IRTemp_INVALID);
392 *t1 = newTempV128();
393 *t2 = newTempV128();
394}
395
sewardj51d012a2014-07-21 09:19:50 +0000396static
397void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
398{
399 vassert(t1 && *t1 == IRTemp_INVALID);
400 vassert(t2 && *t2 == IRTemp_INVALID);
401 vassert(t3 && *t3 == IRTemp_INVALID);
402 *t1 = newTempV128();
403 *t2 = newTempV128();
404 *t3 = newTempV128();
405}
406
sewardj54ffa1d2014-07-22 09:27:49 +0000407//static
408//void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
409//{
410// vassert(t1 && *t1 == IRTemp_INVALID);
411// vassert(t2 && *t2 == IRTemp_INVALID);
412// vassert(t3 && *t3 == IRTemp_INVALID);
413// vassert(t4 && *t4 == IRTemp_INVALID);
414// *t1 = newTempV128();
415// *t2 = newTempV128();
416// *t3 = newTempV128();
417// *t4 = newTempV128();
418//}
419
sewardj51d012a2014-07-21 09:19:50 +0000420static
sewardj8e91fd42014-07-11 12:05:47 +0000421void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
422 IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
423{
424 vassert(t1 && *t1 == IRTemp_INVALID);
425 vassert(t2 && *t2 == IRTemp_INVALID);
426 vassert(t3 && *t3 == IRTemp_INVALID);
427 vassert(t4 && *t4 == IRTemp_INVALID);
428 vassert(t5 && *t5 == IRTemp_INVALID);
429 vassert(t6 && *t6 == IRTemp_INVALID);
430 vassert(t7 && *t7 == IRTemp_INVALID);
431 *t1 = newTempV128();
432 *t2 = newTempV128();
433 *t3 = newTempV128();
434 *t4 = newTempV128();
435 *t5 = newTempV128();
436 *t6 = newTempV128();
437 *t7 = newTempV128();
438}
439
sewardjbbcf1882014-01-12 12:49:10 +0000440//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
441//ZZ IRRoundingMode. */
442//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
443//ZZ {
444//ZZ return mkU32(Irrm_NEAREST);
445//ZZ }
446//ZZ
447//ZZ /* Generate an expression for SRC rotated right by ROT. */
448//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
449//ZZ {
450//ZZ vassert(rot >= 0 && rot < 32);
451//ZZ if (rot == 0)
452//ZZ return mkexpr(src);
453//ZZ return
454//ZZ binop(Iop_Or32,
455//ZZ binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
456//ZZ binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
457//ZZ }
458//ZZ
459//ZZ static IRExpr* mkU128 ( ULong i )
460//ZZ {
461//ZZ return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
462//ZZ }
463//ZZ
464//ZZ /* Generate a 4-aligned version of the given expression if
465//ZZ the given condition is true. Else return it unchanged. */
466//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
467//ZZ {
468//ZZ if (b)
469//ZZ return binop(Iop_And32, e, mkU32(~3));
470//ZZ else
471//ZZ return e;
472//ZZ }
473
474/* Other IR construction helpers. */
475static IROp mkAND ( IRType ty ) {
476 switch (ty) {
477 case Ity_I32: return Iop_And32;
478 case Ity_I64: return Iop_And64;
479 default: vpanic("mkAND");
480 }
481}
482
483static IROp mkOR ( IRType ty ) {
484 switch (ty) {
485 case Ity_I32: return Iop_Or32;
486 case Ity_I64: return Iop_Or64;
487 default: vpanic("mkOR");
488 }
489}
490
491static IROp mkXOR ( IRType ty ) {
492 switch (ty) {
493 case Ity_I32: return Iop_Xor32;
494 case Ity_I64: return Iop_Xor64;
495 default: vpanic("mkXOR");
496 }
497}
498
499static IROp mkSHL ( IRType ty ) {
500 switch (ty) {
501 case Ity_I32: return Iop_Shl32;
502 case Ity_I64: return Iop_Shl64;
503 default: vpanic("mkSHL");
504 }
505}
506
507static IROp mkSHR ( IRType ty ) {
508 switch (ty) {
509 case Ity_I32: return Iop_Shr32;
510 case Ity_I64: return Iop_Shr64;
511 default: vpanic("mkSHR");
512 }
513}
514
515static IROp mkSAR ( IRType ty ) {
516 switch (ty) {
517 case Ity_I32: return Iop_Sar32;
518 case Ity_I64: return Iop_Sar64;
519 default: vpanic("mkSAR");
520 }
521}
522
523static IROp mkNOT ( IRType ty ) {
524 switch (ty) {
525 case Ity_I32: return Iop_Not32;
526 case Ity_I64: return Iop_Not64;
527 default: vpanic("mkNOT");
528 }
529}
530
531static IROp mkADD ( IRType ty ) {
532 switch (ty) {
533 case Ity_I32: return Iop_Add32;
534 case Ity_I64: return Iop_Add64;
535 default: vpanic("mkADD");
536 }
537}
538
539static IROp mkSUB ( IRType ty ) {
540 switch (ty) {
541 case Ity_I32: return Iop_Sub32;
542 case Ity_I64: return Iop_Sub64;
543 default: vpanic("mkSUB");
544 }
545}
546
547static IROp mkADDF ( IRType ty ) {
548 switch (ty) {
549 case Ity_F32: return Iop_AddF32;
550 case Ity_F64: return Iop_AddF64;
551 default: vpanic("mkADDF");
552 }
553}
554
555static IROp mkSUBF ( IRType ty ) {
556 switch (ty) {
557 case Ity_F32: return Iop_SubF32;
558 case Ity_F64: return Iop_SubF64;
559 default: vpanic("mkSUBF");
560 }
561}
562
563static IROp mkMULF ( IRType ty ) {
564 switch (ty) {
565 case Ity_F32: return Iop_MulF32;
566 case Ity_F64: return Iop_MulF64;
567 default: vpanic("mkMULF");
568 }
569}
570
571static IROp mkDIVF ( IRType ty ) {
572 switch (ty) {
573 case Ity_F32: return Iop_DivF32;
574 case Ity_F64: return Iop_DivF64;
575 default: vpanic("mkMULF");
576 }
577}
578
579static IROp mkNEGF ( IRType ty ) {
580 switch (ty) {
581 case Ity_F32: return Iop_NegF32;
582 case Ity_F64: return Iop_NegF64;
583 default: vpanic("mkNEGF");
584 }
585}
586
587static IROp mkABSF ( IRType ty ) {
588 switch (ty) {
589 case Ity_F32: return Iop_AbsF32;
590 case Ity_F64: return Iop_AbsF64;
591 default: vpanic("mkNEGF");
592 }
593}
594
595static IROp mkSQRTF ( IRType ty ) {
596 switch (ty) {
597 case Ity_F32: return Iop_SqrtF32;
598 case Ity_F64: return Iop_SqrtF64;
599 default: vpanic("mkNEGF");
600 }
601}
602
sewardja5a6b752014-06-30 07:33:56 +0000603static IROp mkVecADD ( UInt size ) {
604 const IROp ops[4]
605 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
606 vassert(size < 4);
607 return ops[size];
608}
609
610static IROp mkVecQADDU ( UInt size ) {
611 const IROp ops[4]
612 = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
613 vassert(size < 4);
614 return ops[size];
615}
616
617static IROp mkVecQADDS ( UInt size ) {
618 const IROp ops[4]
619 = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
620 vassert(size < 4);
621 return ops[size];
622}
623
sewardjf7003bc2014-08-18 12:28:02 +0000624static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
625 const IROp ops[4]
626 = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
627 Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
628 vassert(size < 4);
629 return ops[size];
630}
631
632static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
633 const IROp ops[4]
634 = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
635 Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
636 vassert(size < 4);
637 return ops[size];
638}
639
sewardja5a6b752014-06-30 07:33:56 +0000640static IROp mkVecSUB ( UInt size ) {
641 const IROp ops[4]
642 = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
643 vassert(size < 4);
644 return ops[size];
645}
646
647static IROp mkVecQSUBU ( UInt size ) {
648 const IROp ops[4]
649 = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
650 vassert(size < 4);
651 return ops[size];
652}
653
654static IROp mkVecQSUBS ( UInt size ) {
655 const IROp ops[4]
656 = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
657 vassert(size < 4);
658 return ops[size];
659}
660
661static IROp mkVecSARN ( UInt size ) {
662 const IROp ops[4]
663 = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
664 vassert(size < 4);
665 return ops[size];
666}
667
668static IROp mkVecSHRN ( UInt size ) {
669 const IROp ops[4]
670 = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
671 vassert(size < 4);
672 return ops[size];
673}
674
675static IROp mkVecSHLN ( UInt size ) {
676 const IROp ops[4]
677 = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
678 vassert(size < 4);
679 return ops[size];
680}
681
682static IROp mkVecCATEVENLANES ( UInt size ) {
683 const IROp ops[4]
684 = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
685 Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
686 vassert(size < 4);
687 return ops[size];
688}
689
690static IROp mkVecCATODDLANES ( UInt size ) {
691 const IROp ops[4]
692 = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
693 Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
694 vassert(size < 4);
695 return ops[size];
696}
697
sewardj487559e2014-07-10 14:22:45 +0000698static IROp mkVecINTERLEAVELO ( UInt size ) {
699 const IROp ops[4]
700 = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
701 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
702 vassert(size < 4);
703 return ops[size];
704}
705
706static IROp mkVecINTERLEAVEHI ( UInt size ) {
707 const IROp ops[4]
708 = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
709 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
710 vassert(size < 4);
711 return ops[size];
712}
713
sewardja5a6b752014-06-30 07:33:56 +0000714static IROp mkVecMAXU ( UInt size ) {
715 const IROp ops[4]
716 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
717 vassert(size < 4);
718 return ops[size];
719}
720
721static IROp mkVecMAXS ( UInt size ) {
722 const IROp ops[4]
723 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
724 vassert(size < 4);
725 return ops[size];
726}
727
728static IROp mkVecMINU ( UInt size ) {
729 const IROp ops[4]
730 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
731 vassert(size < 4);
732 return ops[size];
733}
734
735static IROp mkVecMINS ( UInt size ) {
736 const IROp ops[4]
737 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
738 vassert(size < 4);
739 return ops[size];
740}
741
sewardj487559e2014-07-10 14:22:45 +0000742static IROp mkVecMUL ( UInt size ) {
743 const IROp ops[4]
744 = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
745 vassert(size < 3);
746 return ops[size];
747}
748
749static IROp mkVecMULLU ( UInt sizeNarrow ) {
750 const IROp ops[4]
751 = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
752 vassert(sizeNarrow < 3);
753 return ops[sizeNarrow];
754}
755
756static IROp mkVecMULLS ( UInt sizeNarrow ) {
757 const IROp ops[4]
758 = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
759 vassert(sizeNarrow < 3);
760 return ops[sizeNarrow];
761}
762
sewardj51d012a2014-07-21 09:19:50 +0000763static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
764 const IROp ops[4]
765 = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
766 vassert(sizeNarrow < 3);
767 return ops[sizeNarrow];
768}
769
sewardj8e91fd42014-07-11 12:05:47 +0000770static IROp mkVecCMPEQ ( UInt size ) {
771 const IROp ops[4]
772 = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
773 vassert(size < 4);
774 return ops[size];
775}
776
777static IROp mkVecCMPGTU ( UInt size ) {
778 const IROp ops[4]
779 = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
780 vassert(size < 4);
781 return ops[size];
782}
783
784static IROp mkVecCMPGTS ( UInt size ) {
785 const IROp ops[4]
786 = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
787 vassert(size < 4);
788 return ops[size];
789}
790
791static IROp mkVecABS ( UInt size ) {
792 const IROp ops[4]
793 = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
794 vassert(size < 4);
795 return ops[size];
796}
797
798static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
799 const IROp ops[4]
800 = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
801 Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
802 vassert(size < 4);
803 return ops[size];
804}
805
sewardjbbcf1882014-01-12 12:49:10 +0000806static IRExpr* mkU ( IRType ty, ULong imm ) {
807 switch (ty) {
808 case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
809 case Ity_I64: return mkU64(imm);
810 default: vpanic("mkU");
811 }
812}
813
sewardj54ffa1d2014-07-22 09:27:49 +0000814static IROp mkVecQDMULHIS ( UInt size ) {
815 const IROp ops[4]
816 = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
817 vassert(size < 4);
818 return ops[size];
819}
820
821static IROp mkVecQRDMULHIS ( UInt size ) {
822 const IROp ops[4]
823 = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
824 vassert(size < 4);
825 return ops[size];
826}
827
sewardjecedd982014-08-11 14:02:47 +0000828static IROp mkVecQANDUQSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000829 const IROp ops[4]
830 = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
831 Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
832 vassert(size < 4);
833 return ops[size];
834}
835
sewardjecedd982014-08-11 14:02:47 +0000836static IROp mkVecQANDSQSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000837 const IROp ops[4]
838 = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
839 Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
840 vassert(size < 4);
841 return ops[size];
842}
843
sewardjecedd982014-08-11 14:02:47 +0000844static IROp mkVecQANDUQRSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000845 const IROp ops[4]
846 = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
847 Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
848 vassert(size < 4);
849 return ops[size];
850}
851
sewardjecedd982014-08-11 14:02:47 +0000852static IROp mkVecQANDSQRSH ( UInt size ) {
sewardj12972182014-08-04 08:09:47 +0000853 const IROp ops[4]
854 = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
855 Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
856 vassert(size < 4);
857 return ops[size];
858}
859
sewardja6b61f02014-08-17 18:32:14 +0000860static IROp mkVecSHU ( UInt size ) {
861 const IROp ops[4]
862 = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
863 vassert(size < 4);
864 return ops[size];
865}
866
867static IROp mkVecSHS ( UInt size ) {
868 const IROp ops[4]
869 = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
870 vassert(size < 4);
871 return ops[size];
872}
873
874static IROp mkVecRSHU ( UInt size ) {
875 const IROp ops[4]
876 = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
877 vassert(size < 4);
878 return ops[size];
879}
880
881static IROp mkVecRSHS ( UInt size ) {
882 const IROp ops[4]
883 = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
884 vassert(size < 4);
885 return ops[size];
886}
887
sewardjecedd982014-08-11 14:02:47 +0000888static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
889 const IROp ops[4]
890 = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
891 Iop_NarrowUn64to32x2, Iop_INVALID };
892 vassert(sizeNarrow < 4);
893 return ops[sizeNarrow];
894}
895
896static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
897 const IROp ops[4]
898 = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
899 Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
900 vassert(sizeNarrow < 4);
901 return ops[sizeNarrow];
902}
903
904static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
905 const IROp ops[4]
906 = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
907 Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
908 vassert(sizeNarrow < 4);
909 return ops[sizeNarrow];
910}
911
912static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
913 const IROp ops[4]
914 = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
915 Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
916 vassert(sizeNarrow < 4);
917 return ops[sizeNarrow];
918}
919
920static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
921 const IROp ops[4]
922 = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
923 Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
924 vassert(sizeNarrow < 4);
925 return ops[sizeNarrow];
926}
927
928static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
929 const IROp ops[4]
930 = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
931 Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
932 vassert(sizeNarrow < 4);
933 return ops[sizeNarrow];
934}
935
936static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
937 const IROp ops[4]
938 = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
939 Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
940 vassert(sizeNarrow < 4);
941 return ops[sizeNarrow];
942}
943
944static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
945 const IROp ops[4]
946 = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
947 Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
948 vassert(sizeNarrow < 4);
949 return ops[sizeNarrow];
950}
951
952static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
953 const IROp ops[4]
954 = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
955 Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
956 vassert(sizeNarrow < 4);
957 return ops[sizeNarrow];
958}
959
960static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
961 const IROp ops[4]
962 = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
963 Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
964 vassert(sizeNarrow < 4);
965 return ops[sizeNarrow];
966}
967
sewardj1dd3ec12014-08-15 09:11:08 +0000968static IROp mkVecQSHLNSATUU ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000969 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000970 = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
971 Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000972 vassert(size < 4);
973 return ops[size];
974}
975
sewardj1dd3ec12014-08-15 09:11:08 +0000976static IROp mkVecQSHLNSATSS ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000977 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000978 = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
979 Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000980 vassert(size < 4);
981 return ops[size];
982}
983
sewardj1dd3ec12014-08-15 09:11:08 +0000984static IROp mkVecQSHLNSATSU ( UInt size ) {
sewardja97dddf2014-08-14 22:26:52 +0000985 const IROp ops[4]
sewardj1dd3ec12014-08-15 09:11:08 +0000986 = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
987 Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
sewardja97dddf2014-08-14 22:26:52 +0000988 vassert(size < 4);
989 return ops[size];
990}
991
992
sewardjbbcf1882014-01-12 12:49:10 +0000993/* Generate IR to create 'arg rotated right by imm', for sane values
994 of 'ty' and 'imm'. */
995static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
996{
997 UInt w = 0;
998 if (ty == Ity_I64) {
999 w = 64;
1000 } else {
1001 vassert(ty == Ity_I32);
1002 w = 32;
1003 }
1004 vassert(w != 0);
1005 vassert(imm < w);
1006 if (imm == 0) {
1007 return arg;
1008 }
1009 IRTemp res = newTemp(ty);
1010 assign(res, binop(mkOR(ty),
1011 binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
1012 binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
1013 return res;
1014}
1015
1016/* Generate IR to set the returned temp to either all-zeroes or
1017 all ones, as a copy of arg<imm>. */
1018static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
1019{
1020 UInt w = 0;
1021 if (ty == Ity_I64) {
1022 w = 64;
1023 } else {
1024 vassert(ty == Ity_I32);
1025 w = 32;
1026 }
1027 vassert(w != 0);
1028 vassert(imm < w);
1029 IRTemp res = newTemp(ty);
1030 assign(res, binop(mkSAR(ty),
1031 binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
1032 mkU8(w - 1)));
1033 return res;
1034}
1035
sewardj7d009132014-02-20 17:43:38 +00001036/* U-widen 8/16/32/64 bit int expr to 64. */
1037static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
1038{
1039 switch (srcTy) {
1040 case Ity_I64: return e;
1041 case Ity_I32: return unop(Iop_32Uto64, e);
1042 case Ity_I16: return unop(Iop_16Uto64, e);
1043 case Ity_I8: return unop(Iop_8Uto64, e);
1044 default: vpanic("widenUto64(arm64)");
1045 }
1046}
1047
1048/* Narrow 64 bit int expr to 8/16/32/64. Clearly only some
1049 of these combinations make sense. */
1050static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
1051{
1052 switch (dstTy) {
1053 case Ity_I64: return e;
1054 case Ity_I32: return unop(Iop_64to32, e);
1055 case Ity_I16: return unop(Iop_64to16, e);
1056 case Ity_I8: return unop(Iop_64to8, e);
1057 default: vpanic("narrowFrom64(arm64)");
1058 }
1059}
1060
sewardjbbcf1882014-01-12 12:49:10 +00001061
1062/*------------------------------------------------------------*/
1063/*--- Helpers for accessing guest registers. ---*/
1064/*------------------------------------------------------------*/
1065
/* Byte offsets of fields within VexGuestARM64State, for use with
   IRStmt_Put / IRExpr_Get throughout this file. */

/* Integer registers X0..X30.  Encoding 31 has no slot of its own: it
   denotes either XZR or SP depending on context (see OFFB_XSP). */
#define OFFB_X0       offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1       offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2       offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3       offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4       offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5       offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6       offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7       offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8       offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9       offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10      offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11      offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12      offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13      offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14      offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15      offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16      offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17      offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18      offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19      offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20      offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21      offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22      offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23      offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24      offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25      offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26      offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27      offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28      offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29      offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30      offsetof(VexGuestARM64State,guest_X30)

/* Stack pointer and program counter. */
#define OFFB_XSP      offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC       offsetof(VexGuestARM64State,guest_PC)

/* The lazily-evaluated condition-flag thunk: an operation tag plus
   two dependencies and one non-dependency (see setFlags_D1_D2_ND and
   the mk_arm64g_calculate_* helpers below). */
#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

/* Thread-pointer register, and the NRADDR pseudo-register
   (presumably for Valgrind's client-request machinery — not used in
   this part of the file). */
#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR   offsetof(VexGuestARM64State,guest_NRADDR)

/* SIMD/FP registers Q0..Q31, 128 bits each. */
#define OFFB_Q0       offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1       offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2       offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3       offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4       offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5       offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6       offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7       offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8       offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9       offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10      offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11      offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12      offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13      offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14      offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15      offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16      offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17      offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18      offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19      offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20      offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21      offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22      offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23      offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24      offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25      offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26      offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27      offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28      offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29     offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30      offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31      offsetof(VexGuestARM64State,guest_Q31)

/* FP control register, and the sticky QC (cumulative saturation)
   flag (see mk_get_IR_rounding_mode for how FPCR is consumed). */
#define OFFB_FPCR     offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG   offsetof(VexGuestARM64State,guest_QCFLAG)

/* NOTE(review): CMSTART/CMLEN look like the address range used when
   discarding translations — confirm against VexGuestARM64State docs. */
#define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
sewardjbbcf1882014-01-12 12:49:10 +00001147
1148
1149/* ---------------- Integer registers ---------------- */
1150
1151static Int offsetIReg64 ( UInt iregNo )
1152{
1153 /* Do we care about endianness here? We do if sub-parts of integer
1154 registers are accessed. */
1155 switch (iregNo) {
1156 case 0: return OFFB_X0;
1157 case 1: return OFFB_X1;
1158 case 2: return OFFB_X2;
1159 case 3: return OFFB_X3;
1160 case 4: return OFFB_X4;
1161 case 5: return OFFB_X5;
1162 case 6: return OFFB_X6;
1163 case 7: return OFFB_X7;
1164 case 8: return OFFB_X8;
1165 case 9: return OFFB_X9;
1166 case 10: return OFFB_X10;
1167 case 11: return OFFB_X11;
1168 case 12: return OFFB_X12;
1169 case 13: return OFFB_X13;
1170 case 14: return OFFB_X14;
1171 case 15: return OFFB_X15;
1172 case 16: return OFFB_X16;
1173 case 17: return OFFB_X17;
1174 case 18: return OFFB_X18;
1175 case 19: return OFFB_X19;
1176 case 20: return OFFB_X20;
1177 case 21: return OFFB_X21;
1178 case 22: return OFFB_X22;
1179 case 23: return OFFB_X23;
1180 case 24: return OFFB_X24;
1181 case 25: return OFFB_X25;
1182 case 26: return OFFB_X26;
1183 case 27: return OFFB_X27;
1184 case 28: return OFFB_X28;
1185 case 29: return OFFB_X29;
1186 case 30: return OFFB_X30;
1187 /* but not 31 */
1188 default: vassert(0);
1189 }
1190}
1191
1192static Int offsetIReg64orSP ( UInt iregNo )
1193{
sewardj60687882014-01-15 10:25:21 +00001194 return iregNo == 31 ? OFFB_XSP : offsetIReg64(iregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001195}
1196
/* Printable name of a 64-bit integer register, with encoding 31
   rendered as the zero register "xzr". */
static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}
1207
1208static const HChar* nameIReg64orSP ( UInt iregNo )
1209{
1210 if (iregNo == 31) {
1211 return "sp";
1212 }
1213 vassert(iregNo < 31);
1214 return nameIReg64orZR(iregNo);
1215}
1216
1217static IRExpr* getIReg64orSP ( UInt iregNo )
1218{
1219 vassert(iregNo < 32);
1220 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1221}
1222
1223static IRExpr* getIReg64orZR ( UInt iregNo )
1224{
1225 if (iregNo == 31) {
1226 return mkU64(0);
1227 }
1228 vassert(iregNo < 31);
1229 return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
1230}
1231
1232static void putIReg64orSP ( UInt iregNo, IRExpr* e )
1233{
1234 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1235 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1236}
1237
1238static void putIReg64orZR ( UInt iregNo, IRExpr* e )
1239{
1240 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
1241 if (iregNo == 31) {
1242 return;
1243 }
1244 vassert(iregNo < 31);
1245 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
1246}
1247
/* Printable name of a 32-bit integer register view, with encoding 31
   rendered as the zero register "wzr". */
static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}
1258
1259static const HChar* nameIReg32orSP ( UInt iregNo )
1260{
1261 if (iregNo == 31) {
1262 return "wsp";
1263 }
1264 vassert(iregNo < 31);
1265 return nameIReg32orZR(iregNo);
1266}
1267
1268static IRExpr* getIReg32orSP ( UInt iregNo )
1269{
1270 vassert(iregNo < 32);
1271 return unop(Iop_64to32,
1272 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1273}
1274
1275static IRExpr* getIReg32orZR ( UInt iregNo )
1276{
1277 if (iregNo == 31) {
1278 return mkU32(0);
1279 }
1280 vassert(iregNo < 31);
1281 return unop(Iop_64to32,
1282 IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
1283}
1284
1285static void putIReg32orSP ( UInt iregNo, IRExpr* e )
1286{
1287 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1288 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1289}
1290
1291static void putIReg32orZR ( UInt iregNo, IRExpr* e )
1292{
1293 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1294 if (iregNo == 31) {
1295 return;
1296 }
1297 vassert(iregNo < 31);
1298 stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
1299}
1300
1301static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
1302{
1303 vassert(is64 == True || is64 == False);
1304 return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
1305}
1306
1307static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
1308{
1309 vassert(is64 == True || is64 == False);
1310 return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
1311}
1312
1313static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
1314{
1315 vassert(is64 == True || is64 == False);
1316 return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
1317}
1318
1319static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
1320{
1321 vassert(is64 == True || is64 == False);
1322 if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
1323}
1324
/* Write 'e' (which must be :: Ity_I64) to the guest program counter. */
static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}
1330
1331
1332/* ---------------- Vector (Q) registers ---------------- */
1333
1334static Int offsetQReg128 ( UInt qregNo )
1335{
1336 /* We don't care about endianness at this point. It only becomes
1337 relevant when dealing with sections of these registers.*/
1338 switch (qregNo) {
1339 case 0: return OFFB_Q0;
1340 case 1: return OFFB_Q1;
1341 case 2: return OFFB_Q2;
1342 case 3: return OFFB_Q3;
1343 case 4: return OFFB_Q4;
1344 case 5: return OFFB_Q5;
1345 case 6: return OFFB_Q6;
1346 case 7: return OFFB_Q7;
1347 case 8: return OFFB_Q8;
1348 case 9: return OFFB_Q9;
1349 case 10: return OFFB_Q10;
1350 case 11: return OFFB_Q11;
1351 case 12: return OFFB_Q12;
1352 case 13: return OFFB_Q13;
1353 case 14: return OFFB_Q14;
1354 case 15: return OFFB_Q15;
1355 case 16: return OFFB_Q16;
1356 case 17: return OFFB_Q17;
1357 case 18: return OFFB_Q18;
1358 case 19: return OFFB_Q19;
1359 case 20: return OFFB_Q20;
1360 case 21: return OFFB_Q21;
1361 case 22: return OFFB_Q22;
1362 case 23: return OFFB_Q23;
1363 case 24: return OFFB_Q24;
1364 case 25: return OFFB_Q25;
1365 case 26: return OFFB_Q26;
1366 case 27: return OFFB_Q27;
1367 case 28: return OFFB_Q28;
1368 case 29: return OFFB_Q29;
1369 case 30: return OFFB_Q30;
1370 case 31: return OFFB_Q31;
1371 default: vassert(0);
1372 }
1373}
1374
sewardjbbcf1882014-01-12 12:49:10 +00001375/* Write to a complete Qreg. */
1376static void putQReg128 ( UInt qregNo, IRExpr* e )
1377{
1378 vassert(qregNo < 32);
1379 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
1380 stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
1381}
1382
1383/* Read a complete Qreg. */
1384static IRExpr* getQReg128 ( UInt qregNo )
1385{
1386 vassert(qregNo < 32);
1387 return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
1388}
1389
1390/* Produce the IR type for some sub-part of a vector. For 32- and 64-
1391 bit sub-parts we can choose either integer or float types, and
1392 choose float on the basis that that is the common use case and so
1393 will give least interference with Put-to-Get forwarding later
1394 on. */
1395static IRType preferredVectorSubTypeFromSize ( UInt szB )
1396{
1397 switch (szB) {
1398 case 1: return Ity_I8;
1399 case 2: return Ity_I16;
1400 case 4: return Ity_I32; //Ity_F32;
1401 case 8: return Ity_F64;
1402 case 16: return Ity_V128;
1403 default: vassert(0);
1404 }
1405}
1406
sewardj606c4ba2014-01-26 19:11:14 +00001407/* Find the offset of the laneNo'th lane of type laneTy in the given
1408 Qreg. Since the host is little-endian, the least significant lane
1409 has the lowest offset. */
1410static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
sewardjbbcf1882014-01-12 12:49:10 +00001411{
sewardj9b769162014-07-24 12:42:03 +00001412 vassert(host_endness == VexEndnessLE);
sewardjbbcf1882014-01-12 12:49:10 +00001413 Int base = offsetQReg128(qregNo);
sewardj606c4ba2014-01-26 19:11:14 +00001414 /* Since the host is little-endian, the least significant lane
1415 will be at the lowest address. */
1416 /* Restrict this to known types, so as to avoid silently accepting
1417 stupid types. */
1418 UInt laneSzB = 0;
1419 switch (laneTy) {
sewardj5860ec72014-03-01 11:19:45 +00001420 case Ity_I8: laneSzB = 1; break;
1421 case Ity_I16: laneSzB = 2; break;
sewardj606c4ba2014-01-26 19:11:14 +00001422 case Ity_F32: case Ity_I32: laneSzB = 4; break;
1423 case Ity_F64: case Ity_I64: laneSzB = 8; break;
1424 case Ity_V128: laneSzB = 16; break;
1425 default: break;
sewardjbbcf1882014-01-12 12:49:10 +00001426 }
sewardj606c4ba2014-01-26 19:11:14 +00001427 vassert(laneSzB > 0);
1428 UInt minOff = laneNo * laneSzB;
1429 UInt maxOff = minOff + laneSzB - 1;
1430 vassert(maxOff < 16);
1431 return base + minOff;
sewardjbbcf1882014-01-12 12:49:10 +00001432}
1433
sewardj606c4ba2014-01-26 19:11:14 +00001434/* Put to the least significant lane of a Qreg. */
1435static void putQRegLO ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001436{
1437 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001438 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001439 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001440 case Ity_I8: case Ity_I16: case Ity_I32: case Ity_I64:
1441 case Ity_F32: case Ity_F64: case Ity_V128:
1442 break;
1443 default:
1444 vassert(0); // Other cases are probably invalid
sewardjbbcf1882014-01-12 12:49:10 +00001445 }
1446 stmt(IRStmt_Put(off, e));
1447}
1448
sewardj606c4ba2014-01-26 19:11:14 +00001449/* Get from the least significant lane of a Qreg. */
1450static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
sewardjbbcf1882014-01-12 12:49:10 +00001451{
sewardj606c4ba2014-01-26 19:11:14 +00001452 Int off = offsetQRegLane(qregNo, ty, 0);
sewardjbbcf1882014-01-12 12:49:10 +00001453 switch (ty) {
sewardjb3553472014-05-15 16:49:21 +00001454 case Ity_I8:
1455 case Ity_I16:
sewardj606c4ba2014-01-26 19:11:14 +00001456 case Ity_I32: case Ity_I64:
1457 case Ity_F32: case Ity_F64: case Ity_V128:
1458 break;
1459 default:
1460 vassert(0); // Other cases are ATC
sewardjbbcf1882014-01-12 12:49:10 +00001461 }
1462 return IRExpr_Get(off, ty);
1463}
1464
/* Printable name of vector register <qregNo> viewed at the width of
   'laneTy': b/h/s/d/q for 1/2/4/8/16-byte views respectively. */
static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   /* Select the name table by the byte size of the requested view. */
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}
1503
/* Printable name of a vector register viewed as a full 128-bit "q"
   register. */
static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}
1508
/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   /* On the (asserted) little-endian host, lane 1 of the 64-bit lane
      view is bytes 8..15 of the register. */
   return offsetQRegLane(qregNo, Ity_I64, 1);
}
1515
/* Read the most significant 64 bits of a Qreg as an I64 expression. */
static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}
1520
sewardj606c4ba2014-01-26 19:11:14 +00001521static void putQRegHI64 ( UInt qregNo, IRExpr* e )
sewardjbbcf1882014-01-12 12:49:10 +00001522{
1523 IRType ty = typeOfIRExpr(irsb->tyenv, e);
sewardj606c4ba2014-01-26 19:11:14 +00001524 Int off = offsetQRegHI64(qregNo);
sewardjbbcf1882014-01-12 12:49:10 +00001525 switch (ty) {
sewardj606c4ba2014-01-26 19:11:14 +00001526 case Ity_I64: case Ity_F64:
1527 break;
1528 default:
1529 vassert(0); // Other cases are plain wrong
sewardjbbcf1882014-01-12 12:49:10 +00001530 }
1531 stmt(IRStmt_Put(off, e));
1532}
1533
sewardj606c4ba2014-01-26 19:11:14 +00001534/* Put to a specified lane of a Qreg. */
1535static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
1536{
1537 IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
1538 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1539 switch (laneTy) {
1540 case Ity_F64: case Ity_I64:
sewardj32d86752014-03-02 12:47:18 +00001541 case Ity_I32: case Ity_F32:
sewardj5860ec72014-03-01 11:19:45 +00001542 case Ity_I16:
1543 case Ity_I8:
sewardj606c4ba2014-01-26 19:11:14 +00001544 break;
1545 default:
1546 vassert(0); // Other cases are ATC
1547 }
1548 stmt(IRStmt_Put(off, e));
1549}
1550
sewardj32d86752014-03-02 12:47:18 +00001551/* Get from a specified lane of a Qreg. */
sewardj606c4ba2014-01-26 19:11:14 +00001552static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
1553{
1554 Int off = offsetQRegLane(qregNo, laneTy, laneNo);
1555 switch (laneTy) {
sewardj32d86752014-03-02 12:47:18 +00001556 case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
sewardj85fbb022014-06-12 13:16:01 +00001557 case Ity_F64: case Ity_F32:
sewardj606c4ba2014-01-26 19:11:14 +00001558 break;
1559 default:
1560 vassert(0); // Other cases are ATC
1561 }
1562 return IRExpr_Get(off, laneTy);
1563}
1564
1565
sewardjbbcf1882014-01-12 12:49:10 +00001566//ZZ /* ---------------- Misc registers ---------------- */
1567//ZZ
1568//ZZ static void putMiscReg32 ( UInt gsoffset,
1569//ZZ IRExpr* e, /* :: Ity_I32 */
1570//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */)
1571//ZZ {
1572//ZZ switch (gsoffset) {
1573//ZZ case OFFB_FPSCR: break;
1574//ZZ case OFFB_QFLAG32: break;
1575//ZZ case OFFB_GEFLAG0: break;
1576//ZZ case OFFB_GEFLAG1: break;
1577//ZZ case OFFB_GEFLAG2: break;
1578//ZZ case OFFB_GEFLAG3: break;
1579//ZZ default: vassert(0); /* awaiting more cases */
1580//ZZ }
1581//ZZ vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
1582//ZZ
1583//ZZ if (guardT == IRTemp_INVALID) {
1584//ZZ /* unconditional write */
1585//ZZ stmt(IRStmt_Put(gsoffset, e));
1586//ZZ } else {
1587//ZZ stmt(IRStmt_Put(
1588//ZZ gsoffset,
1589//ZZ IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
1590//ZZ e, IRExpr_Get(gsoffset, Ity_I32) )
1591//ZZ ));
1592//ZZ }
1593//ZZ }
1594//ZZ
1595//ZZ static IRTemp get_ITSTATE ( void )
1596//ZZ {
1597//ZZ ASSERT_IS_THUMB;
1598//ZZ IRTemp t = newTemp(Ity_I32);
1599//ZZ assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
1600//ZZ return t;
1601//ZZ }
1602//ZZ
1603//ZZ static void put_ITSTATE ( IRTemp t )
1604//ZZ {
1605//ZZ ASSERT_IS_THUMB;
1606//ZZ stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
1607//ZZ }
1608//ZZ
1609//ZZ static IRTemp get_QFLAG32 ( void )
1610//ZZ {
1611//ZZ IRTemp t = newTemp(Ity_I32);
1612//ZZ assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
1613//ZZ return t;
1614//ZZ }
1615//ZZ
1616//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
1617//ZZ {
1618//ZZ putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
1619//ZZ }
1620//ZZ
1621//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
1622//ZZ Status Register) to indicate that overflow or saturation occurred.
1623//ZZ Nb: t must be zero to denote no saturation, and any nonzero
1624//ZZ value to indicate saturation. */
1625//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
1626//ZZ {
1627//ZZ IRTemp old = get_QFLAG32();
1628//ZZ IRTemp nyu = newTemp(Ity_I32);
1629//ZZ assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
1630//ZZ put_QFLAG32(nyu, condT);
1631//ZZ }
1632
1633
1634/* ---------------- FPCR stuff ---------------- */
1635
1636/* Generate IR to get hold of the rounding mode bits in FPCR, and
1637 convert them to IR format. Bind the final result to the
1638 returned temp. */
1639static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1640{
1641 /* The ARMvfp encoding for rounding mode bits is:
1642 00 to nearest
1643 01 to +infinity
1644 10 to -infinity
1645 11 to zero
1646 We need to convert that to the IR encoding:
1647 00 to nearest (the default)
1648 10 to +infinity
1649 01 to -infinity
1650 11 to zero
1651 Which can be done by swapping bits 0 and 1.
1652 The rmode bits are at 23:22 in FPSCR.
1653 */
1654 IRTemp armEncd = newTemp(Ity_I32);
1655 IRTemp swapped = newTemp(Ity_I32);
1656 /* Fish FPCR[23:22] out, and slide to bottom. Doesn't matter that
1657 we don't zero out bits 24 and above, since the assignment to
1658 'swapped' will mask them out anyway. */
1659 assign(armEncd,
1660 binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
1661 /* Now swap them. */
1662 assign(swapped,
1663 binop(Iop_Or32,
1664 binop(Iop_And32,
1665 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1666 mkU32(2)),
1667 binop(Iop_And32,
1668 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1669 mkU32(1))
1670 ));
1671 return swapped;
1672}
1673
1674
1675/*------------------------------------------------------------*/
1676/*--- Helpers for flag handling and conditional insns ---*/
1677/*------------------------------------------------------------*/
1678
/* Printable name of an ARM64 condition code, as used in disassembly
   printing. */
static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ: return "eq";
      case ARM64CondNE: return "ne";
      case ARM64CondCS: return "cs";  // or 'hs'
      case ARM64CondCC: return "cc";  // or 'lo'
      case ARM64CondMI: return "mi";
      case ARM64CondPL: return "pl";
      case ARM64CondVS: return "vs";
      case ARM64CondVC: return "vc";
      case ARM64CondHI: return "hi";
      case ARM64CondLS: return "ls";
      case ARM64CondGE: return "ge";
      case ARM64CondLT: return "lt";
      case ARM64CondGT: return "gt";
      case ARM64CondLE: return "le";
      case ARM64CondAL: return "al";
      case ARM64CondNV: return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}
1701
/* A handy shorthand for nameARM64Condcode. */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1706
1707
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2.
      (mcx_mask bit i set == "ignore argument i"; bits 0 and 3 are the
      cond|op word and NDEP respectively.) */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1749
1750
/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}
1764
1765
/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2 (args 1 and 2; mcx_mask bit i set
      means "ignore argument i"). */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1788
1789
sewardjbbcf1882014-01-12 12:49:10 +00001790//ZZ /* Build IR to calculate just the overflow flag from stored
1791//ZZ CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1792//ZZ Ity_I32. */
1793//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
1794//ZZ {
1795//ZZ IRExpr** args
1796//ZZ = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1797//ZZ IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1798//ZZ IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1799//ZZ IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1800//ZZ IRExpr* call
1801//ZZ = mkIRExprCCall(
1802//ZZ Ity_I32,
1803//ZZ 0/*regparm*/,
1804//ZZ "armg_calculate_flag_v", &armg_calculate_flag_v,
1805//ZZ args
1806//ZZ );
1807//ZZ /* Exclude OP and NDEP from definedness checking. We're only
1808//ZZ interested in DEP1 and DEP2. */
1809//ZZ call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1810//ZZ return call;
1811//ZZ }
1812
1813
/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2 (args 1 and 2; mcx_mask bit i set
      means "ignore argument i"). */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}
1835
1836
1837/* Build IR to set the flags thunk, in the most general case. */
1838static
1839void setFlags_D1_D2_ND ( UInt cc_op,
1840 IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
1841{
1842 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I64));
1843 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I64));
1844 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I64));
1845 vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
1846 stmt( IRStmt_Put( OFFB_CC_OP, mkU64(cc_op) ));
1847 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1848 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1849 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1850}
1851
1852/* Build IR to set the flags thunk after ADD or SUB. */
1853static
1854void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
1855{
1856 IRTemp argL64 = IRTemp_INVALID;
1857 IRTemp argR64 = IRTemp_INVALID;
1858 IRTemp z64 = newTemp(Ity_I64);
1859 if (is64) {
1860 argL64 = argL;
1861 argR64 = argR;
1862 } else {
1863 argL64 = newTemp(Ity_I64);
1864 argR64 = newTemp(Ity_I64);
1865 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1866 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1867 }
1868 assign(z64, mkU64(0));
1869 UInt cc_op = ARM64G_CC_OP_NUMBER;
1870 /**/ if ( isSUB && is64) { cc_op = ARM64G_CC_OP_SUB64; }
1871 else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
1872 else if (!isSUB && is64) { cc_op = ARM64G_CC_OP_ADD64; }
1873 else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
1874 else { vassert(0); }
1875 setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
1876}
1877
sewardjdee30502014-06-04 13:09:44 +00001878/* Build IR to set the flags thunk after ADC or SBC. */
1879static
1880void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
1881 IRTemp argL, IRTemp argR, IRTemp oldC )
1882{
1883 IRTemp argL64 = IRTemp_INVALID;
1884 IRTemp argR64 = IRTemp_INVALID;
1885 IRTemp oldC64 = IRTemp_INVALID;
1886 if (is64) {
1887 argL64 = argL;
1888 argR64 = argR;
1889 oldC64 = oldC;
1890 } else {
1891 argL64 = newTemp(Ity_I64);
1892 argR64 = newTemp(Ity_I64);
1893 oldC64 = newTemp(Ity_I64);
1894 assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
1895 assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
1896 assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
1897 }
1898 UInt cc_op = ARM64G_CC_OP_NUMBER;
1899 /**/ if ( isSBC && is64) { cc_op = ARM64G_CC_OP_SBC64; }
1900 else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
1901 else if (!isSBC && is64) { cc_op = ARM64G_CC_OP_ADC64; }
1902 else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
1903 else { vassert(0); }
1904 setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
1905}
1906
sewardjbbcf1882014-01-12 12:49:10 +00001907/* Build IR to set the flags thunk after ADD or SUB, if the given
1908 condition evaluates to True at run time. If not, the flags are set
1909 to the specified NZCV value. */
1910static
1911void setFlags_ADD_SUB_conditionally (
1912 Bool is64, Bool isSUB,
1913 IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
1914 )
1915{
1916 /* Generate IR as follows:
1917 CC_OP = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
1918 CC_DEP1 = ITE(cond, argL64, nzcv << 28)
1919 CC_DEP2 = ITE(cond, argR64, 0)
1920 CC_NDEP = 0
1921 */
1922
1923 IRTemp z64 = newTemp(Ity_I64);
1924 assign(z64, mkU64(0));
1925
1926 /* Establish the operation and operands for the True case. */
1927 IRTemp t_dep1 = IRTemp_INVALID;
1928 IRTemp t_dep2 = IRTemp_INVALID;
1929 UInt t_op = ARM64G_CC_OP_NUMBER;
1930 /**/ if ( isSUB && is64) { t_op = ARM64G_CC_OP_SUB64; }
1931 else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
1932 else if (!isSUB && is64) { t_op = ARM64G_CC_OP_ADD64; }
1933 else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
1934 else { vassert(0); }
1935 /* */
1936 if (is64) {
1937 t_dep1 = argL;
1938 t_dep2 = argR;
1939 } else {
1940 t_dep1 = newTemp(Ity_I64);
1941 t_dep2 = newTemp(Ity_I64);
1942 assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
1943 assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
1944 }
1945
1946 /* Establish the operation and operands for the False case. */
1947 IRTemp f_dep1 = newTemp(Ity_I64);
1948 IRTemp f_dep2 = z64;
1949 UInt f_op = ARM64G_CC_OP_COPY;
1950 assign(f_dep1, mkU64(nzcv << 28));
1951
1952 /* Final thunk values */
1953 IRTemp dep1 = newTemp(Ity_I64);
1954 IRTemp dep2 = newTemp(Ity_I64);
1955 IRTemp op = newTemp(Ity_I64);
1956
1957 assign(op, IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
1958 assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
1959 assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));
1960
1961 /* finally .. */
1962 stmt( IRStmt_Put( OFFB_CC_OP, mkexpr(op) ));
1963 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
1964 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
1965 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
1966}
1967
1968/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
1969static
1970void setFlags_LOGIC ( Bool is64, IRTemp res )
1971{
1972 IRTemp res64 = IRTemp_INVALID;
1973 IRTemp z64 = newTemp(Ity_I64);
1974 UInt cc_op = ARM64G_CC_OP_NUMBER;
1975 if (is64) {
1976 res64 = res;
1977 cc_op = ARM64G_CC_OP_LOGIC64;
1978 } else {
1979 res64 = newTemp(Ity_I64);
1980 assign(res64, unop(Iop_32Uto64, mkexpr(res)));
1981 cc_op = ARM64G_CC_OP_LOGIC32;
1982 }
1983 assign(z64, mkU64(0));
1984 setFlags_D1_D2_ND(cc_op, res64, z64, z64);
1985}
1986
1987/* Build IR to set the flags thunk to a given NZCV value. NZCV is
1988 located in bits 31:28 of the supplied value. */
1989static
1990void setFlags_COPY ( IRTemp nzcv_28x0 )
1991{
1992 IRTemp z64 = newTemp(Ity_I64);
1993 assign(z64, mkU64(0));
1994 setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
1995}
1996
1997
1998//ZZ /* Minor variant of the above that sets NDEP to zero (if it
1999//ZZ sets it at all) */
2000//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
2001//ZZ IRTemp t_dep2,
2002//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2003//ZZ {
2004//ZZ IRTemp z32 = newTemp(Ity_I32);
2005//ZZ assign( z32, mkU32(0) );
2006//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
2007//ZZ }
2008//ZZ
2009//ZZ
2010//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
2011//ZZ sets it at all) */
2012//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
2013//ZZ IRTemp t_ndep,
2014//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2015//ZZ {
2016//ZZ IRTemp z32 = newTemp(Ity_I32);
2017//ZZ assign( z32, mkU32(0) );
2018//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
2019//ZZ }
2020//ZZ
2021//ZZ
2022//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
2023//ZZ sets them at all) */
2024//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
2025//ZZ IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
2026//ZZ {
2027//ZZ IRTemp z32 = newTemp(Ity_I32);
2028//ZZ assign( z32, mkU32(0) );
2029//ZZ setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
2030//ZZ }
2031
2032
2033/*------------------------------------------------------------*/
2034/*--- Misc math helpers ---*/
2035/*------------------------------------------------------------*/
2036
sewardj32d86752014-03-02 12:47:18 +00002037/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
2038static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
sewardjbbcf1882014-01-12 12:49:10 +00002039{
sewardj32d86752014-03-02 12:47:18 +00002040 IRTemp maskT = newTemp(Ity_I64);
2041 IRTemp res = newTemp(Ity_I64);
2042 vassert(sh >= 1 && sh <= 63);
2043 assign(maskT, mkU64(mask));
sewardjdc9259c2014-02-27 11:10:19 +00002044 assign( res,
sewardjbbcf1882014-01-12 12:49:10 +00002045 binop(Iop_Or64,
2046 binop(Iop_Shr64,
sewardj32d86752014-03-02 12:47:18 +00002047 binop(Iop_And64,mkexpr(x),mkexpr(maskT)),
2048 mkU8(sh)),
sewardjbbcf1882014-01-12 12:49:10 +00002049 binop(Iop_And64,
sewardj32d86752014-03-02 12:47:18 +00002050 binop(Iop_Shl64,mkexpr(x),mkU8(sh)),
2051 mkexpr(maskT))
sewardjbbcf1882014-01-12 12:49:10 +00002052 )
2053 );
sewardjdc9259c2014-02-27 11:10:19 +00002054 return res;
2055}
2056
sewardj32d86752014-03-02 12:47:18 +00002057/* Generates byte swaps within 32-bit lanes. */
2058static IRTemp math_UINTSWAP64 ( IRTemp src )
2059{
2060 IRTemp res;
2061 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2062 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2063 return res;
2064}
2065
2066/* Generates byte swaps within 16-bit lanes. */
2067static IRTemp math_USHORTSWAP64 ( IRTemp src )
2068{
2069 IRTemp res;
2070 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2071 return res;
2072}
2073
2074/* Generates a 64-bit byte swap. */
2075static IRTemp math_BYTESWAP64 ( IRTemp src )
2076{
2077 IRTemp res;
2078 res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
2079 res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
2080 res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
2081 return res;
2082}
sewardjdc9259c2014-02-27 11:10:19 +00002083
2084/* Generates a 64-bit bit swap. */
2085static IRTemp math_BITSWAP64 ( IRTemp src )
2086{
sewardj32d86752014-03-02 12:47:18 +00002087 IRTemp res;
2088 res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
2089 res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
2090 res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
2091 return math_BYTESWAP64(res);
sewardjbbcf1882014-01-12 12:49:10 +00002092}
2093
sewardj606c4ba2014-01-26 19:11:14 +00002094/* Duplicates the bits at the bottom of the given word to fill the
2095 whole word. src :: Ity_I64 is assumed to have zeroes everywhere
2096 except for the bottom bits. */
2097static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
2098{
2099 if (srcTy == Ity_I8) {
2100 IRTemp t16 = newTemp(Ity_I64);
2101 assign(t16, binop(Iop_Or64, mkexpr(src),
2102 binop(Iop_Shl64, mkexpr(src), mkU8(8))));
2103 IRTemp t32 = newTemp(Ity_I64);
2104 assign(t32, binop(Iop_Or64, mkexpr(t16),
2105 binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
2106 IRTemp t64 = newTemp(Ity_I64);
2107 assign(t64, binop(Iop_Or64, mkexpr(t32),
2108 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2109 return t64;
2110 }
2111 if (srcTy == Ity_I16) {
2112 IRTemp t32 = newTemp(Ity_I64);
2113 assign(t32, binop(Iop_Or64, mkexpr(src),
2114 binop(Iop_Shl64, mkexpr(src), mkU8(16))));
2115 IRTemp t64 = newTemp(Ity_I64);
2116 assign(t64, binop(Iop_Or64, mkexpr(t32),
2117 binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
2118 return t64;
2119 }
2120 if (srcTy == Ity_I32) {
2121 IRTemp t64 = newTemp(Ity_I64);
2122 assign(t64, binop(Iop_Or64, mkexpr(src),
2123 binop(Iop_Shl64, mkexpr(src), mkU8(32))));
2124 return t64;
2125 }
2126 if (srcTy == Ity_I64) {
2127 return src;
2128 }
2129 vassert(0);
2130}
2131
2132
sewardj18bf5172014-06-14 18:05:30 +00002133/* Duplicates the src element exactly so as to fill a V128 value. */
sewardj85fbb022014-06-12 13:16:01 +00002134static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
2135{
sewardj8e91fd42014-07-11 12:05:47 +00002136 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +00002137 if (srcTy == Ity_F64) {
2138 IRTemp i64 = newTemp(Ity_I64);
2139 assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
2140 assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
2141 return res;
2142 }
2143 if (srcTy == Ity_F32) {
2144 IRTemp i64a = newTemp(Ity_I64);
2145 assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
2146 IRTemp i64b = newTemp(Ity_I64);
2147 assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
2148 mkexpr(i64a)));
2149 assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
2150 return res;
2151 }
sewardj18bf5172014-06-14 18:05:30 +00002152 if (srcTy == Ity_I64) {
2153 assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
2154 return res;
2155 }
2156 if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
2157 IRTemp t1 = newTemp(Ity_I64);
2158 assign(t1, widenUto64(srcTy, mkexpr(src)));
2159 IRTemp t2 = math_DUP_TO_64(t1, srcTy);
2160 assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
2161 return res;
2162 }
sewardj85fbb022014-06-12 13:16:01 +00002163 vassert(0);
2164}
2165
2166
sewardjdf9d6d52014-06-27 10:43:22 +00002167/* |fullWidth| is a full V128 width result. Depending on bitQ,
2168 zero out the upper half. */
2169static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
2170{
2171 if (bitQ == 1) return mkexpr(fullWidth);
2172 if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
2173 vassert(0);
2174}
2175
sewardja5a6b752014-06-30 07:33:56 +00002176/* The same, but from an expression instead. */
2177static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
2178{
sewardj8e91fd42014-07-11 12:05:47 +00002179 IRTemp fullWidthT = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00002180 assign(fullWidthT, fullWidth);
2181 return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
2182}
2183
sewardjdf9d6d52014-06-27 10:43:22 +00002184
sewardjbbcf1882014-01-12 12:49:10 +00002185/*------------------------------------------------------------*/
2186/*--- FP comparison helpers ---*/
2187/*------------------------------------------------------------*/
2188
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix       = newTemp(Ity_I64);
   IRTemp termL    = newTemp(Ity_I64);
   IRTemp termR    = newTemp(Ity_I64);
   IRTemp nzcv     = newTemp(Ity_I64);
   IRTemp irRes    = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   /* Zero-widen the I32 comparison result so all the arithmetic
      below can be done at I64. */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   /* ix = (bits 6:5 of irRes, keeping only bit 6) : (bit 0) */
   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   /* termR = 1 iff both bits of ix are set (the UN case). */
   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
2265
2266
2267/*------------------------------------------------------------*/
2268/*--- Data processing (immediate) ---*/
2269/*------------------------------------------------------------*/
2270
2271/* Helper functions for supporting "DecodeBitMasks" */
2272
2273static ULong dbm_ROR ( Int width, ULong x, Int rot )
2274{
2275 vassert(width > 0 && width <= 64);
2276 vassert(rot >= 0 && rot < width);
2277 if (rot == 0) return x;
2278 ULong res = x >> rot;
2279 res |= (x << (width - rot));
2280 if (width < 64)
2281 res &= ((1ULL << width) - 1);
2282 return res;
2283}
2284
2285static ULong dbm_RepTo64( Int esize, ULong x )
2286{
2287 switch (esize) {
2288 case 64:
2289 return x;
2290 case 32:
2291 x &= 0xFFFFFFFF; x |= (x << 32);
2292 return x;
2293 case 16:
2294 x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
2295 return x;
2296 case 8:
2297 x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
2298 return x;
2299 case 4:
2300 x &= 0xF; x |= (x << 4); x |= (x << 8);
2301 x |= (x << 16); x |= (x << 32);
2302 return x;
2303 case 2:
2304 x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
2305 x |= (x << 16); x |= (x << 32);
2306 return x;
2307 default:
2308 break;
2309 }
2310 vpanic("dbm_RepTo64");
2311 /*NOTREACHED*/
2312 return 0;
2313}
2314
2315static Int dbm_highestSetBit ( ULong x )
2316{
2317 Int i;
2318 for (i = 63; i >= 0; i--) {
2319 if (x & (1ULL << i))
2320 return i;
2321 }
2322 vassert(x == 0);
2323 return -1;
2324}
2325
/* Transliteration of the ARMv8 shared pseudocode DecodeBitMasks().
   Decodes the (immN, imms, immr) bitmask-immediate encoding into a
   wmask (the "logical immediate" / rotate mask) and a tmask (the
   bitfield top mask), each replicated to 64 bits.  Either output
   pointer may be NULL if that mask is not wanted.  |immediate| True
   rejects the all-ones element encoding (required for logical
   immediates).  |M| is the register width, 32 or 64.  Returns False
   for invalid (reserved) encodings, True on success. */
static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   /* len = index of highest set bit of immN:NOT(imms); determines
      the element size (2^len). */
   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   /* For logical immediates an all-ones element is reserved. */
   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   /* wmask = Replicate(ROR(Ones(S+1), R)); tmask = Replicate(Ones(d+1)) */
   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
2385
2386
/* Decode and translate one instruction from the ARM64 "Data
   Processing (immediate)" encoding group: ADD/SUB(imm), ADR/ADRP,
   logical(imm), MOVZ/MOVN/MOVK, bitfield moves and EXTR.  Emits IR
   (and a DIP disassembly line) and returns True if |insn| was
   recognised; returns False otherwise.  |dres| is accepted for
   interface uniformity with the sibling decoders; no case here
   writes it. */
static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         /* sh==1 means the 12-bit immediate is shifted left by 12. */
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL  = newTemp(Ity_I64);
            IRTemp argR  = newTemp(Ity_I64);
            IRTemp res   = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               /* Flag-setting variant writes ZR-encoded Rd, not SP. */
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL  = newTemp(Ity_I32);
            IRTemp argR  = newTemp(Ity_I32);
            IRTemp res   = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   /* ADR computes PC + simm21; ADRP computes (PC aligned down to a
      4KB boundary) + (simm21 << 12). */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      /* Expand the (N,immS,immR) triple into the actual 64/32-bit
         logical immediate. */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            /* AND/ORR/EOR: destination is SP-encoded and no flags. */
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            /* ANDS: destination is ZR-encoded and flags are set. */
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      /* inZero: insert into a zeroed destination (UBFM/SBFM) rather
         than into the old Rd (BFM).  extend: sign-extend above the
         field (SBFM only). */
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                      || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                              mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
        valid = False;
      if (!is64 && imm6 >= 32)
        valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
        /* Shift of zero: result is just the low source register;
           also avoids an out-of-range (szBits - 0) shift below. */
        assign(res, mkexpr(srcLo));
      } else {
        UInt szBits = 8 * sizeofIRType(ty);
        vassert(imm6 > 0 && imm6 < szBits);
        assign(res, binop(mkOR(ty),
                          binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                          binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
  after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}
2728
2729
2730/*------------------------------------------------------------*/
2731/*--- Data processing (register) instructions ---*/
2732/*------------------------------------------------------------*/
2733
2734static const HChar* nameSH ( UInt sh ) {
2735 switch (sh) {
2736 case 0: return "lsl";
2737 case 1: return "lsr";
2738 case 2: return "asr";
2739 case 3: return "ror";
2740 default: vassert(0);
2741 }
2742}
2743
2744/* Generate IR to get a register value, possibly shifted by an
2745 immediate. Returns either a 32- or 64-bit temporary holding the
2746 result. After the shift, the value can optionally be NOT-ed
2747 too.
2748
2749 sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR. sh_amt may only be
2750 in the range 0 to (is64 ? 64 : 32)-1. For some instructions, ROR
2751 isn't allowed, but it's the job of the caller to check that.
2752*/
2753static IRTemp getShiftedIRegOrZR ( Bool is64,
2754 UInt sh_how, UInt sh_amt, UInt regNo,
2755 Bool invert )
2756{
2757 vassert(sh_how < 4);
2758 vassert(sh_amt < (is64 ? 64 : 32));
2759 IRType ty = is64 ? Ity_I64 : Ity_I32;
2760 IRTemp t0 = newTemp(ty);
2761 assign(t0, getIRegOrZR(is64, regNo));
2762 IRTemp t1 = newTemp(ty);
2763 switch (sh_how) {
2764 case BITS2(0,0):
2765 assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
2766 break;
2767 case BITS2(0,1):
2768 assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
2769 break;
2770 case BITS2(1,0):
2771 assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
2772 break;
2773 case BITS2(1,1):
2774 assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
2775 break;
2776 default:
2777 vassert(0);
2778 }
2779 if (invert) {
2780 IRTemp t2 = newTemp(ty);
2781 assign(t2, unop(mkNOT(ty), mkexpr(t1)));
2782 return t2;
2783 } else {
2784 return t1;
2785 }
2786}
2787
2788
2789static
2790Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
2791 UInt insn)
2792{
2793# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
2794
2795 /* ------------------- ADD/SUB(reg) ------------------- */
2796 /* x==0 => 32 bit op x==1 => 64 bit op
2797 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)
2798
2799 31 30 29 28 23 21 20 15 9 4
2800 | | | | | | | | | |
2801 x 0 0 01011 sh 0 Rm imm6 Rn Rd ADD Rd,Rn, sh(Rm,imm6)
2802 x 0 1 01011 sh 0 Rm imm6 Rn Rd ADDS Rd,Rn, sh(Rm,imm6)
2803 x 1 0 01011 sh 0 Rm imm6 Rn Rd SUB Rd,Rn, sh(Rm,imm6)
2804 x 1 1 01011 sh 0 Rm imm6 Rn Rd SUBS Rd,Rn, sh(Rm,imm6)
2805 */
2806 if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
2807 UInt bX = INSN(31,31);
2808 UInt bOP = INSN(30,30); /* 0: ADD, 1: SUB */
2809 UInt bS = INSN(29, 29); /* set flags? */
2810 UInt sh = INSN(23,22);
2811 UInt rM = INSN(20,16);
2812 UInt imm6 = INSN(15,10);
2813 UInt rN = INSN(9,5);
2814 UInt rD = INSN(4,0);
2815 Bool isSUB = bOP == 1;
2816 Bool is64 = bX == 1;
2817 IRType ty = is64 ? Ity_I64 : Ity_I32;
2818 if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
2819 /* invalid; fall through */
2820 } else {
2821 IRTemp argL = newTemp(ty);
2822 assign(argL, getIRegOrZR(is64, rN));
2823 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
2824 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2825 IRTemp res = newTemp(ty);
2826 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2827 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2828 if (bS) {
2829 setFlags_ADD_SUB(is64, isSUB, argL, argR);
2830 }
2831 DIP("%s%s %s, %s, %s, %s #%u\n",
2832 bOP ? "sub" : "add", bS ? "s" : "",
2833 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2834 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2835 return True;
2836 }
2837 }
2838
sewardjdee30502014-06-04 13:09:44 +00002839 /* ------------------- ADC/SBC(reg) ------------------- */
2840 /* x==0 => 32 bit op x==1 => 64 bit op
2841
2842 31 30 29 28 23 21 20 15 9 4
2843 | | | | | | | | | |
2844 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2845 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2846 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2847 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2848 */
2849
2850 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2851 UInt bX = INSN(31,31);
2852 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2853 UInt bS = INSN(29,29); /* set flags */
2854 UInt rM = INSN(20,16);
2855 UInt rN = INSN(9,5);
2856 UInt rD = INSN(4,0);
2857
2858 Bool isSUB = bOP == 1;
2859 Bool is64 = bX == 1;
2860 IRType ty = is64 ? Ity_I64 : Ity_I32;
2861
2862 IRTemp oldC = newTemp(ty);
2863 assign(oldC,
2864 is64 ? mk_arm64g_calculate_flag_c()
2865 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2866
2867 IRTemp argL = newTemp(ty);
2868 assign(argL, getIRegOrZR(is64, rN));
2869 IRTemp argR = newTemp(ty);
2870 assign(argR, getIRegOrZR(is64, rM));
2871
2872 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2873 IRTemp res = newTemp(ty);
2874 if (isSUB) {
2875 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2876 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2877 assign(res,
2878 binop(op,
2879 binop(op, mkexpr(argL), mkexpr(argR)),
2880 binop(xorOp, mkexpr(oldC), one)));
2881 } else {
2882 assign(res,
2883 binop(op,
2884 binop(op, mkexpr(argL), mkexpr(argR)),
2885 mkexpr(oldC)));
2886 }
2887
2888 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2889
2890 if (bS) {
2891 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2892 }
2893
2894 DIP("%s%s %s, %s, %s\n",
2895 bOP ? "sbc" : "adc", bS ? "s" : "",
2896 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2897 nameIRegOrZR(is64, rM));
2898 return True;
2899 }
2900
sewardjbbcf1882014-01-12 12:49:10 +00002901 /* -------------------- LOGIC(reg) -------------------- */
2902 /* x==0 => 32 bit op x==1 => 64 bit op
2903 N==0 => inv? is no-op (no inversion)
2904 N==1 => inv? is NOT
2905 sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR
2906
2907 31 30 28 23 21 20 15 9 4
2908 | | | | | | | | |
2909 x 00 01010 sh N Rm imm6 Rn Rd AND Rd,Rn, inv?(sh(Rm,imm6))
2910 x 01 01010 sh N Rm imm6 Rn Rd ORR Rd,Rn, inv?(sh(Rm,imm6))
2911 x 10 01010 sh N Rm imm6 Rn Rd EOR Rd,Rn, inv?(sh(Rm,imm6))
2912 x 11 01010 sh N Rm imm6 Rn Rd ANDS Rd,Rn, inv?(sh(Rm,imm6))
2913 With N=1, the names are: BIC ORN EON BICS
2914 */
2915 if (INSN(28,24) == BITS5(0,1,0,1,0)) {
2916 UInt bX = INSN(31,31);
2917 UInt sh = INSN(23,22);
2918 UInt bN = INSN(21,21);
2919 UInt rM = INSN(20,16);
2920 UInt imm6 = INSN(15,10);
2921 UInt rN = INSN(9,5);
2922 UInt rD = INSN(4,0);
2923 Bool is64 = bX == 1;
2924 IRType ty = is64 ? Ity_I64 : Ity_I32;
2925 if (!is64 && imm6 > 31) {
         /* invalid; fall through */
2927 } else {
2928 IRTemp argL = newTemp(ty);
2929 assign(argL, getIRegOrZR(is64, rN));
2930 IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
2931 IROp op = Iop_INVALID;
2932 switch (INSN(30,29)) {
2933 case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
2934 case BITS2(0,1): op = mkOR(ty); break;
2935 case BITS2(1,0): op = mkXOR(ty); break;
2936 default: vassert(0);
2937 }
2938 IRTemp res = newTemp(ty);
2939 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
2940 if (INSN(30,29) == BITS2(1,1)) {
2941 setFlags_LOGIC(is64, res);
2942 }
2943 putIRegOrZR(is64, rD, mkexpr(res));
2944
2945 static const HChar* names_op[8]
2946 = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
2947 vassert(((bN << 2) | INSN(30,29)) < 8);
2948 const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
2949 /* Special-case the printing of "MOV" */
2950 if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
2951 DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
2952 nameIRegOrZR(is64, rM));
2953 } else {
2954 DIP("%s %s, %s, %s, %s #%u\n", nm_op,
2955 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2956 nameIRegOrZR(is64, rM), nameSH(sh), imm6);
2957 }
2958 return True;
2959 }
2960 }
2961
2962 /* -------------------- {U,S}MULH -------------------- */
2963 /* 31 23 22 20 15 9 4
2964 10011011 1 10 Rm 011111 Rn Rd UMULH Xd,Xn,Xm
2965 10011011 0 10 Rm 011111 Rn Rd SMULH Xd,Xn,Xm
2966 */
2967 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
sewardj7fce7cc2014-05-07 09:41:40 +00002968 && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
sewardjbbcf1882014-01-12 12:49:10 +00002969 Bool isU = INSN(23,23) == 1;
2970 UInt mm = INSN(20,16);
2971 UInt nn = INSN(9,5);
2972 UInt dd = INSN(4,0);
2973 putIReg64orZR(dd, unop(Iop_128HIto64,
2974 binop(isU ? Iop_MullU64 : Iop_MullS64,
2975 getIReg64orZR(nn), getIReg64orZR(mm))));
2976 DIP("%cmulh %s, %s, %s\n",
2977 isU ? 'u' : 's',
2978 nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
2979 return True;
2980 }
2981
2982 /* -------------------- M{ADD,SUB} -------------------- */
2983 /* 31 30 20 15 14 9 4
2984 sf 00 11011 000 m 0 a n r MADD Rd,Rn,Rm,Ra d = a+m*n
      sf 00 11011 000 m 1 a n r MSUB Rd,Rn,Rm,Ra d = a-m*n
2986 */
2987 if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
2988 Bool is64 = INSN(31,31) == 1;
2989 UInt mm = INSN(20,16);
2990 Bool isAdd = INSN(15,15) == 0;
2991 UInt aa = INSN(14,10);
2992 UInt nn = INSN(9,5);
2993 UInt dd = INSN(4,0);
2994 if (is64) {
2995 putIReg64orZR(
2996 dd,
2997 binop(isAdd ? Iop_Add64 : Iop_Sub64,
2998 getIReg64orZR(aa),
2999 binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
3000 } else {
3001 putIReg32orZR(
3002 dd,
3003 binop(isAdd ? Iop_Add32 : Iop_Sub32,
3004 getIReg32orZR(aa),
3005 binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
3006 }
3007 DIP("%s %s, %s, %s, %s\n",
3008 isAdd ? "madd" : "msub",
3009 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3010 nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
3011 return True;
3012 }
3013
3014 /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
3015 /* 31 30 28 20 15 11 9 4
3016 sf 00 1101 0100 mm cond 00 nn dd CSEL Rd,Rn,Rm
3017 sf 00 1101 0100 mm cond 01 nn dd CSINC Rd,Rn,Rm
3018 sf 10 1101 0100 mm cond 00 nn dd CSINV Rd,Rn,Rm
3019 sf 10 1101 0100 mm cond 01 nn dd CSNEG Rd,Rn,Rm
3020 In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
3021 */
3022 if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
3023 Bool is64 = INSN(31,31) == 1;
3024 UInt b30 = INSN(30,30);
3025 UInt mm = INSN(20,16);
3026 UInt cond = INSN(15,12);
3027 UInt b10 = INSN(10,10);
3028 UInt nn = INSN(9,5);
3029 UInt dd = INSN(4,0);
3030 UInt op = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
3031 IRType ty = is64 ? Ity_I64 : Ity_I32;
3032 IRExpr* argL = getIRegOrZR(is64, nn);
3033 IRExpr* argR = getIRegOrZR(is64, mm);
3034 switch (op) {
3035 case BITS2(0,0):
3036 break;
3037 case BITS2(0,1):
3038 argR = binop(mkADD(ty), argR, mkU(ty,1));
3039 break;
3040 case BITS2(1,0):
3041 argR = unop(mkNOT(ty), argR);
3042 break;
3043 case BITS2(1,1):
3044 argR = binop(mkSUB(ty), mkU(ty,0), argR);
3045 break;
3046 default:
3047 vassert(0);
3048 }
3049 putIRegOrZR(
3050 is64, dd,
3051 IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
3052 argL, argR)
3053 );
3054 const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
3055 DIP("%s %s, %s, %s, %s\n", op_nm[op],
3056 nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
3057 nameIRegOrZR(is64, mm), nameCC(cond));
3058 return True;
3059 }
3060
3061 /* -------------- ADD/SUB(extended reg) -------------- */
3062 /* 28 20 15 12 9 4
3063 000 01011 00 1 m opt imm3 n d ADD Wd|SP, Wn|SP, Wm ext&lsld
3064 100 01011 00 1 m opt imm3 n d ADD Xd|SP, Xn|SP, Rm ext&lsld
3065
3066 001 01011 00 1 m opt imm3 n d ADDS Wd, Wn|SP, Wm ext&lsld
3067 101 01011 00 1 m opt imm3 n d ADDS Xd, Xn|SP, Rm ext&lsld
3068
3069 010 01011 00 1 m opt imm3 n d SUB Wd|SP, Wn|SP, Wm ext&lsld
3070 110 01011 00 1 m opt imm3 n d SUB Xd|SP, Xn|SP, Rm ext&lsld
3071
3072 011 01011 00 1 m opt imm3 n d SUBS Wd, Wn|SP, Wm ext&lsld
3073 111 01011 00 1 m opt imm3 n d SUBS Xd, Xn|SP, Rm ext&lsld
3074
3075 The 'm' operand is extended per opt, thusly:
3076
3077 000 Xm & 0xFF UXTB
3078 001 Xm & 0xFFFF UXTH
3079 010 Xm & (2^32)-1 UXTW
3080 011 Xm UXTX
3081
3082 100 Xm sx from bit 7 SXTB
3083 101 Xm sx from bit 15 SXTH
3084 110 Xm sx from bit 31 SXTW
3085 111 Xm SXTX
3086
3087 In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
3088 operation on Xm. In the 32 bit case, UXTW, UXTX, SXTW and SXTX
3089 are the identity operation on Wm.
3090
3091 After extension, the value is shifted left by imm3 bits, which
3092 may only be in the range 0 .. 4 inclusive.
3093 */
3094 if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
3095 Bool is64 = INSN(31,31) == 1;
3096 Bool isSub = INSN(30,30) == 1;
3097 Bool setCC = INSN(29,29) == 1;
3098 UInt mm = INSN(20,16);
3099 UInt opt = INSN(15,13);
3100 UInt imm3 = INSN(12,10);
3101 UInt nn = INSN(9,5);
3102 UInt dd = INSN(4,0);
3103 const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
3104 "sxtb", "sxth", "sxtw", "sxtx" };
3105 /* Do almost the same thing in the 32- and 64-bit cases. */
3106 IRTemp xN = newTemp(Ity_I64);
3107 IRTemp xM = newTemp(Ity_I64);
3108 assign(xN, getIReg64orSP(nn));
3109 assign(xM, getIReg64orZR(mm));
3110 IRExpr* xMw = mkexpr(xM); /* "xM widened" */
3111 Int shSX = 0;
3112 /* widen Xm .. */
3113 switch (opt) {
3114 case BITS3(0,0,0): // UXTB
3115 xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
3116 case BITS3(0,0,1): // UXTH
3117 xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
3118 case BITS3(0,1,0): // UXTW -- noop for the 32bit case
3119 if (is64) {
3120 xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
3121 }
3122 break;
3123 case BITS3(0,1,1): // UXTX -- always a noop
3124 break;
3125 case BITS3(1,0,0): // SXTB
3126 shSX = 56; goto sxTo64;
3127 case BITS3(1,0,1): // SXTH
3128 shSX = 48; goto sxTo64;
3129 case BITS3(1,1,0): // SXTW -- noop for the 32bit case
3130 if (is64) {
3131 shSX = 32; goto sxTo64;
3132 }
3133 break;
3134 case BITS3(1,1,1): // SXTX -- always a noop
3135 break;
3136 sxTo64:
3137 vassert(shSX >= 32);
3138 xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
3139 mkU8(shSX));
3140 break;
3141 default:
3142 vassert(0);
3143 }
3144 /* and now shift */
3145 IRTemp argL = xN;
3146 IRTemp argR = newTemp(Ity_I64);
3147 assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
3148 IRTemp res = newTemp(Ity_I64);
3149 assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
3150 mkexpr(argL), mkexpr(argR)));
3151 if (is64) {
3152 if (setCC) {
3153 putIReg64orZR(dd, mkexpr(res));
3154 setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
3155 } else {
3156 putIReg64orSP(dd, mkexpr(res));
3157 }
3158 } else {
3159 if (setCC) {
3160 IRTemp argL32 = newTemp(Ity_I32);
3161 IRTemp argR32 = newTemp(Ity_I32);
3162 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
3163 assign(argL32, unop(Iop_64to32, mkexpr(argL)));
3164 assign(argR32, unop(Iop_64to32, mkexpr(argR)));
3165 setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
3166 } else {
3167 putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
3168 }
3169 }
3170 DIP("%s%s %s, %s, %s %s lsl %u\n",
3171 isSub ? "sub" : "add", setCC ? "s" : "",
3172 setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
3173 nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
3174 nameExt[opt], imm3);
3175 return True;
3176 }
3177
3178 /* ---------------- CCMP/CCMN(imm) ---------------- */
3179 /* Bizarrely, these appear in the "data processing register"
3180 category, even though they are operations against an
3181 immediate. */
3182 /* 31 29 20 15 11 9 3
3183 sf 1 111010010 imm5 cond 10 Rn 0 nzcv CCMP Rn, #imm5, #nzcv, cond
3184 sf 0 111010010 imm5 cond 10 Rn 0 nzcv CCMN Rn, #imm5, #nzcv, cond
3185
3186 Operation is:
3187 (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
3188 (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
3189 */
3190 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3191 && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
3192 Bool is64 = INSN(31,31) == 1;
3193 Bool isSUB = INSN(30,30) == 1;
3194 UInt imm5 = INSN(20,16);
3195 UInt cond = INSN(15,12);
3196 UInt nn = INSN(9,5);
3197 UInt nzcv = INSN(3,0);
3198
3199 IRTemp condT = newTemp(Ity_I1);
3200 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3201
3202 IRType ty = is64 ? Ity_I64 : Ity_I32;
3203 IRTemp argL = newTemp(ty);
3204 IRTemp argR = newTemp(ty);
3205
3206 if (is64) {
3207 assign(argL, getIReg64orZR(nn));
3208 assign(argR, mkU64(imm5));
3209 } else {
3210 assign(argL, getIReg32orZR(nn));
3211 assign(argR, mkU32(imm5));
3212 }
3213 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3214
3215 DIP("ccm%c %s, #%u, #%u, %s\n",
3216 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3217 imm5, nzcv, nameCC(cond));
3218 return True;
3219 }
3220
3221 /* ---------------- CCMP/CCMN(reg) ---------------- */
3222 /* 31 29 20 15 11 9 3
3223 sf 1 111010010 Rm cond 00 Rn 0 nzcv CCMP Rn, Rm, #nzcv, cond
3224 sf 0 111010010 Rm cond 00 Rn 0 nzcv CCMN Rn, Rm, #nzcv, cond
3225 Operation is:
3226 (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
3227 (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
3228 */
3229 if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
3230 && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
3231 Bool is64 = INSN(31,31) == 1;
3232 Bool isSUB = INSN(30,30) == 1;
3233 UInt mm = INSN(20,16);
3234 UInt cond = INSN(15,12);
3235 UInt nn = INSN(9,5);
3236 UInt nzcv = INSN(3,0);
3237
3238 IRTemp condT = newTemp(Ity_I1);
3239 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
3240
3241 IRType ty = is64 ? Ity_I64 : Ity_I32;
3242 IRTemp argL = newTemp(ty);
3243 IRTemp argR = newTemp(ty);
3244
3245 if (is64) {
3246 assign(argL, getIReg64orZR(nn));
3247 assign(argR, getIReg64orZR(mm));
3248 } else {
3249 assign(argL, getIReg32orZR(nn));
3250 assign(argR, getIReg32orZR(mm));
3251 }
3252 setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);
3253
3254 DIP("ccm%c %s, %s, #%u, %s\n",
3255 isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
3256 nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
3257 return True;
3258 }
3259
3260
3261 /* -------------- REV/REV16/REV32/RBIT -------------- */
3262 /* 31 30 28 20 15 11 9 4
3263
sewardj32d86752014-03-02 12:47:18 +00003264 1 10 11010110 00000 0000 11 n d (1) REV Xd, Xn
3265 0 10 11010110 00000 0000 10 n d (2) REV Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003266
sewardj32d86752014-03-02 12:47:18 +00003267 1 10 11010110 00000 0000 00 n d (3) RBIT Xd, Xn
3268 0 10 11010110 00000 0000 00 n d (4) RBIT Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003269
sewardjdc9259c2014-02-27 11:10:19 +00003270 1 10 11010110 00000 0000 01 n d (5) REV16 Xd, Xn
3271 0 10 11010110 00000 0000 01 n d (6) REV16 Wd, Wn
sewardjbbcf1882014-01-12 12:49:10 +00003272
sewardjdc9259c2014-02-27 11:10:19 +00003273 1 10 11010110 00000 0000 10 n d (7) REV32 Xd, Xn
sewardjbbcf1882014-01-12 12:49:10 +00003274 */
sewardjbbcf1882014-01-12 12:49:10 +00003275 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
sewardjdc9259c2014-02-27 11:10:19 +00003276 && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
3277 UInt b31 = INSN(31,31);
3278 UInt opc = INSN(11,10);
3279
3280 UInt ix = 0;
3281 /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
3282 else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
3283 else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
3284 else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
3285 else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
3286 else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
3287 else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
sewardj32d86752014-03-02 12:47:18 +00003288 if (ix >= 1 && ix <= 7) {
3289 Bool is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
sewardjdc9259c2014-02-27 11:10:19 +00003290 UInt nn = INSN(9,5);
3291 UInt dd = INSN(4,0);
3292 IRTemp src = newTemp(Ity_I64);
3293 IRTemp dst = IRTemp_INVALID;
sewardj32d86752014-03-02 12:47:18 +00003294 IRTemp (*math)(IRTemp) = NULL;
3295 switch (ix) {
3296 case 1: case 2: math = math_BYTESWAP64; break;
3297 case 3: case 4: math = math_BITSWAP64; break;
3298 case 5: case 6: math = math_USHORTSWAP64; break;
3299 case 7: math = math_UINTSWAP64; break;
3300 default: vassert(0);
3301 }
3302 const HChar* names[7]
3303 = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
3304 const HChar* nm = names[ix-1];
3305 vassert(math);
3306 if (ix == 6) {
3307 /* This has to be special cased, since the logic below doesn't
3308 handle it correctly. */
sewardjdc9259c2014-02-27 11:10:19 +00003309 assign(src, getIReg64orZR(nn));
sewardj32d86752014-03-02 12:47:18 +00003310 dst = math(src);
3311 putIReg64orZR(dd,
3312 unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
3313 } else if (is64) {
3314 assign(src, getIReg64orZR(nn));
3315 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003316 putIReg64orZR(dd, mkexpr(dst));
3317 } else {
3318 assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
sewardj32d86752014-03-02 12:47:18 +00003319 dst = math(src);
sewardjdc9259c2014-02-27 11:10:19 +00003320 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3321 }
sewardj32d86752014-03-02 12:47:18 +00003322 DIP("%s %s, %s\n", nm,
sewardjdc9259c2014-02-27 11:10:19 +00003323 nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
3324 return True;
sewardjbbcf1882014-01-12 12:49:10 +00003325 }
sewardjdc9259c2014-02-27 11:10:19 +00003326 /* else fall through */
sewardjbbcf1882014-01-12 12:49:10 +00003327 }
3328
3329 /* -------------------- CLZ/CLS -------------------- */
3330 /* 30 28 24 20 15 9 4
3331 sf 10 1101 0110 00000 00010 0 n d CLZ Rd, Rn
3332 sf 10 1101 0110 00000 00010 1 n d CLS Rd, Rn
3333 */
3334 if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
3335 && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
3336 Bool is64 = INSN(31,31) == 1;
3337 Bool isCLS = INSN(10,10) == 1;
3338 UInt nn = INSN(9,5);
3339 UInt dd = INSN(4,0);
3340 IRTemp src = newTemp(Ity_I64);
3341 IRTemp dst = newTemp(Ity_I64);
3342 if (!isCLS) { // CLS not yet supported
3343 if (is64) {
3344 assign(src, getIReg64orZR(nn));
3345 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3346 mkU64(64),
3347 unop(Iop_Clz64, mkexpr(src))));
3348 putIReg64orZR(dd, mkexpr(dst));
3349 } else {
3350 assign(src, binop(Iop_Shl64,
3351 unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
3352 assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(src), mkU64(0)),
3353 mkU64(32),
3354 unop(Iop_Clz64, mkexpr(src))));
3355 putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
3356 }
3357 DIP("cl%c %s, %s\n",
3358 isCLS ? 's' : 'z', nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
3359 return True;
3360 }
3361 }
3362
3363 /* -------------------- LSLV/LSRV/ASRV -------------------- */
3364 /* 30 28 20 15 11 9 4
3365 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3366 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3367 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
3368 */
3369 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3370 && INSN(15,12) == BITS4(0,0,1,0) && INSN(11,10) < BITS2(1,1)) {
3371 Bool is64 = INSN(31,31) == 1;
3372 UInt mm = INSN(20,16);
3373 UInt op = INSN(11,10);
3374 UInt nn = INSN(9,5);
3375 UInt dd = INSN(4,0);
3376 IRType ty = is64 ? Ity_I64 : Ity_I32;
3377 IRTemp srcL = newTemp(ty);
3378 IRTemp srcR = newTemp(Ity_I8);
3379 IRTemp res = newTemp(ty);
3380 IROp iop = Iop_INVALID;
3381 assign(srcL, getIRegOrZR(is64, nn));
3382 assign(srcR,
3383 unop(Iop_64to8,
3384 binop(Iop_And64,
3385 getIReg64orZR(mm), mkU64(is64 ? 63 : 31))));
3386 switch (op) {
3387 case BITS2(0,0): iop = mkSHL(ty); break;
3388 case BITS2(0,1): iop = mkSHR(ty); break;
3389 case BITS2(1,0): iop = mkSAR(ty); break;
3390 default: vassert(0);
3391 }
3392 assign(res, binop(iop, mkexpr(srcL), mkexpr(srcR)));
3393 putIRegOrZR(is64, dd, mkexpr(res));
3394 vassert(op < 3);
3395 const HChar* names[3] = { "lslv", "lsrv", "asrv" };
3396 DIP("%s %s, %s, %s\n",
3397 names[op], nameIRegOrZR(is64,dd),
3398 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3399 return True;
3400 }
3401
3402 /* -------------------- SDIV/UDIV -------------------- */
3403 /* 30 28 20 15 10 9 4
3404 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3405 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3406 */
3407 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3408 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3409 Bool is64 = INSN(31,31) == 1;
3410 UInt mm = INSN(20,16);
3411 Bool isS = INSN(10,10) == 1;
3412 UInt nn = INSN(9,5);
3413 UInt dd = INSN(4,0);
3414 if (isS) {
3415 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3416 getIRegOrZR(is64, nn),
3417 getIRegOrZR(is64, mm)));
3418 } else {
3419 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3420 getIRegOrZR(is64, nn),
3421 getIRegOrZR(is64, mm)));
3422 }
3423 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3424 nameIRegOrZR(is64, dd),
3425 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3426 return True;
3427 }
3428
3429 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3430 /* 31 23 20 15 14 9 4
3431 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3432 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3433 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3434 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3435 with operation
3436 Xd = Xa +/- (Wn *u/s Wm)
3437 */
3438 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3439 Bool isU = INSN(23,23) == 1;
3440 UInt mm = INSN(20,16);
3441 Bool isAdd = INSN(15,15) == 0;
3442 UInt aa = INSN(14,10);
3443 UInt nn = INSN(9,5);
3444 UInt dd = INSN(4,0);
3445 IRTemp wN = newTemp(Ity_I32);
3446 IRTemp wM = newTemp(Ity_I32);
3447 IRTemp xA = newTemp(Ity_I64);
3448 IRTemp muld = newTemp(Ity_I64);
3449 IRTemp res = newTemp(Ity_I64);
3450 assign(wN, getIReg32orZR(nn));
3451 assign(wM, getIReg32orZR(mm));
3452 assign(xA, getIReg64orZR(aa));
3453 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3454 mkexpr(wN), mkexpr(wM)));
3455 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3456 mkexpr(xA), mkexpr(muld)));
3457 putIReg64orZR(dd, mkexpr(res));
3458 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3459 nameIReg64orZR(dd), nameIReg32orZR(nn),
3460 nameIReg32orZR(mm), nameIReg64orZR(aa));
3461 return True;
3462 }
3463 vex_printf("ARM64 front end: data_processing_register\n");
3464 return False;
3465# undef INSN
3466}
3467
3468
3469/*------------------------------------------------------------*/
3470/*--- Load and Store instructions ---*/
3471/*------------------------------------------------------------*/
3472
3473/* Generate the EA for a "reg + reg" style amode. This is done from
3474 parts of the insn, but for sanity checking sake it takes the whole
3475 insn. This appears to depend on insn[15:12], with opt=insn[15:13]
3476 and S=insn[12]:
3477
3478 The possible forms, along with their opt:S values, are:
3479 011:0 Xn|SP + Xm
3480 111:0 Xn|SP + Xm
3481 011:1 Xn|SP + Xm * transfer_szB
3482 111:1 Xn|SP + Xm * transfer_szB
3483 010:0 Xn|SP + 32Uto64(Wm)
3484 010:1 Xn|SP + 32Uto64(Wm) * transfer_szB
3485 110:0 Xn|SP + 32Sto64(Wm)
3486 110:1 Xn|SP + 32Sto64(Wm) * transfer_szB
3487
3488 Rm is insn[20:16]. Rn is insn[9:5]. Rt is insn[4:0]. Log2 of
3489 the transfer size is insn[23,31,30]. For integer loads/stores,
3490 insn[23] is zero, hence szLg2 can be at most 3 in such cases.
3491
3492 If the decoding fails, it returns IRTemp_INVALID.
3493
3494 isInt is True iff this is decoding is for transfers to/from integer
3495 registers. If False it is for transfers to/from vector registers.
3496*/
3497static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
3498{
3499 UInt optS = SLICE_UInt(insn, 15, 12);
3500 UInt mm = SLICE_UInt(insn, 20, 16);
3501 UInt nn = SLICE_UInt(insn, 9, 5);
3502 UInt szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
3503 | SLICE_UInt(insn, 31, 30); // Log2 of the size
3504
3505 buf[0] = 0;
3506
3507 /* Sanity checks, that this really is a load/store insn. */
3508 if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
3509 goto fail;
3510
3511 if (isInt
3512 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
3513 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
3514 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
3515 && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
3516 goto fail;
3517
3518 if (!isInt
3519 && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
3520 goto fail;
3521
3522 /* Throw out non-verified but possibly valid cases. */
3523 switch (szLg2) {
3524 case BITS3(0,0,0): break; // 8 bit, valid for both int and vec
3525 case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
3526 case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
3527 case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
3528 case BITS3(1,0,0): // can only ever be valid for the vector case
3529 if (isInt) goto fail; else goto fail;
3530 case BITS3(1,0,1): // these sizes are never valid
3531 case BITS3(1,1,0):
3532 case BITS3(1,1,1): goto fail;
3533
3534 default: vassert(0);
3535 }
3536
3537 IRExpr* rhs = NULL;
3538 switch (optS) {
3539 case BITS4(1,1,1,0): goto fail; //ATC
3540 case BITS4(0,1,1,0):
3541 rhs = getIReg64orZR(mm);
3542 vex_sprintf(buf, "[%s, %s]",
3543 nameIReg64orZR(nn), nameIReg64orZR(mm));
3544 break;
3545 case BITS4(1,1,1,1): goto fail; //ATC
3546 case BITS4(0,1,1,1):
3547 rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
3548 vex_sprintf(buf, "[%s, %s lsl %u]",
3549 nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
3550 break;
3551 case BITS4(0,1,0,0):
3552 rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
3553 vex_sprintf(buf, "[%s, %s uxtx]",
3554 nameIReg64orZR(nn), nameIReg32orZR(mm));
3555 break;
3556 case BITS4(0,1,0,1):
3557 rhs = binop(Iop_Shl64,
3558 unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
3559 vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
3560 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3561 break;
3562 case BITS4(1,1,0,0):
3563 rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
3564 vex_sprintf(buf, "[%s, %s sxtx]",
3565 nameIReg64orZR(nn), nameIReg32orZR(mm));
3566 break;
3567 case BITS4(1,1,0,1):
3568 rhs = binop(Iop_Shl64,
3569 unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
3570 vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
3571 nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
3572 break;
3573 default:
3574 /* The rest appear to be genuinely invalid */
3575 goto fail;
3576 }
3577
3578 vassert(rhs);
3579 IRTemp res = newTemp(Ity_I64);
3580 assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
3581 return res;
3582
3583 fail:
3584 vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
3585 return IRTemp_INVALID;
3586}
3587
3588
3589/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
3590 bits of DATAE :: Ity_I64. */
3591static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
3592{
3593 IRExpr* addrE = mkexpr(addr);
3594 switch (szB) {
3595 case 8:
3596 storeLE(addrE, dataE);
3597 break;
3598 case 4:
3599 storeLE(addrE, unop(Iop_64to32, dataE));
3600 break;
3601 case 2:
3602 storeLE(addrE, unop(Iop_64to16, dataE));
3603 break;
3604 case 1:
3605 storeLE(addrE, unop(Iop_64to8, dataE));
3606 break;
3607 default:
3608 vassert(0);
3609 }
3610}
3611
3612
3613/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
3614 placing the result in an Ity_I64 temporary. */
3615static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
3616{
3617 IRTemp res = newTemp(Ity_I64);
3618 IRExpr* addrE = mkexpr(addr);
3619 switch (szB) {
3620 case 8:
3621 assign(res, loadLE(Ity_I64,addrE));
3622 break;
3623 case 4:
3624 assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
3625 break;
3626 case 2:
3627 assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
3628 break;
3629 case 1:
3630 assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
3631 break;
3632 default:
3633 vassert(0);
3634 }
3635 return res;
3636}
3637
3638
sewardj18bf5172014-06-14 18:05:30 +00003639/* Generate a "standard 7" name, from bitQ and size. But also
3640 allow ".1d" since that's occasionally useful. */
3641static
3642const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
3643{
3644 vassert(bitQ <= 1 && size <= 3);
3645 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00003646 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00003647 UInt ix = (bitQ << 2) | size;
3648 vassert(ix < 8);
3649 return nms[ix];
3650}
3651
3652
sewardjbbcf1882014-01-12 12:49:10 +00003653static
3654Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
3655{
3656# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
3657
3658 /* ------------ LDR,STR (immediate, uimm12) ----------- */
3659 /* uimm12 is scaled by the transfer size
3660
3661 31 29 26 21 9 4
3662 | | | | | |
3663 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
3664 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
3665
3666 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
3667 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
3668
3669 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
3670 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
3671
3672 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
3673 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
3674 */
3675 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
3676 UInt szLg2 = INSN(31,30);
3677 UInt szB = 1 << szLg2;
3678 Bool isLD = INSN(22,22) == 1;
3679 UInt offs = INSN(21,10) * szB;
3680 UInt nn = INSN(9,5);
3681 UInt tt = INSN(4,0);
3682 IRTemp ta = newTemp(Ity_I64);
3683 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
3684 if (nn == 31) { /* FIXME generate stack alignment check */ }
3685 vassert(szLg2 < 4);
3686 if (isLD) {
3687 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
3688 } else {
3689 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
3690 }
3691 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
3692 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
3693 DIP("%s %s, [%s, #%u]\n",
3694 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
3695 nameIReg64orSP(nn), offs);
3696 return True;
3697 }
3698
3699 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
3700 /*
3701 31 29 26 20 11 9 4
3702 | | | | | | |
3703 (at-Rn-then-Rn=EA) | | |
3704 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
3705 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
3706
3707 (at-EA-then-Rn=EA)
3708 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
3709 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
3710
3711 (at-EA)
3712 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
3713 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
3714
3715 simm9 is unscaled.
3716
3717 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
3718 load case this is because would create two competing values for
3719 Rt. In the store case the reason is unclear, but the spec
3720 disallows it anyway.
3721
3722 Stores are narrowing, loads are unsigned widening. sz encodes
3723 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
3724 */
/* Decoder arm: LDUR/STUR and pre/post-indexed LDR/STR, integer regs,
   with a 9-bit signed immediate.  Loads are zero-widening into Xt,
   stores are narrowing from Xt.  szLg2 (bits 31:30) gives the transfer
   size: 00=>1, 01=>2, 10=>4, 11=>8 bytes.  Bits 11:10 ("how") select
   the addressing mode: 00 = no writeback (LDUR/STUR), 01 = post-index,
   11 = pre-index; 10 belongs to a different encoding group. */
3725 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
3726 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
3727 UInt szLg2 = INSN(31,30);
3728 UInt szB = 1 << szLg2;
3729 Bool isLoad = INSN(22,22) == 1;
3730 UInt imm9 = INSN(20,12);
3731 UInt nn = INSN(9,5);
3732 UInt tt = INSN(4,0);
3733 Bool wBack = INSN(10,10) == 1;
3734 UInt how = INSN(11,10);
/* Reject the 'how==10' encoding, and writeback forms where Rn == Rt
   (other than register index 31). */
3735 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
3736 /* undecodable; fall through */
3737 } else {
3738 if (nn == 31) { /* FIXME generate stack alignment check */ }
3739
3740 // Compute the transfer address TA and the writeback address WA.
3741 IRTemp tRN = newTemp(Ity_I64);
3742 assign(tRN, getIReg64orSP(nn));
3743 IRTemp tEA = newTemp(Ity_I64);
3744 Long simm9 = (Long)sx_to_64(imm9, 9);
3745 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
3746
/* Post-index transfers at Rn; pre-index and no-writeback transfer at
   the computed EA.  tWA (== tEA when used) is the written-back value. */
3747 IRTemp tTA = newTemp(Ity_I64);
3748 IRTemp tWA = newTemp(Ity_I64);
3749 switch (how) {
3750 case BITS2(0,1):
3751 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3752 case BITS2(1,1):
3753 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3754 case BITS2(0,0):
3755 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3756 default:
3757 vassert(0); /* NOTREACHED */
3758 }
3759
sewardje0bff8b2014-03-09 09:40:23 +00003760 /* Normally rN would be updated after the transfer. However, in
3761 the special case typifed by
3762 str x30, [sp,#-16]!
3763 it is necessary to update SP before the transfer, (1)
3764 because Memcheck will otherwise complain about a write
3765 below the stack pointer, and (2) because the segfault
3766 stack extension mechanism will otherwise extend the stack
3767 only down to SP before the instruction, which might not be
3768 far enough, if the -16 bit takes the actual access
3769 address to the next page.
3770 */
/* Early writeback is only applied to 8-byte pre-indexed stores through
   SP with a negative offset (the push-like case described above). */
3771 Bool earlyWBack
3772 = wBack && simm9 < 0 && szB == 8
3773 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
3774
3775 if (wBack && earlyWBack)
3776 putIReg64orSP(nn, mkexpr(tEA));
3777
sewardjbbcf1882014-01-12 12:49:10 +00003778 if (isLoad) {
3779 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
3780 } else {
3781 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
3782 }
3783
sewardje0bff8b2014-03-09 09:40:23 +00003784 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00003785 putIReg64orSP(nn, mkexpr(tEA));
3786
/* Disassembly trace only: choose the mnemonic by size, and the format
   string (operand syntax) by addressing mode. */
3787 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
3788 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
3789 const HChar* fmt_str = NULL;
3790 switch (how) {
3791 case BITS2(0,1):
3792 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3793 break;
3794 case BITS2(1,1):
3795 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3796 break;
3797 case BITS2(0,0):
3798 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
3799 break;
3800 default:
3801 vassert(0);
3802 }
3803 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
3804 nameIRegOrZR(szB == 8, tt),
3805 nameIReg64orSP(nn), simm9);
3806 return True;
3807 }
3808 }
3809
3810 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
3811 /* L==1 => mm==LD
3812 L==0 => mm==ST
3813 x==0 => 32 bit transfers, and zero extended loads
3814 x==1 => 64 bit transfers
3815 simm7 is scaled by the (single-register) transfer size
3816
3817 (at-Rn-then-Rn=EA)
3818 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
3819
3820 (at-EA-then-Rn=EA)
3821 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
3822
3823 (at-EA)
3824 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
3825 */
3826
/* Decoder arm: LDP/STP (signed, scaled 7-bit immediate), integer regs.
   bX selects 32- vs 64-bit transfers; bits 24:23 select post-index,
   pre-index or signed-offset addressing (see the decode table above). */
3827 UInt insn_30_23 = INSN(30,23);
3828 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
3829 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
3830 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
3831 UInt bL = INSN(22,22);
3832 UInt bX = INSN(31,31);
3833 UInt bWBack = INSN(23,23);
3834 UInt rT1 = INSN(4,0);
3835 UInt rN = INSN(9,5);
3836 UInt rT2 = INSN(14,10);
3837 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
/* Reject writeback with Rn aliasing either destination (unless Rn is
   31), and loads where both destinations are the same register. */
3838 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
3839 || (bL && rT1 == rT2)) {
3840 /* undecodable; fall through */
3841 } else {
3842 if (rN == 31) { /* FIXME generate stack alignment check */ }
3843
3844 // Compute the transfer address TA and the writeback address WA.
3845 IRTemp tRN = newTemp(Ity_I64);
3846 assign(tRN, getIReg64orSP(rN));
3847 IRTemp tEA = newTemp(Ity_I64);
/* imm7 is scaled by the single-register transfer size (4 or 8). */
3848 simm7 = (bX ? 8 : 4) * simm7;
3849 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
3850
3851 IRTemp tTA = newTemp(Ity_I64);
3852 IRTemp tWA = newTemp(Ity_I64);
3853 switch (INSN(24,23)) {
3854 case BITS2(0,1):
3855 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
3856 case BITS2(1,1):
3857 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
3858 case BITS2(1,0):
3859 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
3860 default:
3861 vassert(0); /* NOTREACHED */
3862 }
3863
3864 /* Normally rN would be updated after the transfer. However, in
3865 the special case typifed by
3866 stp x29, x30, [sp,#-112]!
3867 it is necessary to update SP before the transfer, (1)
3868 because Memcheck will otherwise complain about a write
3869 below the stack pointer, and (2) because the segfault
3870 stack extension mechanism will otherwise extend the stack
3871 only down to SP before the instruction, which might not be
3872 far enough, if the -112 bit takes the actual access
3873 address to the next page.
3874 */
3875 Bool earlyWBack
3876 = bWBack && simm7 < 0
3877 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
3878
3879 if (bWBack && earlyWBack)
3880 putIReg64orSP(rN, mkexpr(tEA));
3881
/* The two registers are transferred from consecutive addresses,
   second one at TA + (per-register size). */
3882 /**/ if (bL == 1 && bX == 1) {
3883 // 64 bit load
3884 putIReg64orZR(rT1, loadLE(Ity_I64,
3885 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3886 putIReg64orZR(rT2, loadLE(Ity_I64,
3887 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
3888 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00003889 // 32 bit load
3890 putIReg32orZR(rT1, loadLE(Ity_I32,
3891 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
3892 putIReg32orZR(rT2, loadLE(Ity_I32,
3893 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
3894 } else if (bL == 0 && bX == 1) {
3895 // 64 bit store
3896 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3897 getIReg64orZR(rT1));
3898 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
3899 getIReg64orZR(rT2));
3900 } else {
3901 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00003902 // 32 bit store
3903 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
3904 getIReg32orZR(rT1));
3905 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
3906 getIReg32orZR(rT2));
3907 }
3908
3909 if (bWBack && !earlyWBack)
3910 putIReg64orSP(rN, mkexpr(tEA));
3911
/* Disassembly trace only. */
3912 const HChar* fmt_str = NULL;
3913 switch (INSN(24,23)) {
3914 case BITS2(0,1):
3915 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
3916 break;
3917 case BITS2(1,1):
3918 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
3919 break;
3920 case BITS2(1,0):
3921 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
3922 break;
3923 default:
3924 vassert(0);
3925 }
3926 DIP(fmt_str, bL == 0 ? "st" : "ld",
3927 nameIRegOrZR(bX == 1, rT1),
3928 nameIRegOrZR(bX == 1, rT2),
3929 nameIReg64orSP(rN), simm7);
3930 return True;
3931 }
3932 }
3933
3934 /* ---------------- LDR (literal, int reg) ---------------- */
3935 /* 31 29 23 4
3936 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
3937 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
3938 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
3939 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
3940 Just handles the first two cases for now.
3941 */
3942 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
3943 UInt imm19 = INSN(23,5);
3944 UInt rT = INSN(4,0);
3945 UInt bX = INSN(30,30);
3946 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
3947 if (bX) {
3948 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
3949 } else {
3950 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
3951 }
3952 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
3953 return True;
3954 }
3955
3956 /* -------------- {LD,ST}R (integer register) --------------- */
3957 /* 31 29 20 15 12 11 9 4
3958 | | | | | | | |
3959 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
3960 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
3961 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
3962 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
3963
3964 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
3965 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
3966 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
3967 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
3968 */
3969 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
3970 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
3971 HChar dis_buf[64];
3972 UInt szLg2 = INSN(31,30);
3973 Bool isLD = INSN(22,22) == 1;
3974 UInt tt = INSN(4,0);
3975 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
3976 if (ea != IRTemp_INVALID) {
3977 switch (szLg2) {
3978 case 3: /* 64 bit */
3979 if (isLD) {
3980 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
3981 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
3982 } else {
3983 storeLE(mkexpr(ea), getIReg64orZR(tt));
3984 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
3985 }
3986 break;
3987 case 2: /* 32 bit */
3988 if (isLD) {
3989 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
3990 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
3991 } else {
3992 storeLE(mkexpr(ea), getIReg32orZR(tt));
3993 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
3994 }
3995 break;
3996 case 1: /* 16 bit */
3997 if (isLD) {
3998 putIReg64orZR(tt, unop(Iop_16Uto64,
3999 loadLE(Ity_I16, mkexpr(ea))));
4000 DIP("ldruh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4001 } else {
4002 storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
4003 DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4004 }
4005 break;
4006 case 0: /* 8 bit */
4007 if (isLD) {
4008 putIReg64orZR(tt, unop(Iop_8Uto64,
4009 loadLE(Ity_I8, mkexpr(ea))));
4010 DIP("ldrub %s, %s\n", nameIReg32orZR(tt), dis_buf);
4011 } else {
4012 storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
4013 DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4014 }
4015 break;
4016 default:
4017 vassert(0);
4018 }
4019 return True;
4020 }
4021 }
4022
4023 /* -------------- LDRS{B,H,W} (uimm12) -------------- */
4024 /* 31 29 26 23 21 9 4
4025 10 111 001 10 imm12 n t LDRSW Xt, [Xn|SP, #pimm12 * 4]
4026 01 111 001 1x imm12 n t LDRSH Rt, [Xn|SP, #pimm12 * 2]
4027 00 111 001 1x imm12 n t LDRSB Rt, [Xn|SP, #pimm12 * 1]
4028 where
4029 Rt is Wt when x==1, Xt when x==0
4030 */
4031 if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
4032 /* Further checks on bits 31:30 and 22 */
4033 Bool valid = False;
4034 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4035 case BITS3(1,0,0):
4036 case BITS3(0,1,0): case BITS3(0,1,1):
4037 case BITS3(0,0,0): case BITS3(0,0,1):
4038 valid = True;
4039 break;
4040 }
4041 if (valid) {
4042 UInt szLg2 = INSN(31,30);
4043 UInt bitX = INSN(22,22);
4044 UInt imm12 = INSN(21,10);
4045 UInt nn = INSN(9,5);
4046 UInt tt = INSN(4,0);
4047 UInt szB = 1 << szLg2;
4048 IRExpr* ea = binop(Iop_Add64,
4049 getIReg64orSP(nn), mkU64(imm12 * szB));
4050 switch (szB) {
4051 case 4:
4052 vassert(bitX == 0);
4053 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
4054 DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
4055 nameIReg64orSP(nn), imm12 * szB);
4056 break;
4057 case 2:
4058 if (bitX == 1) {
4059 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
4060 } else {
4061 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
4062 }
4063 DIP("ldrsh %s, [%s, #%u]\n",
4064 nameIRegOrZR(bitX == 0, tt),
4065 nameIReg64orSP(nn), imm12 * szB);
4066 break;
4067 case 1:
4068 if (bitX == 1) {
4069 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
4070 } else {
4071 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
4072 }
4073 DIP("ldrsb %s, [%s, #%u]\n",
4074 nameIRegOrZR(bitX == 0, tt),
4075 nameIReg64orSP(nn), imm12 * szB);
4076 break;
4077 default:
4078 vassert(0);
4079 }
4080 return True;
4081 }
4082 /* else fall through */
4083 }
4084
4085 /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
4086 /* (at-Rn-then-Rn=EA)
4087 31 29 23 21 20 11 9 4
4088 00 111 000 1x 0 imm9 01 n t LDRSB Rt, [Xn|SP], #simm9
4089 01 111 000 1x 0 imm9 01 n t LDRSH Rt, [Xn|SP], #simm9
4090 10 111 000 10 0 imm9 01 n t LDRSW Xt, [Xn|SP], #simm9
4091
4092 (at-EA-then-Rn=EA)
4093 00 111 000 1x 0 imm9 11 n t LDRSB Rt, [Xn|SP, #simm9]!
4094 01 111 000 1x 0 imm9 11 n t LDRSH Rt, [Xn|SP, #simm9]!
4095 10 111 000 10 0 imm9 11 n t LDRSW Xt, [Xn|SP, #simm9]!
4096 where
4097 Rt is Wt when x==1, Xt when x==0
4098 transfer-at-Rn when [11]==0, at EA when [11]==1
4099 */
4100 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4101 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4102 /* Further checks on bits 31:30 and 22 */
4103 Bool valid = False;
4104 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4105 case BITS3(1,0,0): // LDRSW Xt
4106 case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
4107 case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
4108 valid = True;
4109 break;
4110 }
4111 if (valid) {
4112 UInt szLg2 = INSN(31,30);
4113 UInt imm9 = INSN(20,12);
4114 Bool atRN = INSN(11,11) == 0;
4115 UInt nn = INSN(9,5);
4116 UInt tt = INSN(4,0);
4117 IRTemp tRN = newTemp(Ity_I64);
4118 IRTemp tEA = newTemp(Ity_I64);
4119 IRTemp tTA = IRTemp_INVALID;
4120 ULong simm9 = sx_to_64(imm9, 9);
4121 Bool is64 = INSN(22,22) == 0;
4122 assign(tRN, getIReg64orSP(nn));
4123 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4124 tTA = atRN ? tRN : tEA;
4125 HChar ch = '?';
4126 /* There are 5 cases:
4127 byte load, SX to 64
4128 byte load, SX to 32, ZX to 64
4129 halfword load, SX to 64
4130 halfword load, SX to 32, ZX to 64
4131 word load, SX to 64
4132 The ifs below handle them in the listed order.
4133 */
4134 if (szLg2 == 0) {
4135 ch = 'b';
4136 if (is64) {
4137 putIReg64orZR(tt, unop(Iop_8Sto64,
4138 loadLE(Ity_I8, mkexpr(tTA))));
4139 } else {
4140 putIReg32orZR(tt, unop(Iop_8Sto32,
4141 loadLE(Ity_I8, mkexpr(tTA))));
4142 }
4143 }
4144 else if (szLg2 == 1) {
4145 ch = 'h';
4146 if (is64) {
4147 putIReg64orZR(tt, unop(Iop_16Sto64,
4148 loadLE(Ity_I16, mkexpr(tTA))));
4149 } else {
4150 putIReg32orZR(tt, unop(Iop_16Sto32,
4151 loadLE(Ity_I16, mkexpr(tTA))));
4152 }
4153 }
4154 else if (szLg2 == 2 && is64) {
4155 ch = 'w';
4156 putIReg64orZR(tt, unop(Iop_32Sto64,
4157 loadLE(Ity_I32, mkexpr(tTA))));
4158 }
4159 else {
4160 vassert(0);
4161 }
4162 putIReg64orSP(nn, mkexpr(tEA));
4163 DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!",
4164 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4165 return True;
4166 }
4167 /* else fall through */
4168 }
4169
4170 /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
4171 /* 31 29 23 21 20 11 9 4
4172 00 111 000 1x 0 imm9 00 n t LDURSB Rt, [Xn|SP, #simm9]
4173 01 111 000 1x 0 imm9 00 n t LDURSH Rt, [Xn|SP, #simm9]
4174 10 111 000 10 0 imm9 00 n t LDURSW Xt, [Xn|SP, #simm9]
4175 where
4176 Rt is Wt when x==1, Xt when x==0
4177 */
4178 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4179 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4180 /* Further checks on bits 31:30 and 22 */
4181 Bool valid = False;
4182 switch ((INSN(31,30) << 1) | INSN(22,22)) {
4183 case BITS3(1,0,0): // LDURSW Xt
4184 case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
4185 case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
4186 valid = True;
4187 break;
4188 }
4189 if (valid) {
4190 UInt szLg2 = INSN(31,30);
4191 UInt imm9 = INSN(20,12);
4192 UInt nn = INSN(9,5);
4193 UInt tt = INSN(4,0);
4194 IRTemp tRN = newTemp(Ity_I64);
4195 IRTemp tEA = newTemp(Ity_I64);
4196 ULong simm9 = sx_to_64(imm9, 9);
4197 Bool is64 = INSN(22,22) == 0;
4198 assign(tRN, getIReg64orSP(nn));
4199 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4200 HChar ch = '?';
4201 /* There are 5 cases:
4202 byte load, SX to 64
4203 byte load, SX to 32, ZX to 64
4204 halfword load, SX to 64
4205 halfword load, SX to 32, ZX to 64
4206 word load, SX to 64
4207 The ifs below handle them in the listed order.
4208 */
4209 if (szLg2 == 0) {
4210 ch = 'b';
4211 if (is64) {
4212 putIReg64orZR(tt, unop(Iop_8Sto64,
4213 loadLE(Ity_I8, mkexpr(tEA))));
4214 } else {
4215 putIReg32orZR(tt, unop(Iop_8Sto32,
4216 loadLE(Ity_I8, mkexpr(tEA))));
4217 }
4218 }
4219 else if (szLg2 == 1) {
4220 ch = 'h';
4221 if (is64) {
4222 putIReg64orZR(tt, unop(Iop_16Sto64,
4223 loadLE(Ity_I16, mkexpr(tEA))));
4224 } else {
4225 putIReg32orZR(tt, unop(Iop_16Sto32,
4226 loadLE(Ity_I16, mkexpr(tEA))));
4227 }
4228 }
4229 else if (szLg2 == 2 && is64) {
4230 ch = 'w';
4231 putIReg64orZR(tt, unop(Iop_32Sto64,
4232 loadLE(Ity_I32, mkexpr(tEA))));
4233 }
4234 else {
4235 vassert(0);
4236 }
4237 DIP("ldurs%c %s, [%s, #%lld]",
4238 ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
4239 return True;
4240 }
4241 /* else fall through */
4242 }
4243
4244 /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
4245 /* L==1 => mm==LD
4246 L==0 => mm==ST
4247 sz==00 => 32 bit (S) transfers
4248 sz==01 => 64 bit (D) transfers
4249 sz==10 => 128 bit (Q) transfers
4250 sz==11 isn't allowed
4251 simm7 is scaled by the (single-register) transfer size
4252
4253 31 29 22 21 14 9 4
4254 sz 101 1001 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP], #imm
4255 (at-Rn-then-Rn=EA)
4256
4257 sz 101 1011 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]!
4258 (at-EA-then-Rn=EA)
4259
4260 sz 101 1010 L imm7 t2 n t1 mmP SDQt1, SDQt2, [Xn|SP, #imm]
4261 (at-EA)
4262 */
4263
/* Decoder arm: LDP/STP (signed, scaled 7-bit immediate) for FP/vector
   registers.  szSlg2 selects S (4), D (8) or Q (16) byte transfers;
   bits 24:23 select post-index, pre-index or signed-offset mode. */
4264 UInt insn_29_23 = INSN(29,23);
4265 if (insn_29_23 == BITS7(1,0,1,1,0,0,1)
4266 || insn_29_23 == BITS7(1,0,1,1,0,1,1)
4267 || insn_29_23 == BITS7(1,0,1,1,0,1,0)) {
4268 UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
4269 Bool isLD = INSN(22,22) == 1;
4270 Bool wBack = INSN(23,23) == 1;
4271 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4272 UInt tt2 = INSN(14,10);
4273 UInt nn = INSN(9,5);
4274 UInt tt1 = INSN(4,0);
/* sz==11 is not allocated; loads with identical destinations are
   rejected too. */
4275 if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
4276 /* undecodable; fall through */
4277 } else {
4278 if (nn == 31) { /* FIXME generate stack alignment check */ }
4279
4280 // Compute the transfer address TA and the writeback address WA.
4281 UInt szB = 4 << szSlg2; /* szB is the per-register size */
4282 IRTemp tRN = newTemp(Ity_I64);
4283 assign(tRN, getIReg64orSP(nn));
4284 IRTemp tEA = newTemp(Ity_I64);
4285 simm7 = szB * simm7;
4286 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4287
4288 IRTemp tTA = newTemp(Ity_I64);
4289 IRTemp tWA = newTemp(Ity_I64);
4290 switch (INSN(24,23)) {
4291 case BITS2(0,1):
4292 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4293 case BITS2(1,1):
4294 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4295 case BITS2(1,0):
4296 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4297 default:
4298 vassert(0); /* NOTREACHED */
4299 }
4300
/* Per-register IR type used for the lower-lane accesses below. */
4301 IRType ty = Ity_INVALID;
4302 switch (szB) {
4303 case 4: ty = Ity_F32; break;
4304 case 8: ty = Ity_F64; break;
4305 case 16: ty = Ity_V128; break;
4306 default: vassert(0);
4307 }
4308
sewardje0bff8b2014-03-09 09:40:23 +00004309 /* Normally rN would be updated after the transfer. However, in
sewardj19551432014-05-07 09:20:11 +00004310 the special cases typifed by
sewardje0bff8b2014-03-09 09:40:23 +00004311 stp q0, q1, [sp,#-512]!
sewardj19551432014-05-07 09:20:11 +00004312 stp d0, d1, [sp,#-512]!
4313 stp s0, s1, [sp,#-512]!
sewardje0bff8b2014-03-09 09:40:23 +00004314 it is necessary to update SP before the transfer, (1)
4315 because Memcheck will otherwise complain about a write
4316 below the stack pointer, and (2) because the segfault
4317 stack extension mechanism will otherwise extend the stack
4318 only down to SP before the instruction, which might not be
4319 far enough, if the -512 bit takes the actual access
4320 address to the next page.
4321 */
4322 Bool earlyWBack
sewardj19551432014-05-07 09:20:11 +00004323 = wBack && simm7 < 0
sewardje0bff8b2014-03-09 09:40:23 +00004324 && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;
4325
4326 if (wBack && earlyWBack)
4327 putIReg64orSP(nn, mkexpr(tEA));
4328
/* For sub-128-bit loads, zero the whole Q register before writing the
   low lane, so the upper lanes are cleared as required. */
sewardjbbcf1882014-01-12 12:49:10 +00004329 if (isLD) {
sewardj5ba41302014-03-03 08:42:16 +00004330 if (szB < 16) {
4331 putQReg128(tt1, mkV128(0x0000));
4332 }
sewardj606c4ba2014-01-26 19:11:14 +00004333 putQRegLO(tt1,
4334 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
sewardj5ba41302014-03-03 08:42:16 +00004335 if (szB < 16) {
4336 putQReg128(tt2, mkV128(0x0000));
4337 }
sewardj606c4ba2014-01-26 19:11:14 +00004338 putQRegLO(tt2,
4339 loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
sewardjbbcf1882014-01-12 12:49:10 +00004340 } else {
4341 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
sewardj606c4ba2014-01-26 19:11:14 +00004342 getQRegLO(tt1, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004343 storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
sewardj606c4ba2014-01-26 19:11:14 +00004344 getQRegLO(tt2, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004345 }
4346
sewardje0bff8b2014-03-09 09:40:23 +00004347 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004348 putIReg64orSP(nn, mkexpr(tEA));
4349
/* Disassembly trace only. */
4350 const HChar* fmt_str = NULL;
4351 switch (INSN(24,23)) {
4352 case BITS2(0,1):
4353 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4354 break;
4355 case BITS2(1,1):
4356 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4357 break;
4358 case BITS2(1,0):
4359 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4360 break;
4361 default:
4362 vassert(0);
4363 }
4364 DIP(fmt_str, isLD ? "ld" : "st",
sewardj606c4ba2014-01-26 19:11:14 +00004365 nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
sewardjbbcf1882014-01-12 12:49:10 +00004366 nameIReg64orSP(nn), simm7);
4367 return True;
4368 }
4369 }
4370
4371 /* -------------- {LD,ST}R (vector register) --------------- */
4372 /* 31 29 23 20 15 12 11 9 4
4373 | | | | | | | | |
4374 00 111100 011 Rm option S 10 Rn Rt LDR Bt, [Xn|SP, R<m>{ext/sh}]
4375 01 111100 011 Rm option S 10 Rn Rt LDR Ht, [Xn|SP, R<m>{ext/sh}]
4376 10 111100 011 Rm option S 10 Rn Rt LDR St, [Xn|SP, R<m>{ext/sh}]
4377 11 111100 011 Rm option S 10 Rn Rt LDR Dt, [Xn|SP, R<m>{ext/sh}]
4378 00 111100 111 Rm option S 10 Rn Rt LDR Qt, [Xn|SP, R<m>{ext/sh}]
4379
4380 00 111100 001 Rm option S 10 Rn Rt STR Bt, [Xn|SP, R<m>{ext/sh}]
4381 01 111100 001 Rm option S 10 Rn Rt STR Ht, [Xn|SP, R<m>{ext/sh}]
4382 10 111100 001 Rm option S 10 Rn Rt STR St, [Xn|SP, R<m>{ext/sh}]
4383 11 111100 001 Rm option S 10 Rn Rt STR Dt, [Xn|SP, R<m>{ext/sh}]
4384 00 111100 101 Rm option S 10 Rn Rt STR Qt, [Xn|SP, R<m>{ext/sh}]
4385 */
/* Decoder arm: register-offset loads/stores to/from FP/vector regs.
   szLg2 (bit 23 : bits 31:30) selects B/H/S/D/Q; loads zero the whole
   Q register before writing the low lane.  The "//ATC" vasserts mark
   store paths that have never been exercised ("ain't tested code"). */
4386 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4387 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4388 HChar dis_buf[64];
4389 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4390 Bool isLD = INSN(22,22) == 1;
4391 UInt tt = INSN(4,0);
4392 if (szLg2 >= 4) goto after_LDR_STR_vector_register;
/* gen_indexed_EA decodes Rm/option/S; IRTemp_INVALID => undecodable. */
4393 IRTemp ea = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
4394 if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
4395 switch (szLg2) {
4396 case 0: /* 8 bit */
4397 if (isLD) {
4398 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004399 putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
4400 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004401 } else {
4402 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004403 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
4404 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004405 }
4406 break;
4407 case 1:
4408 if (isLD) {
4409 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004410 putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
4411 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004412 } else {
4413 vassert(0); //ATC
sewardj606c4ba2014-01-26 19:11:14 +00004414 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
4415 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004416 }
4417 break;
4418 case 2: /* 32 bit */
4419 if (isLD) {
4420 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004421 putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
4422 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004423 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004424 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
4425 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004426 }
4427 break;
4428 case 3: /* 64 bit */
4429 if (isLD) {
4430 putQReg128(tt, mkV128(0x0000));
sewardj606c4ba2014-01-26 19:11:14 +00004431 putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
4432 DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004433 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004434 storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
4435 DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
sewardjbbcf1882014-01-12 12:49:10 +00004436 }
4437 break;
/* Q-register (szLg2==4) variants are not handled yet. */
4438 case 4: return False; //ATC
4439 default: vassert(0);
4440 }
4441 return True;
4442 }
4443 after_LDR_STR_vector_register:
4444
4445 /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
4446 /* 31 29 22 20 15 12 11 9 4
4447 | | | | | | | | |
4448 10 1110001 01 Rm opt S 10 Rn Rt LDRSW Xt, [Xn|SP, R<m>{ext/sh}]
4449
4450 01 1110001 01 Rm opt S 10 Rn Rt LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
4451 01 1110001 11 Rm opt S 10 Rn Rt LDRSH Wt, [Xn|SP, R<m>{ext/sh}]
4452
4453 00 1110001 01 Rm opt S 10 Rn Rt LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
4454 00 1110001 11 Rm opt S 10 Rn Rt LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
4455 */
/* Decoder arm: LDRS{B,H,W} with a register offset.  Bit 22 == 0 means
   sign-extend to 64 bits (Xt); bit 22 == 1 means sign-extend to 32 and
   zero-extend to 64 (Wt).  LDRSW only exists in the Xt form. */
4456 if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
4457 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4458 HChar dis_buf[64];
4459 UInt szLg2 = INSN(31,30);
4460 Bool sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
4461 UInt tt = INSN(4,0);
/* szLg2 == 3 (64-bit) has no sign-extending form. */
4462 if (szLg2 == 3) goto after_LDRS_integer_register;
/* gen_indexed_EA decodes Rm/option/S; IRTemp_INVALID => undecodable. */
4463 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4464 if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
4465 /* Enumerate the 5 variants explicitly. */
4466 if (szLg2 == 2/*32 bit*/ && sxTo64) {
4467 putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
4468 DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
4469 return True;
4470 }
4471 else
4472 if (szLg2 == 1/*16 bit*/) {
4473 if (sxTo64) {
4474 putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
4475 DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
4476 } else {
4477 putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
4478 DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
4479 }
4480 return True;
4481 }
4482 else
4483 if (szLg2 == 0/*8 bit*/) {
4484 if (sxTo64) {
4485 putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
4486 DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
4487 } else {
4488 putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
4489 DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
4490 }
4491 return True;
4492 }
/* szLg2 == 2 with bit 22 == 1 falls out here without decoding. */
4493 /* else it's an invalid combination */
4494 }
4495 after_LDRS_integer_register:
4496
4497 /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
4498 /* This is the Unsigned offset variant only. The Post-Index and
4499 Pre-Index variants are below.
4500
4501 31 29 23 21 9 4
4502 00 111 101 01 imm12 n t LDR Bt, [Xn|SP + imm12 * 1]
4503 01 111 101 01 imm12 n t LDR Ht, [Xn|SP + imm12 * 2]
4504 10 111 101 01 imm12 n t LDR St, [Xn|SP + imm12 * 4]
4505 11 111 101 01 imm12 n t LDR Dt, [Xn|SP + imm12 * 8]
4506 00 111 101 11 imm12 n t LDR Qt, [Xn|SP + imm12 * 16]
4507
4508 00 111 101 00 imm12 n t STR Bt, [Xn|SP + imm12 * 1]
4509 01 111 101 00 imm12 n t STR Ht, [Xn|SP + imm12 * 2]
4510 10 111 101 00 imm12 n t STR St, [Xn|SP + imm12 * 4]
4511 11 111 101 00 imm12 n t STR Dt, [Xn|SP + imm12 * 8]
4512 00 111 101 10 imm12 n t STR Qt, [Xn|SP + imm12 * 16]
4513 */
/* Decoder arm: LDR/STR (SIMD&FP) with a scaled, unsigned 12-bit
   offset.  szLg2 (bit 23 : bits 31:30) selects B/H/S/D/Q; values
   above 4 are excluded by the decode condition. */
4514 if (INSN(29,24) == BITS6(1,1,1,1,0,1)
4515 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
4516 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4517 Bool isLD = INSN(22,22) == 1;
/* pimm12 is scaled by the transfer size. */
4518 UInt pimm12 = INSN(21,10) << szLg2;
4519 UInt nn = INSN(9,5);
4520 UInt tt = INSN(4,0);
4521 IRTemp tEA = newTemp(Ity_I64);
4522 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4523 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
4524 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
4525 if (szLg2 < 4) {
4526 putQReg128(tt, mkV128(0x0000));
4527 }
sewardj606c4ba2014-01-26 19:11:14 +00004528 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004529 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004530 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004531 }
4532 DIP("%s %s, [%s, #%u]\n",
4533 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004534 nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
sewardjbbcf1882014-01-12 12:49:10 +00004535 return True;
4536 }
4537
4538 /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
4539 /* These are the Post-Index and Pre-Index variants.
4540
4541 31 29 23 20 11 9 4
4542 (at-Rn-then-Rn=EA)
4543 00 111 100 01 0 imm9 01 n t LDR Bt, [Xn|SP], #simm
4544 01 111 100 01 0 imm9 01 n t LDR Ht, [Xn|SP], #simm
4545 10 111 100 01 0 imm9 01 n t LDR St, [Xn|SP], #simm
4546 11 111 100 01 0 imm9 01 n t LDR Dt, [Xn|SP], #simm
4547 00 111 100 11 0 imm9 01 n t LDR Qt, [Xn|SP], #simm
4548
4549 (at-EA-then-Rn=EA)
4550 00 111 100 01 0 imm9 11 n t LDR Bt, [Xn|SP, #simm]!
4551 01 111 100 01 0 imm9 11 n t LDR Ht, [Xn|SP, #simm]!
4552 10 111 100 01 0 imm9 11 n t LDR St, [Xn|SP, #simm]!
4553 11 111 100 01 0 imm9 11 n t LDR Dt, [Xn|SP, #simm]!
4554 00 111 100 11 0 imm9 11 n t LDR Qt, [Xn|SP, #simm]!
4555
4556 Stores are the same except with bit 22 set to 0.
4557 */
/* Decoder arm: LDR/STR (SIMD&FP), post-index (bit 11 == 0, transfer
   at Rn) and pre-index (bit 11 == 1, transfer at EA) variants; both
   write the updated address back to Rn. */
4558 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4559 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4560 && INSN(21,21) == 0 && INSN(10,10) == 1) {
4561 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4562 Bool isLD = INSN(22,22) == 1;
4563 UInt imm9 = INSN(20,12);
4564 Bool atRN = INSN(11,11) == 0;
4565 UInt nn = INSN(9,5);
4566 UInt tt = INSN(4,0);
4567 IRTemp tRN = newTemp(Ity_I64);
4568 IRTemp tEA = newTemp(Ity_I64);
4569 IRTemp tTA = IRTemp_INVALID;
4570 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4571 ULong simm9 = sx_to_64(imm9, 9);
4572 assign(tRN, getIReg64orSP(nn));
4573 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
/* Transfer address: Rn for post-index, updated EA for pre-index. */
4574 tTA = atRN ? tRN : tEA;
4575 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
4576 if (szLg2 < 4) {
4577 putQReg128(tt, mkV128(0x0000));
4578 }
sewardj606c4ba2014-01-26 19:11:14 +00004579 putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
sewardjbbcf1882014-01-12 12:49:10 +00004580 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004581 storeLE(mkexpr(tTA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004582 }
4583 putIReg64orSP(nn, mkexpr(tEA));
4584 DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
4585 isLD ? "ldr" : "str",
sewardj606c4ba2014-01-26 19:11:14 +00004586 nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004587 return True;
4588 }
4589
4590 /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
4591 /* 31 29 23 20 11 9 4
4592 00 111 100 01 0 imm9 00 n t LDR Bt, [Xn|SP, #simm]
4593 01 111 100 01 0 imm9 00 n t LDR Ht, [Xn|SP, #simm]
4594 10 111 100 01 0 imm9 00 n t LDR St, [Xn|SP, #simm]
4595 11 111 100 01 0 imm9 00 n t LDR Dt, [Xn|SP, #simm]
4596 00 111 100 11 0 imm9 00 n t LDR Qt, [Xn|SP, #simm]
4597
4598 00 111 100 00 0 imm9 00 n t STR Bt, [Xn|SP, #simm]
4599 01 111 100 00 0 imm9 00 n t STR Ht, [Xn|SP, #simm]
4600 10 111 100 00 0 imm9 00 n t STR St, [Xn|SP, #simm]
4601 11 111 100 00 0 imm9 00 n t STR Dt, [Xn|SP, #simm]
4602 00 111 100 10 0 imm9 00 n t STR Qt, [Xn|SP, #simm]
4603 */
/* Decoder arm: LDUR/STUR (SIMD&FP) -- unscaled 9-bit signed offset,
   no writeback. */
4604 if (INSN(29,24) == BITS6(1,1,1,1,0,0)
4605 && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
4606 && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
4607 UInt szLg2 = (INSN(23,23) << 2) | INSN(31,30);
4608 Bool isLD = INSN(22,22) == 1;
4609 UInt imm9 = INSN(20,12);
4610 UInt nn = INSN(9,5);
4611 UInt tt = INSN(4,0);
4612 ULong simm9 = sx_to_64(imm9, 9);
4613 IRTemp tEA = newTemp(Ity_I64);
4614 IRType ty = preferredVectorSubTypeFromSize(1 << szLg2);
4615 assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
4616 if (isLD) {
/* Sub-128-bit loads zero the whole Q register first. */
sewardj606c4ba2014-01-26 19:11:14 +00004617 if (szLg2 < 4) {
4618 putQReg128(tt, mkV128(0x0000));
4619 }
4620 putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
sewardjbbcf1882014-01-12 12:49:10 +00004621 } else {
sewardj606c4ba2014-01-26 19:11:14 +00004622 storeLE(mkexpr(tEA), getQRegLO(tt, ty));
sewardjbbcf1882014-01-12 12:49:10 +00004623 }
4624 DIP("%s %s, [%s, #%lld]\n",
4625 isLD ? "ldur" : "stur",
sewardj606c4ba2014-01-26 19:11:14 +00004626 nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
sewardjbbcf1882014-01-12 12:49:10 +00004627 return True;
4628 }
4629
4630 /* ---------------- LDR (literal, SIMD&FP) ---------------- */
4631 /* 31 29 23 4
4632 00 011 100 imm19 t LDR St, [PC + sxTo64(imm19 << 2)]
4633 01 011 100 imm19 t LDR Dt, [PC + sxTo64(imm19 << 2)]
4634 10 011 100 imm19 t LDR Qt, [PC + sxTo64(imm19 << 2)]
4635 */
/* Decoder arm: LDR (literal, SIMD&FP).  PC-relative load of an S, D
   or Q register; the sz==11 encoding is excluded by the decode
   condition.  The full Q register is zeroed before the low lane is
   written. */
4636 if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
4637 UInt szB = 4 << INSN(31,30);
4638 UInt imm19 = INSN(23,5);
4639 UInt tt = INSN(4,0);
/* Literal address: PC + sign-extended, word-scaled offset. */
4640 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4641 IRType ty = preferredVectorSubTypeFromSize(szB);
sewardj606c4ba2014-01-26 19:11:14 +00004642 putQReg128(tt, mkV128(0x0000));
4643 putQRegLO(tt, loadLE(ty, mkU64(ea)));
4644 DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
sewardjbbcf1882014-01-12 12:49:10 +00004645 return True;
4646 }
4647
sewardj606c4ba2014-01-26 19:11:14 +00004648 /* ---------- LD1/ST1 (single structure, no offset) ---------- */
sewardjbbcf1882014-01-12 12:49:10 +00004649 /* 31 23
sewardj606c4ba2014-01-26 19:11:14 +00004650 0100 1100 0100 0000 0111 11 N T LD1 {vT.2d}, [Xn|SP]
4651 0100 1100 0000 0000 0111 11 N T ST1 {vT.2d}, [Xn|SP]
4652 0100 1100 0100 0000 0111 10 N T LD1 {vT.4s}, [Xn|SP]
4653 0100 1100 0000 0000 0111 10 N T ST1 {vT.4s}, [Xn|SP]
4654 0100 1100 0100 0000 0111 01 N T LD1 {vT.8h}, [Xn|SP]
4655 0100 1100 0000 0000 0111 01 N T ST1 {vT.8h}, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00004656 0100 1100 0100 0000 0111 00 N T LD1 {vT.16b}, [Xn|SP]
4657 0100 1100 0000 0000 0111 00 N T ST1 {vT.16b}, [Xn|SP]
sewardj606c4ba2014-01-26 19:11:14 +00004658 FIXME does this assume that the host is little endian?
sewardjbbcf1882014-01-12 12:49:10 +00004659 */
sewardj606c4ba2014-01-26 19:11:14 +00004660 if ( (insn & 0xFFFFF000) == 0x4C407000 // LD1 cases
4661 || (insn & 0xFFFFF000) == 0x4C007000 // ST1 cases
sewardjbbcf1882014-01-12 12:49:10 +00004662 ) {
4663 Bool isLD = INSN(22,22) == 1;
4664 UInt rN = INSN(9,5);
4665 UInt vT = INSN(4,0);
4666 IRTemp tEA = newTemp(Ity_I64);
sewardj606c4ba2014-01-26 19:11:14 +00004667 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4668 const HChar* name = names[INSN(11,10)];
sewardjbbcf1882014-01-12 12:49:10 +00004669 assign(tEA, getIReg64orSP(rN));
4670 if (rN == 31) { /* FIXME generate stack alignment check */ }
4671 if (isLD) {
4672 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4673 } else {
4674 storeLE(mkexpr(tEA), getQReg128(vT));
4675 }
4676 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
sewardj606c4ba2014-01-26 19:11:14 +00004677 vT, name, nameIReg64orSP(rN));
sewardjbbcf1882014-01-12 12:49:10 +00004678 return True;
4679 }
4680
sewardj606c4ba2014-01-26 19:11:14 +00004681 /* 31 23
4682 0000 1100 0100 0000 0111 11 N T LD1 {vT.1d}, [Xn|SP]
4683 0000 1100 0000 0000 0111 11 N T ST1 {vT.1d}, [Xn|SP]
4684 0000 1100 0100 0000 0111 10 N T LD1 {vT.2s}, [Xn|SP]
4685 0000 1100 0000 0000 0111 10 N T ST1 {vT.2s}, [Xn|SP]
4686 0000 1100 0100 0000 0111 01 N T LD1 {vT.4h}, [Xn|SP]
4687 0000 1100 0000 0000 0111 01 N T ST1 {vT.4h}, [Xn|SP]
4688 0000 1100 0100 0000 0111 00 N T LD1 {vT.8b}, [Xn|SP]
4689 0000 1100 0000 0000 0111 00 N T ST1 {vT.8b}, [Xn|SP]
4690 FIXME does this assume that the host is little endian?
4691 */
4692 if ( (insn & 0xFFFFF000) == 0x0C407000 // LD1 cases
4693 || (insn & 0xFFFFF000) == 0x0C007000 // ST1 cases
4694 ) {
4695 Bool isLD = INSN(22,22) == 1;
4696 UInt rN = INSN(9,5);
4697 UInt vT = INSN(4,0);
4698 IRTemp tEA = newTemp(Ity_I64);
4699 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4700 const HChar* name = names[INSN(11,10)];
4701 assign(tEA, getIReg64orSP(rN));
4702 if (rN == 31) { /* FIXME generate stack alignment check */ }
4703 if (isLD) {
4704 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4705 putQRegLane(vT, 1, mkU64(0));
4706 } else {
4707 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4708 }
4709 DIP("%s {v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4710 vT, name, nameIReg64orSP(rN));
4711 return True;
4712 }
4713
4714 /* ---------- LD1/ST1 (multi 1-elem structs, 1 reg, post index) ---------- */
4715 /* 31 23
sewardj7d009132014-02-20 17:43:38 +00004716 0100 1100 1001 1111 0111 11 N T ST1 {vT.2d}, [xN|SP], #16
4717 0100 1100 1101 1111 0111 11 N T LD1 {vT.2d}, [xN|SP], #16
4718 0100 1100 1001 1111 0111 10 N T ST1 {vT.4s}, [xN|SP], #16
4719 0100 1100 1101 1111 0111 10 N T LD1 {vT.4s}, [xN|SP], #16
4720 0100 1100 1001 1111 0111 01 N T ST1 {vT.8h}, [xN|SP], #16
4721 0100 1100 1101 1111 0111 01 N T LD1 {vT.8h}, [xN|SP], #16
4722 0100 1100 1001 1111 0111 00 N T ST1 {vT.16b}, [xN|SP], #16
sewardjf5b08912014-02-06 12:57:58 +00004723 0100 1100 1101 1111 0111 00 N T LD1 {vT.16b}, [xN|SP], #16
sewardj606c4ba2014-01-26 19:11:14 +00004724 Note that #16 is implied and cannot be any other value.
4725 FIXME does this assume that the host is little endian?
4726 */
sewardj7d009132014-02-20 17:43:38 +00004727 if ( (insn & 0xFFFFF000) == 0x4CDF7000 // LD1 cases
4728 || (insn & 0xFFFFF000) == 0x4C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004729 ) {
4730 Bool isLD = INSN(22,22) == 1;
4731 UInt rN = INSN(9,5);
4732 UInt vT = INSN(4,0);
4733 IRTemp tEA = newTemp(Ity_I64);
4734 const HChar* names[4] = { "2d", "4s", "8h", "16b" };
4735 const HChar* name = names[INSN(11,10)];
4736 assign(tEA, getIReg64orSP(rN));
4737 if (rN == 31) { /* FIXME generate stack alignment check */ }
4738 if (isLD) {
4739 putQReg128(vT, loadLE(Ity_V128, mkexpr(tEA)));
4740 } else {
4741 storeLE(mkexpr(tEA), getQReg128(vT));
4742 }
4743 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(16)));
4744 DIP("%s {v%u.%s}, [%s], #16\n", isLD ? "ld1" : "st1",
4745 vT, name, nameIReg64orSP(rN));
4746 return True;
4747 }
4748
sewardj950ca7a2014-04-03 23:03:32 +00004749 /* 31 23
4750 0000 1100 1001 1111 0111 11 N T ST1 {vT.1d}, [xN|SP], #8
4751 0000 1100 1101 1111 0111 11 N T LD1 {vT.1d}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004752 0000 1100 1001 1111 0111 10 N T ST1 {vT.2s}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004753 0000 1100 1101 1111 0111 10 N T LD1 {vT.2s}, [xN|SP], #8
sewardjf5b08912014-02-06 12:57:58 +00004754 0000 1100 1001 1111 0111 01 N T ST1 {vT.4h}, [xN|SP], #8
sewardj950ca7a2014-04-03 23:03:32 +00004755 0000 1100 1101 1111 0111 01 N T LD1 {vT.4h}, [xN|SP], #8
4756 0000 1100 1001 1111 0111 00 N T ST1 {vT.8b}, [xN|SP], #8
4757 0000 1100 1101 1111 0111 00 N T LD1 {vT.8b}, [xN|SP], #8
sewardj606c4ba2014-01-26 19:11:14 +00004758 Note that #8 is implied and cannot be any other value.
4759 FIXME does this assume that the host is little endian?
4760 */
sewardj950ca7a2014-04-03 23:03:32 +00004761 if ( (insn & 0xFFFFF000) == 0x0CDF7000 // LD1 cases
4762 || (insn & 0xFFFFF000) == 0x0C9F7000 // ST1 cases
sewardj606c4ba2014-01-26 19:11:14 +00004763 ) {
sewardj950ca7a2014-04-03 23:03:32 +00004764 Bool isLD = INSN(22,22) == 1;
sewardj606c4ba2014-01-26 19:11:14 +00004765 UInt rN = INSN(9,5);
4766 UInt vT = INSN(4,0);
4767 IRTemp tEA = newTemp(Ity_I64);
4768 const HChar* names[4] = { "1d", "2s", "4h", "8b" };
4769 const HChar* name = names[INSN(11,10)];
4770 assign(tEA, getIReg64orSP(rN));
4771 if (rN == 31) { /* FIXME generate stack alignment check */ }
sewardj950ca7a2014-04-03 23:03:32 +00004772 if (isLD) {
4773 putQRegLane(vT, 0, loadLE(Ity_I64, mkexpr(tEA)));
4774 putQRegLane(vT, 1, mkU64(0));
4775 } else {
4776 storeLE(mkexpr(tEA), getQRegLane(vT, 0, Ity_I64));
4777 }
sewardj606c4ba2014-01-26 19:11:14 +00004778 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(8)));
sewardj950ca7a2014-04-03 23:03:32 +00004779 DIP("%s {v%u.%s}, [%s], #8\n", isLD ? "ld1" : "st1",
4780 vT, name, nameIReg64orSP(rN));
4781 return True;
4782 }
4783
sewardj18bf5172014-06-14 18:05:30 +00004784 /* ---------- LD1R (single structure, replicate) ---------- */
4785 /* 31 29 22 20 15 11 9 4
4786 0q 001 1010 10 00000 110 0 sz n t LD1R Vt.T, [Xn|SP]
4787 0q 001 1011 10 m 110 0 sz n t LD1R Vt.T, [Xn|SP], #sz (m=11111)
4788 , Xm (m!=11111)
4789 */
4790 if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
4791 && INSN(22,21) == BITS2(1,0) && INSN(15,12) == BITS4(1,1,0,0)) {
sewardjdf9d6d52014-06-27 10:43:22 +00004792 UInt bitQ = INSN(30,30);
sewardj18bf5172014-06-14 18:05:30 +00004793 Bool isPX = INSN(23,23) == 1;
4794 UInt mm = INSN(20,16);
4795 UInt sz = INSN(11,10);
4796 UInt nn = INSN(9,5);
4797 UInt tt = INSN(4,0);
4798 IRType ty = integerIRTypeOfSize(1 << sz);
4799 IRTemp tEA = newTemp(Ity_I64);
4800 assign(tEA, getIReg64orSP(nn));
4801 if (nn == 31) { /* FIXME generate stack alignment check */ }
4802 IRTemp loaded = newTemp(ty);
4803 assign(loaded, loadLE(ty, mkexpr(tEA)));
4804 IRTemp dupd = math_DUP_TO_V128(loaded, ty);
sewardjdf9d6d52014-06-27 10:43:22 +00004805 putQReg128(tt, math_MAYBE_ZERO_HI64(bitQ, dupd));
4806 const HChar* arr = nameArr_Q_SZ(bitQ, sz);
sewardj18bf5172014-06-14 18:05:30 +00004807 /* Deal with the writeback, if any. */
4808 if (!isPX && mm == BITS5(0,0,0,0,0)) {
4809 /* No writeback. */
4810 DIP("ld1r v%u.%s, [%s]\n", tt, arr, nameIReg64orSP(nn));
4811 return True;
4812 }
4813 if (isPX) {
4814 putIReg64orSP(nn, binop(Iop_Add64, mkexpr(tEA),
4815 mm == BITS5(1,1,1,1,1) ? mkU64(1 << sz)
4816 : getIReg64orZR(mm)));
4817 if (mm == BITS5(1,1,1,1,1)) {
4818 DIP("ld1r v%u.%s, [%s], %s\n", tt, arr,
4819 nameIReg64orSP(nn), nameIReg64orZR(mm));
4820 } else {
4821 DIP("ld1r v%u.%s, [%s], #%u\n", tt, arr,
4822 nameIReg64orSP(nn), 1 << sz);
4823 }
4824 return True;
4825 }
4826 return False;
4827 }
4828
sewardj168c8bd2014-06-25 13:05:23 +00004829 /* -------- LD2/ST2 (multi 2-elem structs, 2 regs, post index) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004830 /* Only a very few cases. */
4831 /* 31 23 11 9 4
4832 0100 1100 1101 1111 1000 11 n t LD2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4833 0100 1100 1001 1111 1000 11 n t ST2 {Vt.2d, V(t+1)%32.2d}, [Xn|SP], #32
4834 0100 1100 1101 1111 1000 10 n t LD2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4835 0100 1100 1001 1111 1000 10 n t ST2 {Vt.4s, V(t+1)%32.4s}, [Xn|SP], #32
4836 */
4837 if ( (insn & 0xFFFFFC00) == 0x4CDF8C00 // LD2 .2d
4838 || (insn & 0xFFFFFC00) == 0x4C9F8C00 // ST2 .2d
4839 || (insn & 0xFFFFFC00) == 0x4CDF8800 // LD2 .4s
4840 || (insn & 0xFFFFFC00) == 0x4C9F8800 // ST2 .4s
4841 ) {
4842 Bool isLD = INSN(22,22) == 1;
4843 UInt rN = INSN(9,5);
4844 UInt vT = INSN(4,0);
4845 IRTemp tEA = newTemp(Ity_I64);
4846 UInt sz = INSN(11,10);
4847 const HChar* name = "??";
4848 assign(tEA, getIReg64orSP(rN));
4849 if (rN == 31) { /* FIXME generate stack alignment check */ }
4850 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4851 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
4852 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4853 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
4854 if (sz == BITS2(1,1)) {
4855 name = "2d";
4856 if (isLD) {
4857 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
4858 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_16));
4859 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
4860 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_24));
4861 } else {
4862 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
4863 storeLE(tEA_16, getQRegLane((vT+0) % 32, 1, Ity_I64));
4864 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
4865 storeLE(tEA_24, getQRegLane((vT+1) % 32, 1, Ity_I64));
4866 }
4867 }
4868 else if (sz == BITS2(1,0)) {
4869 /* Uh, this is ugly. TODO: better. */
4870 name = "4s";
4871 IRExpr* tEA_4 = binop(Iop_Add64, mkexpr(tEA), mkU64(4));
4872 IRExpr* tEA_12 = binop(Iop_Add64, mkexpr(tEA), mkU64(12));
4873 IRExpr* tEA_20 = binop(Iop_Add64, mkexpr(tEA), mkU64(20));
4874 IRExpr* tEA_28 = binop(Iop_Add64, mkexpr(tEA), mkU64(28));
4875 if (isLD) {
4876 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I32, tEA_0));
4877 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I32, tEA_8));
4878 putQRegLane((vT+0) % 32, 2, loadLE(Ity_I32, tEA_16));
4879 putQRegLane((vT+0) % 32, 3, loadLE(Ity_I32, tEA_24));
4880 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I32, tEA_4));
4881 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I32, tEA_12));
4882 putQRegLane((vT+1) % 32, 2, loadLE(Ity_I32, tEA_20));
4883 putQRegLane((vT+1) % 32, 3, loadLE(Ity_I32, tEA_28));
4884 } else {
4885 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I32));
4886 storeLE(tEA_8, getQRegLane((vT+0) % 32, 1, Ity_I32));
4887 storeLE(tEA_16, getQRegLane((vT+0) % 32, 2, Ity_I32));
4888 storeLE(tEA_24, getQRegLane((vT+0) % 32, 3, Ity_I32));
4889 storeLE(tEA_4, getQRegLane((vT+1) % 32, 0, Ity_I32));
4890 storeLE(tEA_12, getQRegLane((vT+1) % 32, 1, Ity_I32));
4891 storeLE(tEA_20, getQRegLane((vT+1) % 32, 2, Ity_I32));
4892 storeLE(tEA_28, getQRegLane((vT+1) % 32, 3, Ity_I32));
4893 }
4894 }
4895 else {
4896 vassert(0); // Can't happen.
4897 }
4898 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
4899 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld2" : "st2",
4900 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4901 return True;
4902 }
4903
sewardj39f754d2014-06-24 10:26:52 +00004904 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, no offset) -------- */
sewardj950ca7a2014-04-03 23:03:32 +00004905 /* Only a very few cases. */
4906 /* 31 23
4907 0100 1100 0100 0000 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4908 0100 1100 0000 0000 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP]
4909 */
4910 if ( (insn & 0xFFFFFC00) == 0x4C40A000 // LD1
4911 || (insn & 0xFFFFFC00) == 0x4C00A000 // ST1
4912 ) {
4913 Bool isLD = INSN(22,22) == 1;
4914 UInt rN = INSN(9,5);
4915 UInt vT = INSN(4,0);
4916 IRTemp tEA = newTemp(Ity_I64);
4917 const HChar* name = "16b";
4918 assign(tEA, getIReg64orSP(rN));
4919 if (rN == 31) { /* FIXME generate stack alignment check */ }
4920 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4921 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4922 if (isLD) {
4923 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4924 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4925 } else {
4926 storeLE(tEA_0, getQReg128((vT+0) % 32));
4927 storeLE(tEA_16, getQReg128((vT+1) % 32));
4928 }
sewardj8a5ed542014-07-15 11:08:42 +00004929 DIP("%s {v%u.%s, v%u.%s}, [%s]\n", isLD ? "ld1" : "st1",
4930 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
4931 return True;
4932 }
4933
4934 /* -------- LD1/ST1 (multi 1-elem structs, 2 regs, post index) -------- */
4935 /* Only a very few cases. */
4936 /* 31 23
4937 0100 1100 1101 1111 1010 00 n t LD1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4938 0100 1100 1001 1111 1010 00 n t ST1 {Vt.16b, V(t+1)%32.16b}, [Xn|SP], #32
4939 */
4940 if ( (insn & 0xFFFFFC00) == 0x4CDFA000 // LD1
4941 || (insn & 0xFFFFFC00) == 0x4C9FA000 // ST1
4942 ) {
4943 Bool isLD = INSN(22,22) == 1;
4944 UInt rN = INSN(9,5);
4945 UInt vT = INSN(4,0);
4946 IRTemp tEA = newTemp(Ity_I64);
4947 const HChar* name = "16b";
4948 assign(tEA, getIReg64orSP(rN));
4949 if (rN == 31) { /* FIXME generate stack alignment check */ }
4950 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4951 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4952 if (isLD) {
4953 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4954 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4955 } else {
4956 storeLE(tEA_0, getQReg128((vT+0) % 32));
4957 storeLE(tEA_16, getQReg128((vT+1) % 32));
4958 }
4959 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(32)));
sewardj950ca7a2014-04-03 23:03:32 +00004960 DIP("%s {v%u.%s, v%u.%s}, [%s], #32\n", isLD ? "ld1" : "st1",
4961 (vT+0) % 32, name, (vT+1) % 32, name, nameIReg64orSP(rN));
sewardj606c4ba2014-01-26 19:11:14 +00004962 return True;
4963 }
4964
sewardj39f754d2014-06-24 10:26:52 +00004965 /* -------- LD1/ST1 (multi 1-elem structs, 3 regs, no offset) -------- */
4966 /* Only a very few cases. */
4967 /* 31 23
4968 0100 1100 0100 0000 0110 00 n t LD1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4969 0100 1100 0000 0000 0110 00 n t ST1 {Vt.16b .. V(t+2)%32.16b}, [Xn|SP]
4970 */
4971 if ( (insn & 0xFFFFFC00) == 0x4C406000 // LD1
4972 || (insn & 0xFFFFFC00) == 0x4C006000 // ST1
4973 ) {
4974 Bool isLD = INSN(22,22) == 1;
4975 UInt rN = INSN(9,5);
4976 UInt vT = INSN(4,0);
4977 IRTemp tEA = newTemp(Ity_I64);
4978 const HChar* name = "16b";
4979 assign(tEA, getIReg64orSP(rN));
4980 if (rN == 31) { /* FIXME generate stack alignment check */ }
4981 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
4982 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
4983 IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
4984 if (isLD) {
4985 putQReg128((vT+0) % 32, loadLE(Ity_V128, tEA_0));
4986 putQReg128((vT+1) % 32, loadLE(Ity_V128, tEA_16));
4987 putQReg128((vT+2) % 32, loadLE(Ity_V128, tEA_32));
4988 } else {
4989 storeLE(tEA_0, getQReg128((vT+0) % 32));
4990 storeLE(tEA_16, getQReg128((vT+1) % 32));
4991 storeLE(tEA_32, getQReg128((vT+2) % 32));
4992 }
4993 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
4994 isLD ? "ld1" : "st1",
4995 (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
4996 nameIReg64orSP(rN));
4997 return True;
4998 }
4999
sewardj168c8bd2014-06-25 13:05:23 +00005000 /* -------- LD3/ST3 (multi 3-elem structs, 3 regs, post index) -------- */
5001 /* Only a very few cases. */
5002 /* 31 23 11 9 4
5003 0100 1100 1101 1111 0100 11 n t LD3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
5004 0100 1100 1001 1111 0100 11 n t ST3 {Vt.2d .. V(t+2)%32.2d}, [Xn|SP], #48
5005 */
5006 if ( (insn & 0xFFFFFC00) == 0x4CDF4C00 // LD3 .2d
5007 || (insn & 0xFFFFFC00) == 0x4C9F4C00 // ST3 .2d
5008 ) {
5009 Bool isLD = INSN(22,22) == 1;
5010 UInt rN = INSN(9,5);
5011 UInt vT = INSN(4,0);
5012 IRTemp tEA = newTemp(Ity_I64);
5013 UInt sz = INSN(11,10);
5014 const HChar* name = "??";
5015 assign(tEA, getIReg64orSP(rN));
5016 if (rN == 31) { /* FIXME generate stack alignment check */ }
5017 IRExpr* tEA_0 = binop(Iop_Add64, mkexpr(tEA), mkU64(0));
5018 IRExpr* tEA_8 = binop(Iop_Add64, mkexpr(tEA), mkU64(8));
5019 IRExpr* tEA_16 = binop(Iop_Add64, mkexpr(tEA), mkU64(16));
5020 IRExpr* tEA_24 = binop(Iop_Add64, mkexpr(tEA), mkU64(24));
5021 IRExpr* tEA_32 = binop(Iop_Add64, mkexpr(tEA), mkU64(32));
5022 IRExpr* tEA_40 = binop(Iop_Add64, mkexpr(tEA), mkU64(40));
5023 if (sz == BITS2(1,1)) {
5024 name = "2d";
5025 if (isLD) {
5026 putQRegLane((vT+0) % 32, 0, loadLE(Ity_I64, tEA_0));
5027 putQRegLane((vT+0) % 32, 1, loadLE(Ity_I64, tEA_24));
5028 putQRegLane((vT+1) % 32, 0, loadLE(Ity_I64, tEA_8));
5029 putQRegLane((vT+1) % 32, 1, loadLE(Ity_I64, tEA_32));
5030 putQRegLane((vT+2) % 32, 0, loadLE(Ity_I64, tEA_16));
5031 putQRegLane((vT+2) % 32, 1, loadLE(Ity_I64, tEA_40));
5032 } else {
5033 storeLE(tEA_0, getQRegLane((vT+0) % 32, 0, Ity_I64));
5034 storeLE(tEA_24, getQRegLane((vT+0) % 32, 1, Ity_I64));
5035 storeLE(tEA_8, getQRegLane((vT+1) % 32, 0, Ity_I64));
5036 storeLE(tEA_32, getQRegLane((vT+1) % 32, 1, Ity_I64));
5037 storeLE(tEA_16, getQRegLane((vT+2) % 32, 0, Ity_I64));
5038 storeLE(tEA_40, getQRegLane((vT+2) % 32, 1, Ity_I64));
5039 }
5040 }
5041 else {
5042 vassert(0); // Can't happen.
5043 }
5044 putIReg64orSP(rN, binop(Iop_Add64, mkexpr(tEA), mkU64(48)));
5045 DIP("%s {v%u.%s, v%u.%s, v%u.%s}, [%s], #32\n",
5046 isLD ? "ld3" : "st3",
5047 (vT+0) % 32, name, (vT+1) % 32, name, (vT+2) % 32, name,
5048 nameIReg64orSP(rN));
5049 return True;
5050 }
5051
sewardj7d009132014-02-20 17:43:38 +00005052 /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
5053 /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
5054 /* 31 29 23 20 14 9 4
5055 sz 001000 010 11111 0 11111 n t LDX{R,RH,RB} Rt, [Xn|SP]
5056 sz 001000 010 11111 1 11111 n t LDAX{R,RH,RB} Rt, [Xn|SP]
5057 sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
5058 sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
sewardjbbcf1882014-01-12 12:49:10 +00005059 */
sewardj7d009132014-02-20 17:43:38 +00005060 if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
5061 && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
5062 && INSN(14,10) == BITS5(1,1,1,1,1)) {
sewardjdc9259c2014-02-27 11:10:19 +00005063 UInt szBlg2 = INSN(31,30);
5064 Bool isLD = INSN(22,22) == 1;
5065 Bool isAcqOrRel = INSN(15,15) == 1;
5066 UInt ss = INSN(20,16);
5067 UInt nn = INSN(9,5);
5068 UInt tt = INSN(4,0);
sewardjbbcf1882014-01-12 12:49:10 +00005069
sewardjdc9259c2014-02-27 11:10:19 +00005070 vassert(szBlg2 < 4);
5071 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
5072 IRType ty = integerIRTypeOfSize(szB);
5073 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
sewardj7d009132014-02-20 17:43:38 +00005074
sewardjdc9259c2014-02-27 11:10:19 +00005075 IRTemp ea = newTemp(Ity_I64);
5076 assign(ea, getIReg64orSP(nn));
5077 /* FIXME generate check that ea is szB-aligned */
sewardj7d009132014-02-20 17:43:38 +00005078
sewardjdc9259c2014-02-27 11:10:19 +00005079 if (isLD && ss == BITS5(1,1,1,1,1)) {
5080 IRTemp res = newTemp(ty);
5081 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
5082 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
5083 if (isAcqOrRel) {
5084 stmt(IRStmt_MBE(Imbe_Fence));
5085 }
5086 DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
5087 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5088 return True;
5089 }
5090 if (!isLD) {
5091 if (isAcqOrRel) {
5092 stmt(IRStmt_MBE(Imbe_Fence));
5093 }
5094 IRTemp res = newTemp(Ity_I1);
5095 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
5096 stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
5097 /* IR semantics: res is 1 if store succeeds, 0 if it fails.
5098 Need to set rS to 1 on failure, 0 on success. */
5099 putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
5100 mkU64(1)));
5101 DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
5102 nameIRegOrZR(False, ss),
5103 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5104 return True;
5105 }
5106 /* else fall through */
5107 }
5108
5109 /* ------------------ LDA{R,RH,RB} ------------------ */
5110 /* ------------------ STL{R,RH,RB} ------------------ */
5111 /* 31 29 23 20 14 9 4
5112 sz 001000 110 11111 1 11111 n t LDAR<sz> Rt, [Xn|SP]
5113 sz 001000 100 11111 1 11111 n t STLR<sz> Rt, [Xn|SP]
5114 */
5115 if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
5116 && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
5117 UInt szBlg2 = INSN(31,30);
5118 Bool isLD = INSN(22,22) == 1;
5119 UInt nn = INSN(9,5);
5120 UInt tt = INSN(4,0);
5121
5122 vassert(szBlg2 < 4);
5123 UInt szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
5124 IRType ty = integerIRTypeOfSize(szB);
5125 const HChar* suffix[4] = { "rb", "rh", "r", "r" };
5126
5127 IRTemp ea = newTemp(Ity_I64);
5128 assign(ea, getIReg64orSP(nn));
5129 /* FIXME generate check that ea is szB-aligned */
5130
5131 if (isLD) {
5132 IRTemp res = newTemp(ty);
5133 assign(res, loadLE(ty, mkexpr(ea)));
5134 putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
5135 stmt(IRStmt_MBE(Imbe_Fence));
5136 DIP("lda%s %s, [%s]\n", suffix[szBlg2],
5137 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5138 } else {
5139 stmt(IRStmt_MBE(Imbe_Fence));
5140 IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
5141 storeLE(mkexpr(ea), data);
5142 DIP("stl%s %s, [%s]\n", suffix[szBlg2],
5143 nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
5144 }
5145 return True;
sewardjbbcf1882014-01-12 12:49:10 +00005146 }
5147
5148 vex_printf("ARM64 front end: load_store\n");
5149 return False;
5150# undef INSN
5151}
5152
5153
5154/*------------------------------------------------------------*/
5155/*--- Control flow and misc instructions ---*/
5156/*------------------------------------------------------------*/
5157
5158static
sewardj65902992014-05-03 21:20:56 +00005159Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
5160 VexArchInfo* archinfo)
sewardjbbcf1882014-01-12 12:49:10 +00005161{
5162# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
5163
5164 /* ---------------------- B cond ----------------------- */
5165 /* 31 24 4 3
5166 0101010 0 imm19 0 cond */
5167 if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
5168 UInt cond = INSN(3,0);
5169 ULong uimm64 = INSN(23,5) << 2;
5170 Long simm64 = (Long)sx_to_64(uimm64, 21);
5171 vassert(dres->whatNext == Dis_Continue);
5172 vassert(dres->len == 4);
5173 vassert(dres->continueAt == 0);
5174 vassert(dres->jk_StopHere == Ijk_INVALID);
5175 stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
5176 Ijk_Boring,
5177 IRConst_U64(guest_PC_curr_instr + simm64),
5178 OFFB_PC) );
5179 putPC(mkU64(guest_PC_curr_instr + 4));
5180 dres->whatNext = Dis_StopHere;
5181 dres->jk_StopHere = Ijk_Boring;
5182 DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
5183 return True;
5184 }
5185
5186 /* -------------------- B{L} uncond -------------------- */
5187 if (INSN(30,26) == BITS5(0,0,1,0,1)) {
5188 /* 000101 imm26 B (PC + sxTo64(imm26 << 2))
5189 100101 imm26 B (PC + sxTo64(imm26 << 2))
5190 */
5191 UInt bLink = INSN(31,31);
5192 ULong uimm64 = INSN(25,0) << 2;
5193 Long simm64 = (Long)sx_to_64(uimm64, 28);
5194 if (bLink) {
5195 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
5196 }
5197 putPC(mkU64(guest_PC_curr_instr + simm64));
5198 dres->whatNext = Dis_StopHere;
5199 dres->jk_StopHere = Ijk_Call;
5200 DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
5201 guest_PC_curr_instr + simm64);
5202 return True;
5203 }
5204
5205 /* --------------------- B{L} reg --------------------- */
5206 /* 31 24 22 20 15 9 4
5207 1101011 00 10 11111 000000 nn 00000 RET Rn
5208 1101011 00 01 11111 000000 nn 00000 CALL Rn
5209 1101011 00 00 11111 000000 nn 00000 JMP Rn
5210 */
5211 if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
5212 && INSN(20,16) == BITS5(1,1,1,1,1)
5213 && INSN(15,10) == BITS6(0,0,0,0,0,0)
5214 && INSN(4,0) == BITS5(0,0,0,0,0)) {
5215 UInt branch_type = INSN(22,21);
5216 UInt nn = INSN(9,5);
5217 if (branch_type == BITS2(1,0) /* RET */) {
5218 putPC(getIReg64orZR(nn));
5219 dres->whatNext = Dis_StopHere;
5220 dres->jk_StopHere = Ijk_Ret;
5221 DIP("ret %s\n", nameIReg64orZR(nn));
5222 return True;
5223 }
5224 if (branch_type == BITS2(0,1) /* CALL */) {
sewardj702054e2014-05-07 11:09:28 +00005225 IRTemp dst = newTemp(Ity_I64);
5226 assign(dst, getIReg64orZR(nn));
sewardjbbcf1882014-01-12 12:49:10 +00005227 putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
sewardj702054e2014-05-07 11:09:28 +00005228 putPC(mkexpr(dst));
sewardjbbcf1882014-01-12 12:49:10 +00005229 dres->whatNext = Dis_StopHere;
5230 dres->jk_StopHere = Ijk_Call;
5231 DIP("blr %s\n", nameIReg64orZR(nn));
5232 return True;
5233 }
5234 if (branch_type == BITS2(0,0) /* JMP */) {
5235 putPC(getIReg64orZR(nn));
5236 dres->whatNext = Dis_StopHere;
5237 dres->jk_StopHere = Ijk_Boring;
5238 DIP("jmp %s\n", nameIReg64orZR(nn));
5239 return True;
5240 }
5241 }
5242
5243 /* -------------------- CB{N}Z -------------------- */
5244 /* sf 011 010 1 imm19 Rt CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
5245 sf 011 010 0 imm19 Rt CBZ Xt|Wt, (PC + sxTo64(imm19 << 2))
5246 */
5247 if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
5248 Bool is64 = INSN(31,31) == 1;
5249 Bool bIfZ = INSN(24,24) == 0;
5250 ULong uimm64 = INSN(23,5) << 2;
5251 UInt rT = INSN(4,0);
5252 Long simm64 = (Long)sx_to_64(uimm64, 21);
5253 IRExpr* cond = NULL;
5254 if (is64) {
5255 cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
5256 getIReg64orZR(rT), mkU64(0));
5257 } else {
5258 cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
5259 getIReg32orZR(rT), mkU32(0));
5260 }
5261 stmt( IRStmt_Exit(cond,
5262 Ijk_Boring,
5263 IRConst_U64(guest_PC_curr_instr + simm64),
5264 OFFB_PC) );
5265 putPC(mkU64(guest_PC_curr_instr + 4));
5266 dres->whatNext = Dis_StopHere;
5267 dres->jk_StopHere = Ijk_Boring;
5268 DIP("cb%sz %s, 0x%llx\n",
5269 bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
5270 guest_PC_curr_instr + simm64);
5271 return True;
5272 }
5273
5274 /* -------------------- TB{N}Z -------------------- */
5275 /* 31 30 24 23 18 5 4
5276 b5 011 011 1 b40 imm14 t TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
5277 b5 011 011 0 b40 imm14 t TBZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
5278 */
5279 if (INSN(30,25) == BITS6(0,1,1,0,1,1)) {
5280 UInt b5 = INSN(31,31);
5281 Bool bIfZ = INSN(24,24) == 0;
5282 UInt b40 = INSN(23,19);
5283 UInt imm14 = INSN(18,5);
5284 UInt tt = INSN(4,0);
5285 UInt bitNo = (b5 << 5) | b40;
5286 ULong uimm64 = imm14 << 2;
5287 Long simm64 = sx_to_64(uimm64, 16);
5288 IRExpr* cond
5289 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
5290 binop(Iop_And64,
5291 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
5292 mkU64(1)),
5293 mkU64(0));
5294 stmt( IRStmt_Exit(cond,
5295 Ijk_Boring,
5296 IRConst_U64(guest_PC_curr_instr + simm64),
5297 OFFB_PC) );
5298 putPC(mkU64(guest_PC_curr_instr + 4));
5299 dres->whatNext = Dis_StopHere;
5300 dres->jk_StopHere = Ijk_Boring;
5301 DIP("tb%sz %s, #%u, 0x%llx\n",
5302 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
5303 guest_PC_curr_instr + simm64);
5304 return True;
5305 }
5306
5307 /* -------------------- SVC -------------------- */
5308 /* 11010100 000 imm16 000 01
5309 Don't bother with anything except the imm16==0 case.
5310 */
5311 if (INSN(31,0) == 0xD4000001) {
5312 putPC(mkU64(guest_PC_curr_instr + 4));
5313 dres->whatNext = Dis_StopHere;
5314 dres->jk_StopHere = Ijk_Sys_syscall;
5315 DIP("svc #0\n");
5316 return True;
5317 }
5318
5319 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00005320 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00005321 0xD51BD0 010 Rt MSR tpidr_el0, rT
5322 0xD53BD0 010 Rt MRS rT, tpidr_el0
5323 */
5324 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
5325 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
5326 Bool toSys = INSN(21,21) == 0;
5327 UInt tt = INSN(4,0);
5328 if (toSys) {
5329 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
5330 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
5331 } else {
5332 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
5333 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
5334 }
5335 return True;
5336 }
sewardj6eb5ef82014-07-14 20:39:23 +00005337 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00005338 0xD51B44 000 Rt MSR fpcr, rT
5339 0xD53B44 000 Rt MRS rT, fpcr
5340 */
5341 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
5342 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
5343 Bool toSys = INSN(21,21) == 0;
5344 UInt tt = INSN(4,0);
5345 if (toSys) {
5346 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
5347 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
5348 } else {
5349 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
5350 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
5351 }
5352 return True;
5353 }
sewardj6eb5ef82014-07-14 20:39:23 +00005354 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00005355 0xD51B44 001 Rt MSR fpsr, rT
5356 0xD53B44 001 Rt MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00005357 The only part of this we model is FPSR.QC. All other bits
5358 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00005359 */
5360 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
5361 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
5362 Bool toSys = INSN(21,21) == 0;
5363 UInt tt = INSN(4,0);
5364 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00005365 /* Just deal with FPSR.QC. Make up a V128 value which is
5366 zero if Xt[27] is zero and any other value if Xt[27] is
5367 nonzero. */
5368 IRTemp qc64 = newTemp(Ity_I64);
5369 assign(qc64, binop(Iop_And64,
5370 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
5371 mkU64(1)));
5372 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
5373 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00005374 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
5375 } else {
sewardja0645d52014-06-28 22:11:16 +00005376 /* Generate a value which is all zeroes except for bit 27,
5377 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00005378 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00005379 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
5380 IRTemp qc64 = newTemp(Ity_I64);
5381 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
5382 unop(Iop_V128to64, mkexpr(qcV128))));
5383 IRExpr* res = binop(Iop_Shl64,
5384 unop(Iop_1Uto64,
5385 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
5386 mkU8(27));
5387 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00005388 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
5389 }
5390 return True;
5391 }
sewardj6eb5ef82014-07-14 20:39:23 +00005392 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00005393 D51B42 000 Rt MSR nzcv, rT
5394 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00005395 The only parts of NZCV that actually exist are bits 31:28, which
5396 are the N Z C and V bits themselves. Hence the flags thunk provides
5397 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00005398 */
5399 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
5400 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
5401 Bool toSys = INSN(21,21) == 0;
5402 UInt tt = INSN(4,0);
5403 if (toSys) {
5404 IRTemp t = newTemp(Ity_I64);
5405 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
5406 setFlags_COPY(t);
5407 DIP("msr %s, nzcv\n", nameIReg32orZR(tt));
5408 } else {
5409 IRTemp res = newTemp(Ity_I64);
5410 assign(res, mk_arm64g_calculate_flags_nzcv());
5411 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
5412 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
5413 }
5414 return True;
5415 }
sewardj6eb5ef82014-07-14 20:39:23 +00005416 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00005417 Don't support arbitrary reads and writes to this register. Just
5418 return the value 16, which indicates that the DC ZVA instruction
5419 is not permitted, so we don't have to emulate it.
5420 D5 3B 00 111 Rt MRS rT, dczid_el0
5421 */
5422 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
5423 UInt tt = INSN(4,0);
5424 putIReg64orZR(tt, mkU64(1<<4));
5425 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
5426 return True;
5427 }
sewardj6eb5ef82014-07-14 20:39:23 +00005428 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00005429 We just handle reads, and make up a value from the D and I line
5430 sizes in the VexArchInfo we are given, and patch in the following
5431 fields that the Foundation model gives ("natively"):
5432 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
5433 D5 3B 00 001 Rt MRS rT, dczid_el0
5434 */
5435 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
5436 UInt tt = INSN(4,0);
5437 /* Need to generate a value from dMinLine_lg2_szB and
5438 dMinLine_lg2_szB. The value in the register is in 32-bit
5439 units, so need to subtract 2 from the values in the
5440 VexArchInfo. We can assume that the values here are valid --
5441 disInstr_ARM64 checks them -- so there's no need to deal with
5442 out-of-range cases. */
5443 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
5444 && archinfo->arm64_dMinLine_lg2_szB <= 17
5445 && archinfo->arm64_iMinLine_lg2_szB >= 2
5446 && archinfo->arm64_iMinLine_lg2_szB <= 17);
5447 UInt val
5448 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
5449 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
5450 putIReg64orZR(tt, mkU64(val));
5451 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
5452 return True;
5453 }
sewardj6eb5ef82014-07-14 20:39:23 +00005454 /* ---- Cases for CNTVCT_EL0 ----
5455 This is a timestamp counter of some sort. Support reads of it only
5456 by passing through to the host.
5457 D5 3B E0 010 Rt MRS Xt, cntvct_el0
5458 */
5459 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
5460 UInt tt = INSN(4,0);
5461 IRTemp val = newTemp(Ity_I64);
5462 IRExpr** args = mkIRExprVec_0();
5463 IRDirty* d = unsafeIRDirty_1_N (
5464 val,
5465 0/*regparms*/,
5466 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
5467 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
5468 args
5469 );
5470 /* execute the dirty call, dumping the result in val. */
5471 stmt( IRStmt_Dirty(d) );
5472 putIReg64orZR(tt, mkexpr(val));
5473 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
5474 return True;
5475 }
sewardjbbcf1882014-01-12 12:49:10 +00005476
sewardj65902992014-05-03 21:20:56 +00005477 /* ------------------ IC_IVAU ------------------ */
5478 /* D5 0B 75 001 Rt ic ivau, rT
5479 */
5480 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
5481 /* We will always be provided with a valid iMinLine value. */
5482 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
5483 && archinfo->arm64_iMinLine_lg2_szB <= 17);
5484 /* Round the requested address, in rT, down to the start of the
5485 containing block. */
5486 UInt tt = INSN(4,0);
5487 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
5488 IRTemp addr = newTemp(Ity_I64);
5489 assign( addr, binop( Iop_And64,
5490 getIReg64orZR(tt),
5491 mkU64(~(lineszB - 1))) );
5492 /* Set the invalidation range, request exit-and-invalidate, with
5493 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00005494 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
5495 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00005496 /* be paranoid ... */
5497 stmt( IRStmt_MBE(Imbe_Fence) );
5498 putPC(mkU64( guest_PC_curr_instr + 4 ));
5499 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00005500 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00005501 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
5502 return True;
5503 }
5504
5505 /* ------------------ DC_CVAU ------------------ */
5506 /* D5 0B 7B 001 Rt dc cvau, rT
5507 */
5508 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
5509 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00005510 dMinLine size, and request an Ijk_FlushDCache instead of
5511 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00005512 /* We will always be provided with a valid dMinLine value. */
5513 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
5514 && archinfo->arm64_dMinLine_lg2_szB <= 17);
5515 /* Round the requested address, in rT, down to the start of the
5516 containing block. */
5517 UInt tt = INSN(4,0);
5518 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
5519 IRTemp addr = newTemp(Ity_I64);
5520 assign( addr, binop( Iop_And64,
5521 getIReg64orZR(tt),
5522 mkU64(~(lineszB - 1))) );
5523 /* Set the flush range, request exit-and-flush, with
5524 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00005525 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
5526 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00005527 /* be paranoid ... */
5528 stmt( IRStmt_MBE(Imbe_Fence) );
5529 putPC(mkU64( guest_PC_curr_instr + 4 ));
5530 dres->whatNext = Dis_StopHere;
5531 dres->jk_StopHere = Ijk_FlushDCache;
5532 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
5533 return True;
5534 }
5535
5536 /* ------------------ ISB, DMB, DSB ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00005537 if (INSN(31,0) == 0xD5033FDF) {
sewardjd512d102014-02-21 14:49:44 +00005538 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00005539 DIP("isb\n");
5540 return True;
5541 }
5542 if (INSN(31,0) == 0xD5033BBF) {
sewardjd512d102014-02-21 14:49:44 +00005543 stmt(IRStmt_MBE(Imbe_Fence));
sewardjbbcf1882014-01-12 12:49:10 +00005544 DIP("dmb ish\n");
5545 return True;
5546 }
sewardjab102bd2014-06-04 11:44:45 +00005547 if (INSN(31,0) == 0xD5033ABF) {
5548 stmt(IRStmt_MBE(Imbe_Fence));
5549 DIP("dmb ishst\n");
5550 return True;
5551 }
sewardj65902992014-05-03 21:20:56 +00005552 if (INSN(31,0) == 0xD5033B9F) {
5553 stmt(IRStmt_MBE(Imbe_Fence));
5554 DIP("dsb ish\n");
5555 return True;
5556 }
sewardjbbcf1882014-01-12 12:49:10 +00005557
sewardjdc9259c2014-02-27 11:10:19 +00005558 /* -------------------- NOP -------------------- */
5559 if (INSN(31,0) == 0xD503201F) {
5560 DIP("nop\n");
5561 return True;
5562 }
5563
sewardjbbcf1882014-01-12 12:49:10 +00005564 //fail:
5565 vex_printf("ARM64 front end: branch_etc\n");
5566 return False;
5567# undef INSN
5568}
5569
5570
5571/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00005572/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00005573/*------------------------------------------------------------*/
5574
sewardjd96daf62014-06-15 08:17:35 +00005575/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00005576
sewardjd96daf62014-06-15 08:17:35 +00005577static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
5578 // returns a0 b0
5579 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
5580}
sewardje520bb32014-02-17 11:00:53 +00005581
sewardjd96daf62014-06-15 08:17:35 +00005582static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
5583 // returns a1 b1
5584 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
5585}
sewardje520bb32014-02-17 11:00:53 +00005586
sewardjd96daf62014-06-15 08:17:35 +00005587static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
5588 // returns a2 a0 b2 b0
5589 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
5590}
5591
5592static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
5593 // returns a3 a1 b3 b1
5594 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
5595}
5596
5597static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
5598 // returns a1 b1 a0 b0
5599 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
5600}
5601
5602static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
5603 // returns a3 b3 a2 b2
5604 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
5605}
5606
5607static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5608 // returns a6 a4 a2 a0 b6 b4 b2 b0
5609 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
5610}
5611
5612static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5613 // returns a7 a5 a3 a1 b7 b5 b3 b1
5614 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
5615}
5616
5617static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5618 // returns a3 b3 a2 b2 a1 b1 a0 b0
5619 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
5620}
5621
5622static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
5623 // returns a7 b7 a6 b6 a5 b5 a4 b4
5624 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
5625}
5626
5627static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
5628 IRTemp bFEDCBA9876543210 ) {
5629 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
5630 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
5631 mkexpr(bFEDCBA9876543210));
5632}
5633
5634static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
5635 IRTemp bFEDCBA9876543210 ) {
5636 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
5637 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
5638 mkexpr(bFEDCBA9876543210));
5639}
5640
5641static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
5642 IRTemp bFEDCBA9876543210 ) {
5643 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
5644 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
5645 mkexpr(bFEDCBA9876543210));
5646}
5647
5648static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
5649 IRTemp bFEDCBA9876543210 ) {
5650 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
5651 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
5652 mkexpr(bFEDCBA9876543210));
5653}
sewardjecde6972014-02-05 11:01:19 +00005654
sewardjbbcf1882014-01-12 12:49:10 +00005655/* Generate N copies of |bit| in the bottom of a ULong. */
5656static ULong Replicate ( ULong bit, Int N )
5657{
sewardj606c4ba2014-01-26 19:11:14 +00005658 vassert(bit <= 1 && N >= 1 && N < 64);
5659 if (bit == 0) {
5660 return 0;
5661 } else {
5662 /* Careful. This won't work for N == 64. */
5663 return (1ULL << N) - 1;
5664 }
sewardjbbcf1882014-01-12 12:49:10 +00005665}
5666
sewardjfab09142014-02-10 10:28:13 +00005667static ULong Replicate32x2 ( ULong bits32 )
5668{
5669 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
5670 return (bits32 << 32) | bits32;
5671}
5672
5673static ULong Replicate16x4 ( ULong bits16 )
5674{
5675 vassert(0 == (bits16 & ~0xFFFFULL));
5676 return Replicate32x2((bits16 << 16) | bits16);
5677}
5678
5679static ULong Replicate8x8 ( ULong bits8 )
5680{
5681 vassert(0 == (bits8 & ~0xFFULL));
5682 return Replicate16x4((bits8 << 8) | bits8);
5683}
5684
5685/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
5686 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
5687 is 64. In the former case, the upper 32 bits of the returned value
5688 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00005689static ULong VFPExpandImm ( ULong imm8, Int N )
5690{
sewardj606c4ba2014-01-26 19:11:14 +00005691 vassert(imm8 <= 0xFF);
5692 vassert(N == 32 || N == 64);
5693 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
5694 Int F = N - E - 1;
5695 ULong imm8_6 = (imm8 >> 6) & 1;
5696 /* sign: 1 bit */
5697 /* exp: E bits */
5698 /* frac: F bits */
5699 ULong sign = (imm8 >> 7) & 1;
5700 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
5701 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
5702 vassert(sign < (1ULL << 1));
5703 vassert(exp < (1ULL << E));
5704 vassert(frac < (1ULL << F));
5705 vassert(1 + E + F == N);
5706 ULong res = (sign << (E+F)) | (exp << F) | frac;
5707 return res;
sewardjbbcf1882014-01-12 12:49:10 +00005708}
5709
/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
   This might fail, as indicated by the returned Bool.  Page 2530 of
   the manual.
   |op|, |cmode| and |imm8| are the instruction fields of those names;
   on success the expanded immediate is written to *|res| and True is
   returned.  On failure *|res| is left at zero. */
static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
                               UInt op, UInt cmode, UInt imm8 )
{
   vassert(op <= 1);
   vassert(cmode <= 15);
   vassert(imm8 <= 255);

   *res = 0; /* will overwrite iff returning True */

   ULong imm64    = 0;
   Bool  testimm8 = False;

   /* Dispatch on cmode<3:1>; cmode<0> (and op, for case 7) selects
      variants within each case.  'testimm8' records whether the
      encoding is invalid when imm8 == 0 (those shifted forms would
      otherwise just duplicate other encodings). */
   switch (cmode >> 1) {
      case 0: /* 32-bit lanes: 000000000000000000000000abcdefgh */
         testimm8 = False; imm64 = Replicate32x2(imm8); break;
      case 1: /* 32-bit lanes, imm8 shifted left by 8 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
      case 2: /* 32-bit lanes, imm8 shifted left by 16 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
      case 3: /* 32-bit lanes, imm8 shifted left by 24 */
         testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
      case 4: /* 16-bit lanes: 00000000abcdefgh */
         testimm8 = False; imm64 = Replicate16x4(imm8); break;
      case 5: /* 16-bit lanes, imm8 shifted left by 8 */
         testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
      case 6:
         /* 32-bit "shifting ones" forms: imm8:0xFF or imm8:0xFFFF */
         testimm8 = True;
         if ((cmode & 1) == 0)
            imm64 = Replicate32x2((imm8 << 8) | 0xFF);
         else
            imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
         break;
      case 7:
         testimm8 = False;
         /* cmode<0>=0, op=0: imm8 replicated into all 8 byte lanes */
         if ((cmode & 1) == 0 && op == 0)
            imm64 = Replicate8x8(imm8);
         /* cmode<0>=0, op=1: each bit of imm8 expands to a whole byte
            of 0xFF or 0x00 (a per-byte mask) */
         if ((cmode & 1) == 0 && op == 1) {
            imm64 = 0;  imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
            imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
         }
         /* cmode<0>=1, op=0: 32-bit FP immediate (VFPExpandImm-style
            sign/exponent/fraction fields), replicated to both halves */
         if ((cmode & 1) == 1 && op == 0) {
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            ULong imm32 = (imm8_7                 << (1 + 5 + 6 + 19))
                          | ((imm8_6 ^ 1)         << (5 + 6 + 19))
                          | (Replicate(imm8_6, 5) << (6 + 19))
                          | (imm8_50              << 19);
            imm64 = Replicate32x2(imm32);
         }
         /* cmode<0>=1, op=1: 64-bit FP immediate */
         if ((cmode & 1) == 1 && op == 1) {
            // imm64 = imm8<7>:NOT(imm8<6>)
            //                :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
            ULong imm8_7  = (imm8 >> 7) & 1;
            ULong imm8_6  = (imm8 >> 6) & 1;
            ULong imm8_50 = imm8 & 63;
            imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
                    | (Replicate(imm8_6, 8) << 54)
                    | (imm8_50 << 48);
         }
         break;
      default:
        vassert(0);
   }

   /* Shifted forms are invalid with imm8 == 0 -- reject them. */
   if (testimm8 && imm8 == 0)
      return False;

   *res = imm64;
   return True;
}
5790
sewardj606c4ba2014-01-26 19:11:14 +00005791/* Help a bit for decoding laneage for vector operations that can be
5792 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
5793 and SZ bits, typically for vector floating point. */
5794static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
5795 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
5796 /*OUT*/const HChar** arrSpec,
5797 Bool bitQ, Bool bitSZ )
5798{
5799 vassert(bitQ == True || bitQ == False);
5800 vassert(bitSZ == True || bitSZ == False);
5801 if (bitQ && bitSZ) { // 2x64
5802 if (tyI) *tyI = Ity_I64;
5803 if (tyF) *tyF = Ity_F64;
5804 if (nLanes) *nLanes = 2;
5805 if (zeroUpper) *zeroUpper = False;
5806 if (arrSpec) *arrSpec = "2d";
5807 return True;
5808 }
5809 if (bitQ && !bitSZ) { // 4x32
5810 if (tyI) *tyI = Ity_I32;
5811 if (tyF) *tyF = Ity_F32;
5812 if (nLanes) *nLanes = 4;
5813 if (zeroUpper) *zeroUpper = False;
5814 if (arrSpec) *arrSpec = "4s";
5815 return True;
5816 }
5817 if (!bitQ && !bitSZ) { // 2x32
5818 if (tyI) *tyI = Ity_I32;
5819 if (tyF) *tyF = Ity_F32;
5820 if (nLanes) *nLanes = 2;
5821 if (zeroUpper) *zeroUpper = True;
5822 if (arrSpec) *arrSpec = "2s";
5823 return True;
5824 }
5825 // Else impliedly 1x64, which isn't allowed.
5826 return False;
5827}
5828
sewardje520bb32014-02-17 11:00:53 +00005829/* Helper for decoding laneage for shift-style vector operations
5830 that involve an immediate shift amount. */
5831static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
5832 UInt immh, UInt immb )
5833{
5834 vassert(immh < (1<<4));
5835 vassert(immb < (1<<3));
5836 UInt immhb = (immh << 3) | immb;
5837 if (immh & 8) {
5838 if (shift) *shift = 128 - immhb;
5839 if (szBlg2) *szBlg2 = 3;
5840 return True;
5841 }
5842 if (immh & 4) {
5843 if (shift) *shift = 64 - immhb;
5844 if (szBlg2) *szBlg2 = 2;
5845 return True;
5846 }
5847 if (immh & 2) {
5848 if (shift) *shift = 32 - immhb;
5849 if (szBlg2) *szBlg2 = 1;
5850 return True;
5851 }
5852 if (immh & 1) {
5853 if (shift) *shift = 16 - immhb;
5854 if (szBlg2) *szBlg2 = 0;
5855 return True;
5856 }
5857 return False;
5858}
5859
/* Generate IR to fold all lanes of the V128 value in 'src' as
   characterised by the operator 'op', and return the result in the
   bottom bits of a V128, with all other bits set to zero.
   'op' must be one of the binary min/max/add operators handled in the
   switch below; the lane width of 'op' determines how many folding
   stages are needed (log2 of the lane count). */
static IRTemp math_FOLDV ( IRTemp src, IROp op )
{
   /* The basic idea is to use repeated applications of Iop_CatEven*
      and Iop_CatOdd* operators to 'src' so as to clone each lane into
      a complete vector.  Then fold all those vectors with 'op' and
      zero out all but the least significant lane. */
   switch (op) {
      case Iop_Min8Sx16: case Iop_Min8Ux16:
      case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
         /* NB: temp naming here is misleading -- the naming is for 8
            lanes of 16 bit, whereas what is being operated on is 16
            lanes of 8 bits. */
         IRTemp x76543210 = src;
         /* Stage 1: split into two vectors, each holding one 64-bit
            half duplicated. */
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         /* Stage 2: four vectors, each a 32-bit quarter duplicated. */
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Stage 3: eight vectors, each a 16-bit chunk duplicated. */
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Naming not misleading after here. */
         /* Stage 4: sixteen vectors, each one byte lane of 'src'
            duplicated into all sixteen lanes. */
         IRTemp xAllF = newTempV128();
         IRTemp xAllE = newTempV128();
         IRTemp xAllD = newTempV128();
         IRTemp xAllC = newTempV128();
         IRTemp xAllB = newTempV128();
         IRTemp xAllA = newTempV128();
         IRTemp xAll9 = newTempV128();
         IRTemp xAll8 = newTempV128();
         IRTemp xAll7 = newTempV128();
         IRTemp xAll6 = newTempV128();
         IRTemp xAll5 = newTempV128();
         IRTemp xAll4 = newTempV128();
         IRTemp xAll3 = newTempV128();
         IRTemp xAll2 = newTempV128();
         IRTemp xAll1 = newTempV128();
         IRTemp xAll0 = newTempV128();
         assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
         assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
         assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
         assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
         assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
         assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
         assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
         assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
         assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
         assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
         assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
         assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
         assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
         assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
         assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
         assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
         /* Now fold the sixteen "broadcast" vectors pairwise with 'op'
            down to a single vector (despite the 'max' naming, 'op' may
            equally be a min or add). */
         IRTemp maxFE = newTempV128();
         IRTemp maxDC = newTempV128();
         IRTemp maxBA = newTempV128();
         IRTemp max98 = newTempV128();
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
         assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
         assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
         assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
         assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
         assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
         assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
         assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
         IRTemp maxFEDC = newTempV128();
         IRTemp maxBA98 = newTempV128();
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
         assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp maxFEDCBA98 = newTempV128();
         IRTemp max76543210 = newTempV128();
         assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         IRTemp maxAllLanes = newTempV128();
         assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
                                       mkexpr(max76543210)));
         /* Finally, keep only the least significant 8-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
         return res;
      }
      case Iop_Min16Sx8: case Iop_Min16Ux8:
      case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
         /* Same scheme as the 8x16 case, but 8 lanes of 16 bits, so
            one fewer duplication/folding stage is needed. */
         IRTemp x76543210 = src;
         IRTemp x76547654 = newTempV128();
         IRTemp x32103210 = newTempV128();
         assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
         assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
         IRTemp x76767676 = newTempV128();
         IRTemp x54545454 = newTempV128();
         IRTemp x32323232 = newTempV128();
         IRTemp x10101010 = newTempV128();
         assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
         assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
         assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
         assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
         /* Each xNNNNNNNN holds lane N of 'src' in all 8 positions. */
         IRTemp x77777777 = newTempV128();
         IRTemp x66666666 = newTempV128();
         IRTemp x55555555 = newTempV128();
         IRTemp x44444444 = newTempV128();
         IRTemp x33333333 = newTempV128();
         IRTemp x22222222 = newTempV128();
         IRTemp x11111111 = newTempV128();
         IRTemp x00000000 = newTempV128();
         assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
         assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
         assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
         assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
         assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
         assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
         assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
         assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
         /* Pairwise fold with 'op' down to one vector. */
         IRTemp max76 = newTempV128();
         IRTemp max54 = newTempV128();
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
         assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
         assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
         assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
         IRTemp max7654 = newTempV128();
         IRTemp max3210 = newTempV128();
         assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         IRTemp max76543210 = newTempV128();
         assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
         /* Keep only the least significant 16-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
         return res;
      }
      case Iop_Min32Sx4: case Iop_Min32Ux4:
      case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
         /* 4 lanes of 32 bits: two duplication stages, then fold. */
         IRTemp x3210 = src;
         IRTemp x3232 = newTempV128();
         IRTemp x1010 = newTempV128();
         assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
         assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
         /* Each xNNNN holds lane N of 'src' in all 4 positions. */
         IRTemp x3333 = newTempV128();
         IRTemp x2222 = newTempV128();
         IRTemp x1111 = newTempV128();
         IRTemp x0000 = newTempV128();
         assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
         assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
         assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
         assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
         IRTemp max32 = newTempV128();
         IRTemp max10 = newTempV128();
         assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
         assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
         IRTemp max3210 = newTempV128();
         assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
         /* Keep only the least significant 32-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
         return res;
      }
      case Iop_Add64x2: {
         /* 2 lanes of 64 bits: one duplication stage, one fold. */
         IRTemp x10 = src;
         IRTemp x00 = newTempV128();
         IRTemp x11 = newTempV128();
         assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
         assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
         IRTemp max10 = newTempV128();
         assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
         /* Keep only the least significant 64-bit lane. */
         IRTemp res = newTempV128();
         assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
         return res;
      }
      default:
         vassert(0);
   }
}
6062
6063
sewardj92d0ae32014-04-03 13:48:54 +00006064/* Generate IR for TBL and TBX. This deals with the 128 bit case
6065 only. */
6066static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
6067 IRTemp oor_values )
6068{
6069 vassert(len >= 0 && len <= 3);
6070
6071 /* Generate some useful constants as concisely as possible. */
6072 IRTemp half15 = newTemp(Ity_I64);
6073 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
6074 IRTemp half16 = newTemp(Ity_I64);
6075 assign(half16, mkU64(0x1010101010101010ULL));
6076
6077 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00006078 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006079 assign(allZero, mkV128(0x0000));
6080 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006081 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006082 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
6083 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006084 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006085 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
6086 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006087 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006088 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
6089 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006090 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006091 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
6092 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00006093 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006094 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
6095
6096 /* Group the 16/32/48/64 vectors so as to be indexable. */
6097 IRTemp allXX[4] = { all16, all32, all48, all64 };
6098
6099 /* Compute the result for each table vector, with zeroes in places
6100 where the index values are out of range, and OR them into the
6101 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00006102 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006103 assign(running_result, mkV128(0));
6104
6105 UInt tabent;
6106 for (tabent = 0; tabent <= len; tabent++) {
6107 vassert(tabent >= 0 && tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00006108 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006109 assign(bias,
6110 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00006111 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006112 assign(biased_indices,
6113 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00006114 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006115 assign(valid_mask,
6116 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00006117 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006118 assign(safe_biased_indices,
6119 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00006120 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006121 assign(results_or_junk,
6122 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
6123 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00006124 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006125 assign(results_or_zero,
6126 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
6127 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00006128 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006129 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
6130 mkexpr(running_result)));
6131 running_result = tmp;
6132 }
6133
6134 /* So now running_result holds the overall result where the indices
6135 are in range, and zero in out-of-range lanes. Now we need to
6136 compute an overall validity mask and use this to copy in the
6137 lanes in the oor_values for out of range indices. This is
6138 unnecessary for TBL but will get folded out by iropt, so we lean
6139 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00006140 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006141 assign(overall_valid_mask,
6142 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00006143 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00006144 assign(result,
6145 binop(Iop_OrV128,
6146 mkexpr(running_result),
6147 binop(Iop_AndV128,
6148 mkexpr(oor_values),
6149 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
6150 return result;
6151}
6152
6153
sewardj31b5a952014-06-26 07:41:14 +00006154/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
6155 an op which takes two I64s and produces a V128. That is, a widening
6156 operator. Generate IR which applies |opI64x2toV128| to either the
6157 lower (if |is2| is False) or upper (if |is2| is True) halves of
6158 |argL| and |argR|, and return the value in a new IRTemp.
6159*/
6160static
6161IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
6162 IRExpr* argL, IRExpr* argR )
6163{
sewardj8e91fd42014-07-11 12:05:47 +00006164 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00006165 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
6166 assign(res, binop(opI64x2toV128, unop(slice, argL),
6167 unop(slice, argR)));
6168 return res;
6169}
6170
6171
sewardjdf9d6d52014-06-27 10:43:22 +00006172/* Generate signed/unsigned absolute difference vector IR. */
6173static
6174IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
6175{
sewardj6f312d02014-06-28 12:21:37 +00006176 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00006177 IRTemp argL = newTempV128();
6178 IRTemp argR = newTempV128();
6179 IRTemp msk = newTempV128();
6180 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00006181 assign(argL, argLE);
6182 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00006183 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00006184 mkexpr(argL), mkexpr(argR)));
6185 assign(res,
6186 binop(Iop_OrV128,
6187 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00006188 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00006189 mkexpr(msk)),
6190 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00006191 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00006192 unop(Iop_NotV128, mkexpr(msk)))));
6193 return res;
6194}
6195
6196
sewardj6f312d02014-06-28 12:21:37 +00006197/* Generate IR that takes a V128 and sign- or zero-widens
6198 either the lower or upper set of lanes to twice-as-wide,
6199 resulting in a new V128 value. */
6200static
sewardja5a6b752014-06-30 07:33:56 +00006201IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
6202 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00006203{
sewardj8e91fd42014-07-11 12:05:47 +00006204 IRTemp src = newTempV128();
6205 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00006206 assign(src, srcE);
6207 switch (sizeNarrow) {
6208 case X10:
6209 assign(res,
6210 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
6211 binop(fromUpperHalf ? Iop_InterleaveHI32x4
6212 : Iop_InterleaveLO32x4,
6213 mkexpr(src),
6214 mkexpr(src)),
6215 mkU8(32)));
6216 break;
6217 case X01:
6218 assign(res,
6219 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
6220 binop(fromUpperHalf ? Iop_InterleaveHI16x8
6221 : Iop_InterleaveLO16x8,
6222 mkexpr(src),
6223 mkexpr(src)),
6224 mkU8(16)));
6225 break;
6226 case X00:
6227 assign(res,
6228 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
6229 binop(fromUpperHalf ? Iop_InterleaveHI8x16
6230 : Iop_InterleaveLO8x16,
6231 mkexpr(src),
6232 mkexpr(src)),
6233 mkU8(8)));
6234 break;
6235 default:
6236 vassert(0);
6237 }
6238 return res;
6239}
6240
6241
sewardja5a6b752014-06-30 07:33:56 +00006242/* Generate IR that takes a V128 and sign- or zero-widens
6243 either the even or odd lanes to twice-as-wide,
6244 resulting in a new V128 value. */
6245static
6246IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
6247 UInt sizeNarrow, IRExpr* srcE )
6248{
sewardj8e91fd42014-07-11 12:05:47 +00006249 IRTemp src = newTempV128();
6250 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00006251 IROp opSAR = mkVecSARN(sizeNarrow+1);
6252 IROp opSHR = mkVecSHRN(sizeNarrow+1);
6253 IROp opSHL = mkVecSHLN(sizeNarrow+1);
6254 IROp opSxR = zWiden ? opSHR : opSAR;
6255 UInt amt = 0;
6256 switch (sizeNarrow) {
6257 case X10: amt = 32; break;
6258 case X01: amt = 16; break;
6259 case X00: amt = 8; break;
6260 default: vassert(0);
6261 }
6262 assign(src, srcE);
6263 if (fromOdd) {
6264 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
6265 } else {
6266 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
6267 mkU8(amt)));
6268 }
6269 return res;
6270}
6271
6272
6273/* Generate IR that takes two V128s and narrows (takes lower half)
6274 of each lane, producing a single V128 value. */
6275static
6276IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
6277{
sewardj8e91fd42014-07-11 12:05:47 +00006278 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00006279 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
6280 mkexpr(argHi), mkexpr(argLo)));
6281 return res;
6282}
6283
6284
/* Return a temp which holds the vector dup of the lane of width
   (1 << size) obtained from src[laneNo].  The dup is built from a
   log2-style cascade of lane-concatenation ops: each op in ops[]
   doubles the number of copies of the selected lane, working from
   64-bit granularity (ops[3]) down to the requested lane size. */
static
IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
{
   vassert(size <= 3);
   /* Normalise |laneNo| so it is of the form
      x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
      This puts the bits we want to inspect at constant offsets
      regardless of the value of |size|.
   */
   UInt ix = laneNo << size;
   vassert(ix <= 15);
   IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
   /* Each case deliberately falls through to the next: a B-lane dup
      needs all four selection steps, an H-lane dup only the last
      three, and so on.  Entries not reached stay Iop_INVALID. */
   switch (size) {
      case 0: /* B */
         ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
         /* fallthrough */
      case 1: /* H */
         ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
         /* fallthrough */
      case 2: /* S */
         ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
         /* fallthrough */
      case 3: /* D */
         ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
         break;
      default:
         vassert(0);
   }
   /* Apply the selection ops widest-first; the first Iop_INVALID
      marks the point below which no further selection is needed. */
   IRTemp res = newTempV128();
   assign(res, src);
   Int i;
   for (i = 3; i >= 0; i--) {
      if (ops[i] == Iop_INVALID)
         break;
      IRTemp tmp = newTempV128();
      assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
      res = tmp;
   }
   return res;
}
6327
6328
6329/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
6330 selector encoded as shown below. Return a new V128 holding the
6331 selected lane from |srcV| dup'd out to V128, and also return the
6332 lane number, log2 of the lane size in bytes, and width-character via
6333 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
6334 is an invalid selector, in which case return
6335 IRTemp_INVALID, 0, 0 and '?' respectively.
6336
6337 imm5 = xxxx1 signifies .b[xxxx]
6338 = xxx10 .h[xxx]
6339 = xx100 .s[xx]
6340 = x1000 .d[x]
6341 otherwise invalid
6342*/
6343static
6344IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
6345 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
6346 IRExpr* srcV, UInt imm5 )
6347{
6348 *laneNo = 0;
6349 *laneSzLg2 = 0;
6350 *laneCh = '?';
6351
6352 if (imm5 & 1) {
6353 *laneNo = (imm5 >> 1) & 15;
6354 *laneSzLg2 = 0;
6355 *laneCh = 'b';
6356 }
6357 else if (imm5 & 2) {
6358 *laneNo = (imm5 >> 2) & 7;
6359 *laneSzLg2 = 1;
6360 *laneCh = 'h';
6361 }
6362 else if (imm5 & 4) {
6363 *laneNo = (imm5 >> 3) & 3;
6364 *laneSzLg2 = 2;
6365 *laneCh = 's';
6366 }
6367 else if (imm5 & 8) {
6368 *laneNo = (imm5 >> 4) & 1;
6369 *laneSzLg2 = 3;
6370 *laneCh = 'd';
6371 }
6372 else {
6373 /* invalid */
6374 return IRTemp_INVALID;
6375 }
6376
6377 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
6378}
6379
6380
6381/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
6382static
6383IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
6384{
6385 IRType ty = Ity_INVALID;
6386 IRTemp rcS = IRTemp_INVALID;
6387 switch (size) {
6388 case X01:
6389 vassert(imm <= 0xFFFFULL);
6390 ty = Ity_I16;
6391 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
6392 break;
6393 case X10:
6394 vassert(imm <= 0xFFFFFFFFULL);
6395 ty = Ity_I32;
6396 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
6397 break;
6398 case X11:
6399 ty = Ity_I64;
6400 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
6401 default:
6402 vassert(0);
6403 }
6404 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
6405 return rcV;
6406}
6407
6408
sewardj25523c42014-06-15 19:36:29 +00006409/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
6410 and the upper can contain any value -- it is ignored. If |is2| is False,
6411 generate IR to put |new64| in the lower half of vector reg |dd| and zero
6412 the upper half. If |is2| is True, generate IR to put |new64| in the upper
6413 half of vector reg |dd| and leave the lower half unchanged. This
6414 simulates the behaviour of the "foo/foo2" instructions in which the
6415 destination is half the width of sources, for example addhn/addhn2.
6416*/
6417static
6418void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
6419{
6420 if (is2) {
6421 /* Get the old contents of Vdd, zero the upper half, and replace
6422 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00006423 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006424 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00006425 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006426 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
6427 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00006428 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00006429 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
6430 mkexpr(t_newHI_zero)));
6431 putQReg128(dd, mkexpr(res));
6432 } else {
6433 /* This is simple. */
6434 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
6435 }
6436}
6437
6438
sewardj8e91fd42014-07-11 12:05:47 +00006439/* Compute vector SQABS at lane size |size| for |srcE|, returning
6440 the q result in |*qabs| and the normal result in |*nabs|. */
6441static
6442void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
6443 IRExpr* srcE, UInt size )
6444{
6445 IRTemp src, mask, maskn, nsub, qsub;
6446 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
6447 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
6448 assign(src, srcE);
6449 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
6450 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
6451 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
6452 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
6453 assign(*nabs, binop(Iop_OrV128,
6454 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
6455 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
6456 assign(*qabs, binop(Iop_OrV128,
6457 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
6458 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
6459}
6460
6461
sewardj51d012a2014-07-21 09:19:50 +00006462/* Compute vector SQNEG at lane size |size| for |srcE|, returning
6463 the q result in |*qneg| and the normal result in |*nneg|. */
6464static
6465void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
6466 IRExpr* srcE, UInt size )
6467{
6468 IRTemp src = IRTemp_INVALID;
6469 newTempsV128_3(&src, nneg, qneg);
6470 assign(src, srcE);
6471 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
6472 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
6473}
6474
6475
sewardjecedd982014-08-11 14:02:47 +00006476/* Zero all except the least significant lane of |srcE|, where |size|
6477 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00006478static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00006479{
6480 vassert(size < 4);
6481 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00006482 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00006483 return t;
6484}
6485
6486
sewardj51d012a2014-07-21 09:19:50 +00006487/* Generate IR to compute vector widening MULL from either the lower
6488 (is2==False) or upper (is2==True) halves of vecN and vecM. The
6489 widening multiplies are unsigned when isU==True and signed when
6490 isU==False. |size| is the narrow lane size indication. Optionally,
6491 the product may be added to or subtracted from vecD, at the wide lane
6492 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
6493 is 'm' (only multiply) then the accumulate part does not happen, and
6494 |vecD| is expected to == IRTemp_INVALID.
6495
6496 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
6497 are allowed. The result is returned in a new IRTemp, which is
6498 returned in *res. */
6499static
6500void math_MULL_ACC ( /*OUT*/IRTemp* res,
6501 Bool is2, Bool isU, UInt size, HChar mas,
6502 IRTemp vecN, IRTemp vecM, IRTemp vecD )
6503{
6504 vassert(res && *res == IRTemp_INVALID);
6505 vassert(size <= 2);
6506 vassert(mas == 'm' || mas == 'a' || mas == 's');
6507 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
6508 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
6509 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
6510 : (mas == 's' ? mkVecSUB(size+1)
6511 : Iop_INVALID);
6512 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
6513 mkexpr(vecN), mkexpr(vecM));
6514 *res = newTempV128();
6515 assign(*res, mas == 'm' ? mkexpr(mul)
6516 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
6517}
6518
6519
/* Same as math_MULL_ACC, except the multiply is signed widening,
   the multiplied value is then doubled, before being added to or
   subtracted from the accumulated value.  And everything is
   saturated.  In all cases, saturation residuals are returned
   via (sat1q, sat1n), and in the accumulate cases,
   via (sat2q, sat2n) too.  All results are returned in new temporaries.
   In the no-accumulate case, *sat2q and *sat2n are never instantiated,
   so the caller can tell this has happened. */
static
void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
                        /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                        /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
                        Bool is2, UInt size, HChar mas,
                        IRTemp vecN, IRTemp vecM, IRTemp vecD )
{
   vassert(size <= 2);
   vassert(mas == 'm' || mas == 'a' || mas == 's');
   /* Compute
         sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
         sat1n = vecN.D[is2] *s  vecM.d[is2] *  2
      IOW take either the low or high halves of vecN and vecM, signed widen,
      multiply, double that, and signedly saturate.  Also compute the same
      but without saturation.
   */
   vassert(sat2q && *sat2q == IRTemp_INVALID);
   vassert(sat2n && *sat2n == IRTemp_INVALID);
   newTempsV128_3(sat1q, sat1n, res);
   /* tq is the saturating double-multiply (QDMULL does the doubling
      itself); tn is the plain widening multiply, doubled below by
      adding it to itself. */
   IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
                                         mkexpr(vecN), mkexpr(vecM));
   assign(*sat1q, mkexpr(tq));
   assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));

   /* If there is no accumulation, the final result is sat1q,
      and there's no assignment to sat2q or sat2n. */
   if (mas == 'm') {
      assign(*res, mkexpr(*sat1q));
      return;
   }

   /* Compute
         sat2q  = vecD +sq/-sq sat1q
         sat2n  = vecD +/-     sat1n
         result = sat2q
   */
   newTempsV128_2(sat2q, sat2n);
   assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
                        mkexpr(vecD), mkexpr(*sat1q)));
   assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(vecD), mkexpr(*sat1n)));
   assign(*res, mkexpr(*sat2q));
}
6573
6574
sewardj54ffa1d2014-07-22 09:27:49 +00006575/* Generate IR for widening signed vector multiplies. The operands
6576 have their lane width signedly widened, and they are then multiplied
6577 at the wider width, returning results in two new IRTemps. */
sewardja5a6b752014-06-30 07:33:56 +00006578static
sewardj54ffa1d2014-07-22 09:27:49 +00006579void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
6580 UInt sizeNarrow, IRTemp argL, IRTemp argR )
6581{
6582 vassert(sizeNarrow <= 2);
6583 newTempsV128_2(resHI, resLO);
6584 IRTemp argLhi = newTemp(Ity_I64);
6585 IRTemp argLlo = newTemp(Ity_I64);
6586 IRTemp argRhi = newTemp(Ity_I64);
6587 IRTemp argRlo = newTemp(Ity_I64);
6588 assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
6589 assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
6590 assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
6591 assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
6592 IROp opMulls = mkVecMULLS(sizeNarrow);
6593 assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
6594 assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
6595}
6596
6597
sewardj257e99f2014-08-03 12:45:19 +00006598/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
6599 double that, possibly add a rounding constant (R variants), and take
6600 the high half. */
sewardj54ffa1d2014-07-22 09:27:49 +00006601static
6602void math_SQDMULH ( /*OUT*/IRTemp* res,
6603 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
6604 Bool isR, UInt size, IRTemp vN, IRTemp vM )
6605{
6606 vassert(size == X01 || size == X10); /* s or h only */
6607
6608 newTempsV128_3(res, sat1q, sat1n);
6609
6610 IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
6611 math_MULLS(&mullsHI, &mullsLO, size, vN, vM);
6612
6613 IRTemp addWide = mkVecADD(size+1);
6614
6615 if (isR) {
6616 assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));
6617
6618 Int rcShift = size == X01 ? 15 : 31;
6619 IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
6620 assign(*sat1n,
6621 binop(mkVecCATODDLANES(size),
6622 binop(addWide,
6623 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
6624 mkexpr(roundConst)),
6625 binop(addWide,
6626 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
6627 mkexpr(roundConst))));
6628 } else {
6629 assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));
6630
6631 assign(*sat1n,
6632 binop(mkVecCATODDLANES(size),
6633 binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
6634 binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
6635 }
6636
6637 assign(*res, mkexpr(*sat1q));
6638}
6639
6640
sewardja97dddf2014-08-14 22:26:52 +00006641/* Generate IR for SQSHL, UQSHL, SQSHLU by imm. Put the result in
6642 a new temp in *res, and the Q difference pair in new temps in
6643 *qDiff1 and *qDiff2 respectively. |nm| denotes which of the
6644 three operations it is. */
6645static
6646void math_QSHL_IMM ( /*OUT*/IRTemp* res,
6647 /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
6648 IRTemp src, UInt size, UInt shift, const HChar* nm )
6649{
6650 vassert(size <= 3);
6651 UInt laneBits = 8 << size;
6652 vassert(shift < laneBits);
6653 newTempsV128_3(res, qDiff1, qDiff2);
6654 IRTemp z128 = newTempV128();
6655 assign(z128, mkV128(0x0000));
6656
6657 /* UQSHL */
6658 if (vex_streq(nm, "uqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006659 IROp qop = mkVecQSHLNSATUU(size);
sewardja97dddf2014-08-14 22:26:52 +00006660 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
6661 if (shift == 0) {
6662 /* No shift means no saturation. */
6663 assign(*qDiff1, mkexpr(z128));
6664 assign(*qDiff2, mkexpr(z128));
6665 } else {
6666 /* Saturation has occurred if any of the shifted-out bits are
6667 nonzero. We get the shifted-out bits by right-shifting the
6668 original value. */
6669 UInt rshift = laneBits - shift;
6670 vassert(rshift >= 1 && rshift < laneBits);
6671 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6672 assign(*qDiff2, mkexpr(z128));
6673 }
6674 return;
6675 }
6676
6677 /* SQSHL */
6678 if (vex_streq(nm, "sqshl")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006679 IROp qop = mkVecQSHLNSATSS(size);
sewardja97dddf2014-08-14 22:26:52 +00006680 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
6681 if (shift == 0) {
6682 /* No shift means no saturation. */
6683 assign(*qDiff1, mkexpr(z128));
6684 assign(*qDiff2, mkexpr(z128));
6685 } else {
6686 /* Saturation has occurred if any of the shifted-out bits are
6687 different from the top bit of the original value. */
6688 UInt rshift = laneBits - 1 - shift;
6689 vassert(rshift >= 0 && rshift < laneBits-1);
6690 /* qDiff1 is the shifted out bits, and the top bit of the original
6691 value, preceded by zeroes. */
6692 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6693 /* qDiff2 is the top bit of the original value, cloned the
6694 correct number of times. */
6695 assign(*qDiff2, binop(mkVecSHRN(size),
6696 binop(mkVecSARN(size), mkexpr(src),
6697 mkU8(laneBits-1)),
6698 mkU8(rshift)));
6699 /* This also succeeds in comparing the top bit of the original
6700 value to itself, which is a bit stupid, but not wrong. */
6701 }
6702 return;
6703 }
6704
6705 /* SQSHLU */
6706 if (vex_streq(nm, "sqshlu")) {
sewardj1dd3ec12014-08-15 09:11:08 +00006707 IROp qop = mkVecQSHLNSATSU(size);
sewardja97dddf2014-08-14 22:26:52 +00006708 assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
sewardjacc29642014-08-15 05:35:35 +00006709 if (shift == 0) {
6710 /* If there's no shift, saturation depends on the top bit
6711 of the source. */
6712 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
6713 assign(*qDiff2, mkexpr(z128));
6714 } else {
6715 /* Saturation has occurred if any of the shifted-out bits are
6716 nonzero. We get the shifted-out bits by right-shifting the
6717 original value. */
6718 UInt rshift = laneBits - shift;
6719 vassert(rshift >= 1 && rshift < laneBits);
6720 assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
6721 assign(*qDiff2, mkexpr(z128));
6722 }
sewardja97dddf2014-08-14 22:26:52 +00006723 return;
6724 }
6725
6726 vassert(0);
6727}
6728
6729
sewardj62ece662014-08-17 19:59:09 +00006730/* Generate IR to do SRHADD and URHADD. */
6731static
6732IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
6733{
6734 /* Generate this:
6735 (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
6736 */
6737 vassert(size <= 3);
6738 IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
6739 IROp opADD = mkVecADD(size);
6740 /* The only tricky bit is to generate the correct vector 1 constant. */
6741 const ULong ones64[4]
6742 = { 0x0101010101010101ULL, 0x0001000100010001ULL,
6743 0x0000000100000001ULL, 0x0000000000000001ULL };
6744 IRTemp imm64 = newTemp(Ity_I64);
6745 assign(imm64, mkU64(ones64[size]));
6746 IRTemp vecOne = newTempV128();
6747 assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
6748 IRTemp scaOne = newTemp(Ity_I8);
6749 assign(scaOne, mkU8(1));
6750 IRTemp res = newTempV128();
6751 assign(res,
6752 binop(opADD,
6753 binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
6754 binop(opADD,
6755 binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
6756 binop(opSHR,
6757 binop(opADD,
6758 binop(opADD,
6759 binop(Iop_AndV128, mkexpr(aa),
6760 mkexpr(vecOne)),
6761 binop(Iop_AndV128, mkexpr(bb),
6762 mkexpr(vecOne))
6763 ),
6764 mkexpr(vecOne)
6765 ),
6766 mkexpr(scaOne)
6767 )
6768 )
6769 )
6770 );
6771 return res;
6772}
6773
6774
sewardj54ffa1d2014-07-22 09:27:49 +00006775/* QCFLAG tracks the SIMD sticky saturation status. Update the status
6776 thusly: if, after application of |opZHI| to both |qres| and |nres|,
6777 they have the same value, leave QCFLAG unchanged. Otherwise, set it
6778 (implicitly) to 1. |opZHI| may only be one of the Iop_ZeroHIxxofV128
6779 operators, or Iop_INVALID, in which case |qres| and |nres| are used
6780 unmodified. The presence |opZHI| means this function can be used to
6781 generate QCFLAG update code for both scalar and vector SIMD operations.
6782*/
6783static
6784void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
sewardja5a6b752014-06-30 07:33:56 +00006785{
sewardj8e91fd42014-07-11 12:05:47 +00006786 IRTemp diff = newTempV128();
6787 IRTemp oldQCFLAG = newTempV128();
6788 IRTemp newQCFLAG = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00006789 if (opZHI == Iop_INVALID) {
6790 assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
6791 } else {
sewardj257e99f2014-08-03 12:45:19 +00006792 vassert(opZHI == Iop_ZeroHI64ofV128
6793 || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
sewardj54ffa1d2014-07-22 09:27:49 +00006794 assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
6795 }
sewardja5a6b752014-06-30 07:33:56 +00006796 assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
6797 assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
6798 stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
6799}
6800
6801
/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   /* Iop_INVALID requests no lane masking: all 128 bits compared. */
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}
6810
6811
sewardj8e91fd42014-07-11 12:05:47 +00006812/*------------------------------------------------------------*/
6813/*--- SIMD and FP instructions ---*/
6814/*------------------------------------------------------------*/
6815
static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD EXT (vector extract) instruction group.
      Returns True iff |insn| was decoded and IR emitted.

      31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      /* EXT extracts 16 (or 8) consecutive bytes starting at byte
         |imm4| from the concatenation Vm:Vn (Vn lowest). */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            /* Zero byte offset: the result is just Vn. */
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 <= 15);
            /* Shift the low part down by imm4 bytes and OR in the
               bottom bytes of the high part above it. */
            assign(res,
                   binop(Iop_OrV128,
                         binop(Iop_ShlV128, mkexpr(sHi), mkU8(8 * (16-imm4))),
                         binop(Iop_ShrV128, mkexpr(sLo), mkU8(8 * imm4))));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         /* 64-bit variant: only byte offsets 0..7 are valid. */
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            /* Glue the low 64 bits of both sources side by side, then
               shift the 16-byte window down by imm4 bytes. */
            assign(res,
                   binop(Iop_ShrV128,
                         binop(Iop_InterleaveLO64x2, mkexpr(sHi), mkexpr(sLo)),
                         mkU8(8 * imm4)));
         }
         /* Only the low 64 bits of the destination are written. */
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }

   return False;
#  undef INSN
}
sewardjbbcf1882014-01-12 12:49:10 +00006874
sewardjbbcf1882014-01-12 12:49:10 +00006875
sewardjdf1628c2014-06-10 22:52:05 +00006876static
6877Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
6878{
6879 /* 31 29 23 21 20 15 14 12 11 9 4
6880 0 q 001110 op2 0 m 0 len op 00 n d
6881 Decode fields: op2,len,op
sewardjbbcf1882014-01-12 12:49:10 +00006882 */
sewardjdf1628c2014-06-10 22:52:05 +00006883# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6884 if (INSN(31,31) != 0
6885 || INSN(29,24) != BITS6(0,0,1,1,1,0)
6886 || INSN(21,21) != 0
6887 || INSN(15,15) != 0
6888 || INSN(11,10) != BITS2(0,0)) {
6889 return False;
6890 }
6891 UInt bitQ = INSN(30,30);
6892 UInt op2 = INSN(23,22);
6893 UInt mm = INSN(20,16);
6894 UInt len = INSN(14,13);
6895 UInt bitOP = INSN(12,12);
6896 UInt nn = INSN(9,5);
6897 UInt dd = INSN(4,0);
6898
6899 if (op2 == X00) {
6900 /* -------- 00,xx,0 TBL, xx register table -------- */
6901 /* -------- 00,xx,1 TBX, xx register table -------- */
6902 /* 31 28 20 15 14 12 9 4
6903 0q0 01110 000 m 0 len 000 n d TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6904 0q0 01110 000 m 0 len 100 n d TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
6905 where Ta = 16b(q=1) or 8b(q=0)
6906 */
sewardjdf1628c2014-06-10 22:52:05 +00006907 Bool isTBX = bitOP == 1;
6908 /* The out-of-range values to use. */
sewardj8e91fd42014-07-11 12:05:47 +00006909 IRTemp oor_values = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006910 assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
6911 /* src value */
sewardj8e91fd42014-07-11 12:05:47 +00006912 IRTemp src = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006913 assign(src, getQReg128(mm));
6914 /* The table values */
6915 IRTemp tab[4];
6916 UInt i;
6917 for (i = 0; i <= len; i++) {
6918 vassert(i < 4);
sewardj8e91fd42014-07-11 12:05:47 +00006919 tab[i] = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00006920 assign(tab[i], getQReg128((nn + i) % 32));
6921 }
6922 IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
sewardjdf9d6d52014-06-27 10:43:22 +00006923 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
6924 const HChar* Ta = bitQ ==1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00006925 const HChar* nm = isTBX ? "tbx" : "tbl";
6926 DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
6927 nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
6928 return True;
6929 }
6930
6931# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
6932 return False;
6933# undef INSN
6934}
6935
6936
static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD permute group: UZP1/UZP2 (unzip),
      TRN1/TRN2 (transpose) and ZIP1/ZIP2 (zip/interleave).
      Returns True iff |insn| was recognised, in which case the IR
      for it has been emitted and the DIP disassembly line printed.

      31 29 23 21 20 15 14 11 9 4
      0 q 001110 size 0 m 0 opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      /* UZP1 concatenates the even-numbered lanes of the two sources,
         UZP2 the odd-numbered ones. */
      IROp op = isUZP1 ? mkVecCATEVENLANES(size)
                       : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         /* 64-bit variant: pack the active (low) 64-bit halves of Vm
            and Vn into one 128-bit value and feed it to both operands
            of the cat-lanes op; the unwanted upper output half is
            zeroed below by math_MAYBE_ZERO_HI64. */
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                            getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      /* Build the transpose out of two primitives: first cat the
         even (TRN1) or odd (TRN2) lanes of each source with itself,
         then interleave the high halves of those two results. */
      IROp op1 = isTRN1 ? mkVecCATEVENLANES(size)
                        : mkVecCATODDLANES(size);
      IROp op2 = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op = isZIP1 ? mkVecINTERLEAVELO(size)
                       : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         /* 64-bit ZIP2 wants the upper 32 bits of each source's low
            half.  Shift each source left by 32 so those bits land
            where the INTERLEAVEHI op will pick them up; presumably
            the surplus interleaved bits end up in the upper 64 bits,
            which math_MAYBE_ZERO_HI64 discards below. */
         assign(preL, binop(Iop_ShlV128, getQReg128(mm), mkU8(32)));
         assign(preR, binop(Iop_ShlV128, getQReg128(nn), mkU8(32)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}
7036
7037
7038static
7039Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
7040{
7041 /* 31 28 23 21 16 11 9 4
7042 0 q u 01110 size 11000 opcode 10 n d
7043 Decode fields: u,size,opcode
7044 */
7045# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7046 if (INSN(31,31) != 0
7047 || INSN(28,24) != BITS5(0,1,1,1,0)
7048 || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
7049 return False;
7050 }
7051 UInt bitQ = INSN(30,30);
7052 UInt bitU = INSN(29,29);
7053 UInt size = INSN(23,22);
7054 UInt opcode = INSN(16,12);
7055 UInt nn = INSN(9,5);
7056 UInt dd = INSN(4,0);
7057
sewardja5a6b752014-06-30 07:33:56 +00007058 if (opcode == BITS5(0,0,0,1,1)) {
7059 /* -------- 0,xx,00011 SADDLV -------- */
7060 /* -------- 1,xx,00011 UADDLV -------- */
7061 /* size is the narrow size */
7062 if (size == X11 || (size == X10 && bitQ == 0)) return False;
7063 Bool isU = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00007064 IRTemp src = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007065 assign(src, getQReg128(nn));
7066 /* The basic plan is to widen the lower half, and if Q = 1,
7067 the upper half too. Add them together (if Q = 1), and in
7068 either case fold with add at twice the lane width.
7069 */
7070 IRExpr* widened
7071 = mkexpr(math_WIDEN_LO_OR_HI_LANES(
7072 isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
7073 if (bitQ == 1) {
7074 widened
7075 = binop(mkVecADD(size+1),
7076 widened,
7077 mkexpr(math_WIDEN_LO_OR_HI_LANES(
7078 isU, True/*fromUpperHalf*/, size, mkexpr(src)))
7079 );
7080 }
7081 /* Now fold. */
sewardj8e91fd42014-07-11 12:05:47 +00007082 IRTemp tWi = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007083 assign(tWi, widened);
7084 IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
7085 putQReg128(dd, mkexpr(res));
7086 const HChar* arr = nameArr_Q_SZ(bitQ, size);
7087 const HChar ch = "bhsd"[size];
7088 DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
7089 nameQReg128(dd), ch, nameQReg128(nn), arr);
7090 return True;
7091 }
7092
sewardjb9aff1e2014-06-15 21:55:33 +00007093 UInt ix = 0;
7094 /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
7095 else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
7096 else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
7097 /**/
7098 if (ix != 0) {
7099 /* -------- 0,xx,01010: SMAXV -------- (1) */
7100 /* -------- 1,xx,01010: UMAXV -------- (2) */
7101 /* -------- 0,xx,11010: SMINV -------- (3) */
7102 /* -------- 1,xx,11010: UMINV -------- (4) */
7103 /* -------- 0,xx,11011: ADDV -------- (5) */
7104 vassert(ix >= 1 && ix <= 5);
sewardjdf1628c2014-06-10 22:52:05 +00007105 if (size == X11) return False; // 1d,2d cases not allowed
7106 if (size == X10 && bitQ == 0) return False; // 2s case not allowed
sewardjdf1628c2014-06-10 22:52:05 +00007107 const IROp opMAXS[3]
7108 = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
7109 const IROp opMAXU[3]
7110 = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
sewardjb9aff1e2014-06-15 21:55:33 +00007111 const IROp opMINS[3]
7112 = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
7113 const IROp opMINU[3]
7114 = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
7115 const IROp opADD[3]
7116 = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
sewardjdf1628c2014-06-10 22:52:05 +00007117 vassert(size < 3);
sewardjb9aff1e2014-06-15 21:55:33 +00007118 IROp op = Iop_INVALID;
7119 const HChar* nm = NULL;
7120 switch (ix) {
7121 case 1: op = opMAXS[size]; nm = "smaxv"; break;
7122 case 2: op = opMAXU[size]; nm = "umaxv"; break;
7123 case 3: op = opMINS[size]; nm = "sminv"; break;
7124 case 4: op = opMINU[size]; nm = "uminv"; break;
7125 case 5: op = opADD[size]; nm = "addv"; break;
7126 default: vassert(0);
7127 }
7128 vassert(op != Iop_INVALID && nm != NULL);
sewardj8e91fd42014-07-11 12:05:47 +00007129 IRTemp tN1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00007130 assign(tN1, getQReg128(nn));
7131 /* If Q == 0, we're just folding lanes in the lower half of
7132 the value. In which case, copy the lower half of the
7133 source into the upper half, so we can then treat it the
sewardjb9aff1e2014-06-15 21:55:33 +00007134 same as the full width case. Except for the addition case,
7135 in which we have to zero out the upper half. */
sewardj8e91fd42014-07-11 12:05:47 +00007136 IRTemp tN2 = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00007137 assign(tN2, bitQ == 0
7138 ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
7139 : mk_CatEvenLanes64x2(tN1,tN1))
7140 : mkexpr(tN1));
sewardjdf9d6d52014-06-27 10:43:22 +00007141 IRTemp res = math_FOLDV(tN2, op);
sewardjdf1628c2014-06-10 22:52:05 +00007142 if (res == IRTemp_INVALID)
7143 return False; /* means math_MINMAXV
7144 doesn't handle this case yet */
7145 putQReg128(dd, mkexpr(res));
sewardjdf1628c2014-06-10 22:52:05 +00007146 const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
7147 IRType laneTy = tys[size];
7148 const HChar* arr = nameArr_Q_SZ(bitQ, size);
7149 DIP("%s %s, %s.%s\n", nm,
7150 nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
7151 return True;
7152 }
7153
7154# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7155 return False;
7156# undef INSN
7157}
7158
7159
static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD "copy" group: DUP (element and general),
      INS (general and element), and SMOV/UMOV.  Returns True iff
      |insn| was recognised, in which case the IR for it has been
      emitted and the DIP disassembly line printed.

      31 28 20 15 14 10 9 4
      0 q op 01110000 imm5 0 imm4 1 n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 000001 n d DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* handle_DUP_VEC_ELEM decodes imm5 into the lane number, lane
         size (log2) and lane name char, and builds the duplicated
         value; IRTemp_INVALID signals an invalid imm5 encoding. */
      UInt  laneNo    = 0;
      UInt  laneSzLg2 = 0;
      HChar laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31 28 20 15 9 4
      0q0 01110000 imm5 0 0001 1 n d DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1 8B(q=0) or 16b(q=1), R=W
            xxx10 4H(q=0) or 8H(q=1), R=W
            xx100 2S(q=0) or 4S(q=1), R=W
            x1000 Invalid(q=0) or 2D(q=1), R=X
            x0000 Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      /* The lowest set bit of imm5 selects the lane size; narrow the
         Rn value to that size, zero-extended back to 64 bits. */
      if (imm5 & 1) {
         arT = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         /* Replicate the lane across 64 bits, then across 128 if Q=1;
            Q=0 zeroes the upper half. */
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31 28 20 15 9 4
      010 01110000 imm5 000111 n d INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar ts = '?';
      UInt laneNo = 16;     /* 16 == invalid; real lane numbers are 0..15 */
      IRExpr* src = NULL;   /* non-NULL marks a valid decode */
      if (imm5 & 1) {
         src = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts = 'b';
      }
      else if (imm5 & 2) {
         src = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts = 'h';
      }
      else if (imm5 & 4) {
         src = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts = 's';
      }
      else if (imm5 & 8) {
         src = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         /* Only the selected lane of Vd is written; others are kept. */
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31 28 20 15 9 4
      0q0 01110 000 imm5 001111 n d UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx, 16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx, 32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x, copy64
                          other -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx, (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx, 16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx, 32Sto64
                          1:x1000 -> invalid
                          other -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31 28 20 14 9 4
      011 01110000 imm5 0 imm4 n d INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx, imm4[3:1]
                        xx100 -> S, xx, imm4[3:2]
                        x1000 -> D, x, imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar ts = '?';
      IRType ity = Ity_INVALID;   /* Ity_INVALID marks a bad imm5 */
      UInt ix1 = 16;
      UInt ix2 = 16;
      if (imm5 & 1) {
         ts = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         /* Copy lane ix2 of Vn to lane ix1 of Vd; other Vd lanes kept. */
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}
7446
7447
static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD "modified immediate" group:
      MOVI/MVNI/ORR/BIC (vector, immediate) and FMOV (vector,
      immediate).  The (op,cmode) pair selects the instruction class
      and immediate expansion; AdvSIMDExpandImm does the expansion
      proper.  Returns True iff |insn| was recognised, in which case
      the IR for it has been emitted.

      31 28 18 15 11 9 4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   /* abcdefgh is the raw 8-bit immediate, assembled from the abc and
      defgh fields. */
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   /* Exactly one of the following five flags gets set by a matching
      case below. */
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* FMOV (vector, immediate, single precision) */
      /* NOTE(review): op_cmode 0:1111 (FMOV .4s/.2s) is not handled
         here; only the 1:1111 double-precision form below is. */

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
        break;
   }
   if (ok) {
      /* Exactly one instruction class must have been selected. */
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      /* Expand the 8-bit immediate to the 64-bit lane value; may
         itself reject the encoding. */
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         /* BIC is implemented as AND with the inverted immediate;
            |inv| flips the immediate's bits for the BIC case only. */
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         /* MVN is MOV of the bitwise-complemented immediate.  FMOV's
            expanded immediate needs no further transformation. */
         if (isMVN) imm64lo = ~imm64lo;
         ULong imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}
7595
7596
static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* Decoder for the AdvSIMD scalar copy group; the only member
      handled is DUP (element, scalar).  Returns True iff |insn| was
      recognised, in which case the IR for it has been emitted.

      31 28 20 15 14 10 9 4
      01 op 11110000 imm5 0 imm4 1 n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt laneNo = 16; /* invalid */
      /* The lowest set bit of imm5 gives the lane size; the bits
         above it give the lane number.  The selected lane is
         zero-extended into the 64-bit temp w0. */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         /* Scalar destination: the lane goes to the low 64 bits of Qd
            and the upper 64 bits are zeroed. */
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}
7663
sewardjfc83d2c2014-06-12 10:15:46 +00007664
sewardjdf1628c2014-06-10 22:52:05 +00007665static
7666Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
7667{
sewardjb9aff1e2014-06-15 21:55:33 +00007668 /* 31 28 23 21 16 11 9 4
7669 01 u 11110 sz 11000 opcode 10 n d
7670 Decode fields: u,sz,opcode
7671 */
sewardjdf1628c2014-06-10 22:52:05 +00007672# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardjb9aff1e2014-06-15 21:55:33 +00007673 if (INSN(31,30) != BITS2(0,1)
7674 || INSN(28,24) != BITS5(1,1,1,1,0)
7675 || INSN(21,17) != BITS5(1,1,0,0,0)
7676 || INSN(11,10) != BITS2(1,0)) {
7677 return False;
7678 }
7679 UInt bitU = INSN(29,29);
7680 UInt sz = INSN(23,22);
7681 UInt opcode = INSN(16,12);
7682 UInt nn = INSN(9,5);
7683 UInt dd = INSN(4,0);
7684
7685 if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
7686 /* -------- 0,11,11011 ADDP d_2d -------- */
sewardj8e91fd42014-07-11 12:05:47 +00007687 IRTemp xy = newTempV128();
7688 IRTemp xx = newTempV128();
sewardjb9aff1e2014-06-15 21:55:33 +00007689 assign(xy, getQReg128(nn));
7690 assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
7691 putQReg128(dd, unop(Iop_ZeroHI64ofV128,
7692 binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
7693 DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
7694 return True;
7695 }
7696
sewardjdf1628c2014-06-10 22:52:05 +00007697 return False;
7698# undef INSN
7699}
7700
sewardjfc83d2c2014-06-12 10:15:46 +00007701
sewardjdf1628c2014-06-10 22:52:05 +00007702static
7703Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
7704{
7705 /* 31 28 22 18 15 10 9 4
7706 01 u 111110 immh immb opcode 1 n d
7707 Decode fields: u,immh,opcode
7708 */
7709# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7710 if (INSN(31,30) != BITS2(0,1)
7711 || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
7712 return False;
7713 }
7714 UInt bitU = INSN(29,29);
7715 UInt immh = INSN(22,19);
7716 UInt immb = INSN(18,16);
7717 UInt opcode = INSN(15,11);
7718 UInt nn = INSN(9,5);
7719 UInt dd = INSN(4,0);
7720 UInt immhb = (immh << 3) | immb;
7721
sewardja6b61f02014-08-17 18:32:14 +00007722 if ((immh & 8) == 8
7723 && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
7724 /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
7725 /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
7726 /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
7727 /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
7728 Bool isU = bitU == 1;
7729 Bool isAcc = opcode == BITS5(0,0,0,1,0);
7730 UInt sh = 128 - immhb;
sewardjfc83d2c2014-06-12 10:15:46 +00007731 vassert(sh >= 1 && sh <= 64);
sewardja6b61f02014-08-17 18:32:14 +00007732 IROp op = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
7733 IRExpr* src = getQReg128(nn);
7734 IRTemp shf = newTempV128();
7735 IRTemp res = newTempV128();
7736 if (sh == 64 && isU) {
7737 assign(shf, mkV128(0x0000));
7738 } else {
7739 UInt nudge = 0;
7740 if (sh == 64) {
7741 vassert(!isU);
7742 nudge = 1;
7743 }
7744 assign(shf, binop(op, src, mkU8(sh - nudge)));
7745 }
7746 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
7747 : mkexpr(shf));
7748 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7749 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
7750 : (isU ? "ushr" : "sshr");
7751 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
7752 return True;
7753 }
7754
7755 if ((immh & 8) == 8
7756 && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
7757 /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
7758 /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
7759 /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
7760 /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
7761 Bool isU = bitU == 1;
7762 Bool isAcc = opcode == BITS5(0,0,1,1,0);
7763 UInt sh = 128 - immhb;
7764 vassert(sh >= 1 && sh <= 64);
7765 IROp op = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
7766 vassert(sh >= 1 && sh <= 64);
7767 IRExpr* src = getQReg128(nn);
7768 IRTemp imm8 = newTemp(Ity_I8);
7769 assign(imm8, mkU8((UChar)(-sh)));
7770 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
7771 IRTemp shf = newTempV128();
7772 IRTemp res = newTempV128();
7773 assign(shf, binop(op, src, amt));
7774 assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
7775 : mkexpr(shf));
7776 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7777 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
7778 : (isU ? "urshr" : "srshr");
7779 DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
sewardjfc83d2c2014-06-12 10:15:46 +00007780 return True;
7781 }
7782
sewardj8e91fd42014-07-11 12:05:47 +00007783 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
7784 /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
7785 UInt sh = 128 - immhb;
7786 vassert(sh >= 1 && sh <= 64);
7787 if (sh == 64) {
7788 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
7789 } else {
7790 /* sh is in range 1 .. 63 */
7791 ULong nmask = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
7792 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7793 IRTemp res = newTempV128();
7794 assign(res, binop(Iop_OrV128,
7795 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7796 binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
7797 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7798 }
7799 DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
7800 return True;
7801 }
7802
sewardjacc29642014-08-15 05:35:35 +00007803 if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7804 /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
7805 UInt sh = immhb - 64;
7806 vassert(sh >= 0 && sh < 64);
7807 putQReg128(dd,
7808 unop(Iop_ZeroHI64ofV128,
7809 sh == 0 ? getQReg128(nn)
7810 : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7811 DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
7812 return True;
7813 }
7814
sewardj8e91fd42014-07-11 12:05:47 +00007815 if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
7816 /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
7817 UInt sh = immhb - 64;
7818 vassert(sh >= 0 && sh < 64);
7819 if (sh == 0) {
7820 putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
7821 } else {
7822 /* sh is in range 1 .. 63 */
7823 ULong nmask = (1ULL << sh) - 1;
7824 IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
7825 IRTemp res = newTempV128();
7826 assign(res, binop(Iop_OrV128,
7827 binop(Iop_AndV128, getQReg128(dd), nmaskV),
7828 binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
7829 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
7830 }
7831 DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
7832 return True;
7833 }
7834
sewardjacc29642014-08-15 05:35:35 +00007835 if (opcode == BITS5(0,1,1,1,0)
7836 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
7837 /* -------- 0,01110 SQSHL #imm -------- */
7838 /* -------- 1,01110 UQSHL #imm -------- */
7839 /* -------- 1,01100 SQSHLU #imm -------- */
7840 UInt size = 0;
7841 UInt shift = 0;
7842 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
7843 if (!ok) return False;
7844 vassert(size >= 0 && size <= 3);
7845 /* The shift encoding has opposite sign for the leftwards case.
7846 Adjust shift to compensate. */
7847 UInt lanebits = 8 << size;
7848 shift = lanebits - shift;
7849 vassert(shift >= 0 && shift < lanebits);
7850 const HChar* nm = NULL;
7851 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
7852 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
7853 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
7854 else vassert(0);
7855 IRTemp qDiff1 = IRTemp_INVALID;
7856 IRTemp qDiff2 = IRTemp_INVALID;
7857 IRTemp res = IRTemp_INVALID;
7858 IRTemp src = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
7859 /* This relies on the fact that the zeroed out lanes generate zeroed
7860 result lanes and don't saturate, so there's no point in trimming
7861 the resulting res, qDiff1 or qDiff2 values. */
7862 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
7863 putQReg128(dd, mkexpr(res));
7864 updateQCFLAGwithDifference(qDiff1, qDiff2);
7865 const HChar arr = "bhsd"[size];
7866 DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
7867 return True;
7868 }
7869
sewardje741d162014-08-13 13:10:47 +00007870 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
7871 || (bitU == 1
7872 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
7873 /* -------- 0,10010 SQSHRN #imm -------- */
7874 /* -------- 1,10010 UQSHRN #imm -------- */
7875 /* -------- 0,10011 SQRSHRN #imm -------- */
7876 /* -------- 1,10011 UQRSHRN #imm -------- */
7877 /* -------- 1,10000 SQSHRUN #imm -------- */
7878 /* -------- 1,10001 SQRSHRUN #imm -------- */
7879 UInt size = 0;
7880 UInt shift = 0;
7881 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
7882 if (!ok || size == X11) return False;
7883 vassert(size >= X00 && size <= X10);
7884 vassert(shift >= 1 && shift <= (8 << size));
7885 const HChar* nm = "??";
7886 IROp op = Iop_INVALID;
7887 /* Decide on the name and the operation. */
7888 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
7889 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
7890 }
7891 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
7892 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
7893 }
7894 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
7895 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
7896 }
7897 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
7898 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
7899 }
7900 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
7901 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
7902 }
7903 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
7904 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
7905 }
7906 else vassert(0);
7907 /* Compute the result (Q, shifted value) pair. */
7908 IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
7909 IRTemp pair = newTempV128();
7910 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
7911 /* Update the result reg */
7912 IRTemp res64in128 = newTempV128();
7913 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
7914 putQReg128(dd, mkexpr(res64in128));
7915 /* Update the Q flag. */
7916 IRTemp q64q64 = newTempV128();
7917 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
7918 IRTemp z128 = newTempV128();
7919 assign(z128, mkV128(0x0000));
7920 updateQCFLAGwithDifference(q64q64, z128);
7921 /* */
7922 const HChar arrNarrow = "bhsd"[size];
7923 const HChar arrWide = "bhsd"[size+1];
7924 DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
7925 return True;
7926 }
7927
sewardjdf1628c2014-06-10 22:52:05 +00007928# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
7929 return False;
7930# undef INSN
7931}
7932
sewardjfc83d2c2014-06-12 10:15:46 +00007933
sewardjdf1628c2014-06-10 22:52:05 +00007934static
7935Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
7936{
sewardj54ffa1d2014-07-22 09:27:49 +00007937 /* 31 29 28 23 21 20 15 11 9 4
7938 01 U 11110 size 1 m opcode 00 n d
7939 Decode fields: u,opcode
7940 */
sewardjdf1628c2014-06-10 22:52:05 +00007941# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00007942 if (INSN(31,30) != BITS2(0,1)
7943 || INSN(28,24) != BITS5(1,1,1,1,0)
7944 || INSN(21,21) != 1
7945 || INSN(11,10) != BITS2(0,0)) {
7946 return False;
7947 }
7948 UInt bitU = INSN(29,29);
7949 UInt size = INSN(23,22);
7950 UInt mm = INSN(20,16);
7951 UInt opcode = INSN(15,12);
7952 UInt nn = INSN(9,5);
7953 UInt dd = INSN(4,0);
7954 vassert(size < 4);
7955
7956 if (bitU == 0
7957 && (opcode == BITS4(1,1,0,1)
7958 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
7959 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
7960 /* -------- 0,1001 SQDMLAL -------- */ // 1
7961 /* -------- 0,1011 SQDMLSL -------- */ // 2
7962 /* Widens, and size refers to the narrowed lanes. */
7963 UInt ks = 3;
7964 switch (opcode) {
7965 case BITS4(1,1,0,1): ks = 0; break;
7966 case BITS4(1,0,0,1): ks = 1; break;
7967 case BITS4(1,0,1,1): ks = 2; break;
7968 default: vassert(0);
7969 }
7970 vassert(ks >= 0 && ks <= 2);
7971 if (size == X00 || size == X11) return False;
7972 vassert(size <= 2);
7973 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
7974 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
7975 newTempsV128_3(&vecN, &vecM, &vecD);
7976 assign(vecN, getQReg128(nn));
7977 assign(vecM, getQReg128(mm));
7978 assign(vecD, getQReg128(dd));
7979 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
7980 False/*!is2*/, size, "mas"[ks],
7981 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
7982 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
7983 putQReg128(dd, unop(opZHI, mkexpr(res)));
7984 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
7985 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
7986 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
7987 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
7988 }
7989 const HChar* nm = ks == 0 ? "sqdmull"
7990 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
7991 const HChar arrNarrow = "bhsd"[size];
7992 const HChar arrWide = "bhsd"[size+1];
7993 DIP("%s %c%d, %c%d, %c%d\n",
7994 nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
7995 return True;
7996 }
7997
sewardjdf1628c2014-06-10 22:52:05 +00007998 return False;
7999# undef INSN
8000}
8001
8002
/* Decode and translate insns from the "AdvSIMD scalar three same" group:
   3-operand scalar ops where source and destination lanes have the same
   width (SQADD/UQADD, SQSUB/UQSUB, CMGT/CMHI, CMGE/CMHS, SSHL/USHL/
   SRSHL/URSHL, SQSHL/UQSHL/SQRSHL/UQRSHL, ADD/SUB, CMTST/CMEQ,
   SQDMULH/SQRDMULH, FABD).  Only the lowest lane of the destination is
   significant; all higher lanes are written as zero.  Returns True iff
   `insn` was decoded and IR emitted. */
static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      /* Compute both the saturating (qres) and plain (nres) results;
         QC is set iff they differ.  Non-lowest lanes are zeroed in
         both, so they cannot influence the QC comparison. */
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n",isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      /* a >= b is synthesised as NOT(b > a). */
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      /* Only the d (64-bit) form exists in scalar shape, hence the
         size == X11 guard. */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      /* The QAND* ops return a V256: V128_0 holds the shifted value,
         V128_1 holds the per-lane saturation indication. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      /* Scalar form: explicitly zero the upper 64 bits of Dd. */
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      /* CMTST ((a & b) != 0) is synthesised as NOT((a & b) == 0). */
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      /* sat1q/sat1n are the saturating/non-saturating result pair;
         their difference (in the lowest lane only) drives QC. */
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      /* abs(n - m), using the guest's current FP rounding mode for the
         subtraction. */
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   return False;
#  undef INSN
}
8225
8226
/* Decode and translate insns from the "AdvSIMD scalar two-reg misc"
   group: 2-operand scalar ops (SUQADD/USQADD, SQABS/SQNEG, compares
   against #0, ABS, NEG, SQXTN/UQXTN/SQXTUN).  Only the lowest lane of
   the destination is significant; higher lanes are written as zero.
   Returns True iff `insn` was decoded and IR emitted. */
static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      /* Note: these accumulate into Dd, so Dd is both source and
         destination here. */
      IROp   qop  = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                             : mkVecQADDEXTUSSATSS(size);
      IROp   nop  = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      /* qresFW/nresFW: full-width saturating and non-saturating results;
         trimmed to the lowest lane below before the QC comparison. */
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      /* n >= 0 is synthesised as NOT(0 > n). */
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      /* n <= 0 is synthesised as NOT(n > 0). */
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      /* n < 0 is expressed as 0 > n. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      /* NEG is expressed as 0 - n. */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      /* Narrowing: `size` refers to the narrowed (destination) lanes. */
      if (size == X11) return False;
      vassert(size < 3);
      IROp  opN    = Iop_INVALID;
      Bool  zWiden = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      /* QC detection: re-widen the narrowed result and compare with the
         original; any mismatch means the narrowing saturated. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}
8384
sewardjfc83d2c2014-06-12 10:15:46 +00008385
sewardjdf1628c2014-06-10 22:52:05 +00008386static
8387Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
8388{
sewardj54ffa1d2014-07-22 09:27:49 +00008389 /* 31 28 23 21 20 19 15 11 9 4
8390 01 U 11111 size L M m opcode H 0 n d
8391 Decode fields are: u,size,opcode
8392 M is really part of the mm register number. Individual
8393 cases need to inspect L and H though.
8394 */
sewardjdf1628c2014-06-10 22:52:05 +00008395# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00008396 if (INSN(31,30) != BITS2(0,1)
8397 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
8398 return False;
8399 }
8400 UInt bitU = INSN(29,29);
8401 UInt size = INSN(23,22);
8402 UInt bitL = INSN(21,21);
8403 UInt bitM = INSN(20,20);
8404 UInt mmLO4 = INSN(19,16);
8405 UInt opcode = INSN(15,12);
8406 UInt bitH = INSN(11,11);
8407 UInt nn = INSN(9,5);
8408 UInt dd = INSN(4,0);
8409 vassert(size < 4);
8410 vassert(bitH < 2 && bitM < 2 && bitL < 2);
8411
8412 if (bitU == 0
8413 && (opcode == BITS4(1,0,1,1)
8414 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
8415 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
8416 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
8417 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
8418 /* Widens, and size refers to the narrowed lanes. */
8419 UInt ks = 3;
8420 switch (opcode) {
8421 case BITS4(1,0,1,1): ks = 0; break;
8422 case BITS4(0,0,1,1): ks = 1; break;
8423 case BITS4(0,1,1,1): ks = 2; break;
8424 default: vassert(0);
8425 }
8426 vassert(ks >= 0 && ks <= 2);
8427 UInt mm = 32; // invalid
8428 UInt ix = 16; // invalid
8429 switch (size) {
8430 case X00:
8431 return False; // h_b_b[] case is not allowed
8432 case X01:
8433 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
8434 case X10:
8435 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
8436 case X11:
8437 return False; // q_d_d[] case is not allowed
8438 default:
8439 vassert(0);
8440 }
8441 vassert(mm < 32 && ix < 16);
8442 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
8443 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
8444 newTempsV128_2(&vecN, &vecD);
8445 assign(vecN, getQReg128(nn));
8446 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
8447 assign(vecD, getQReg128(dd));
8448 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
8449 False/*!is2*/, size, "mas"[ks],
8450 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
8451 IROp opZHI = mkVecZEROHIxxOFV128(size+1);
8452 putQReg128(dd, unop(opZHI, mkexpr(res)));
8453 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
8454 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
8455 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
8456 updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
8457 }
8458 const HChar* nm = ks == 0 ? "sqmull"
8459 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
8460 const HChar arrNarrow = "bhsd"[size];
8461 const HChar arrWide = "bhsd"[size+1];
8462 DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
8463 nm, arrWide, dd, arrNarrow, nn, dd, arrNarrow, ix);
8464 return True;
8465 }
8466
sewardj257e99f2014-08-03 12:45:19 +00008467 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
8468 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
8469 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
8470 UInt mm = 32; // invalid
8471 UInt ix = 16; // invalid
8472 switch (size) {
8473 case X00:
8474 return False; // b case is not allowed
8475 case X01:
8476 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
8477 case X10:
8478 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
8479 case X11:
8480 return False; // q case is not allowed
8481 default:
8482 vassert(0);
8483 }
8484 vassert(mm < 32 && ix < 16);
8485 Bool isR = opcode == BITS4(1,1,0,1);
8486 IRTemp res, sat1q, sat1n, vN, vM;
8487 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
8488 vN = newTempV128();
8489 assign(vN, getQReg128(nn));
8490 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
8491 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
8492 IROp opZHI = mkVecZEROHIxxOFV128(size);
8493 putQReg128(dd, unop(opZHI, mkexpr(res)));
8494 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
8495 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
8496 HChar ch = size == X01 ? 'h' : 's';
8497 DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, ch, dd, ix);
8498 return True;
8499 }
8500
sewardjdf1628c2014-06-10 22:52:05 +00008501 return False;
8502# undef INSN
8503}
8504
sewardjfc83d2c2014-06-12 10:15:46 +00008505
sewardjdf1628c2014-06-10 22:52:05 +00008506static
8507Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
8508{
8509 /* 31 28 22 18 15 10 9 4
8510 0 q u 011110 immh immb opcode 1 n d
8511 Decode fields: u,opcode
8512 */
8513# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8514 if (INSN(31,31) != 0
8515 || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
8516 return False;
8517 }
8518 UInt bitQ = INSN(30,30);
8519 UInt bitU = INSN(29,29);
8520 UInt immh = INSN(22,19);
8521 UInt immb = INSN(18,16);
8522 UInt opcode = INSN(15,11);
8523 UInt nn = INSN(9,5);
8524 UInt dd = INSN(4,0);
8525
sewardja6b61f02014-08-17 18:32:14 +00008526 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +00008527 /* -------- 0,00000 SSHR std7_std7_#imm -------- */
8528 /* -------- 1,00000 USHR std7_std7_#imm -------- */
sewardja6b61f02014-08-17 18:32:14 +00008529 /* -------- 0,00010 SSRA std7_std7_#imm -------- */
8530 /* -------- 1,00010 USRA std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +00008531 /* laneTy, shift = case immh:immb of
8532 0001:xxx -> B, SHR:8-xxx
8533 001x:xxx -> H, SHR:16-xxxx
8534 01xx:xxx -> S, SHR:32-xxxxx
8535 1xxx:xxx -> D, SHR:64-xxxxxx
8536 other -> invalid
8537 */
sewardjdf1628c2014-06-10 22:52:05 +00008538 UInt size = 0;
8539 UInt shift = 0;
8540 Bool isQ = bitQ == 1;
8541 Bool isU = bitU == 1;
sewardja6b61f02014-08-17 18:32:14 +00008542 Bool isAcc = opcode == BITS5(0,0,0,1,0);
sewardjdf1628c2014-06-10 22:52:05 +00008543 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +00008544 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +00008545 vassert(size >= 0 && size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00008546 UInt lanebits = 8 << size;
8547 vassert(shift >= 1 && shift <= lanebits);
8548 IROp op = isU ? mkVecSHRN(size) : mkVecSARN(size);
8549 IRExpr* src = getQReg128(nn);
sewardja6b61f02014-08-17 18:32:14 +00008550 IRTemp shf = newTempV128();
sewardj8e91fd42014-07-11 12:05:47 +00008551 IRTemp res = newTempV128();
8552 if (shift == lanebits && isU) {
sewardja6b61f02014-08-17 18:32:14 +00008553 assign(shf, mkV128(0x0000));
sewardj8e91fd42014-07-11 12:05:47 +00008554 } else {
8555 UInt nudge = 0;
8556 if (shift == lanebits) {
8557 vassert(!isU);
8558 nudge = 1;
8559 }
sewardja6b61f02014-08-17 18:32:14 +00008560 assign(shf, binop(op, src, mkU8(shift - nudge)));
sewardjdf1628c2014-06-10 22:52:05 +00008561 }
sewardja6b61f02014-08-17 18:32:14 +00008562 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
8563 : mkexpr(shf));
sewardj8e91fd42014-07-11 12:05:47 +00008564 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8565 HChar laneCh = "bhsd"[size];
8566 UInt nLanes = (isQ ? 128 : 64) / lanebits;
sewardja6b61f02014-08-17 18:32:14 +00008567 const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
8568 : (isU ? "ushr" : "sshr");
8569 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8570 nameQReg128(dd), nLanes, laneCh,
8571 nameQReg128(nn), nLanes, laneCh, shift);
8572 return True;
8573 }
8574
8575 if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
8576 /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
8577 /* -------- 1,00100 URSHR std7_std7_#imm -------- */
8578 /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
8579 /* -------- 1,00110 URSRA std7_std7_#imm -------- */
8580 /* laneTy, shift = case immh:immb of
8581 0001:xxx -> B, SHR:8-xxx
8582 001x:xxx -> H, SHR:16-xxxx
8583 01xx:xxx -> S, SHR:32-xxxxx
8584 1xxx:xxx -> D, SHR:64-xxxxxx
8585 other -> invalid
8586 */
8587 UInt size = 0;
8588 UInt shift = 0;
8589 Bool isQ = bitQ == 1;
8590 Bool isU = bitU == 1;
8591 Bool isAcc = opcode == BITS5(0,0,1,1,0);
8592 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8593 if (!ok || (bitQ == 0 && size == X11)) return False;
8594 vassert(size >= 0 && size <= 3);
8595 UInt lanebits = 8 << size;
8596 vassert(shift >= 1 && shift <= lanebits);
8597 IROp op = isU ? mkVecRSHU(size) : mkVecRSHS(size);
8598 IRExpr* src = getQReg128(nn);
8599 IRTemp imm8 = newTemp(Ity_I8);
8600 assign(imm8, mkU8((UChar)(-shift)));
8601 IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
8602 IRTemp shf = newTempV128();
8603 IRTemp res = newTempV128();
8604 assign(shf, binop(op, src, amt));
8605 assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
8606 : mkexpr(shf));
8607 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8608 HChar laneCh = "bhsd"[size];
8609 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8610 const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
8611 : (isU ? "urshr" : "srshr");
sewardj8e91fd42014-07-11 12:05:47 +00008612 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8613 nameQReg128(dd), nLanes, laneCh,
8614 nameQReg128(nn), nLanes, laneCh, shift);
8615 return True;
sewardjdf1628c2014-06-10 22:52:05 +00008616 }
8617
sewardj8e91fd42014-07-11 12:05:47 +00008618 if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
8619 /* -------- 1,01000 SRI std7_std7_#imm -------- */
8620 /* laneTy, shift = case immh:immb of
8621 0001:xxx -> B, SHR:8-xxx
8622 001x:xxx -> H, SHR:16-xxxx
8623 01xx:xxx -> S, SHR:32-xxxxx
8624 1xxx:xxx -> D, SHR:64-xxxxxx
8625 other -> invalid
8626 */
8627 UInt size = 0;
8628 UInt shift = 0;
8629 Bool isQ = bitQ == 1;
8630 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8631 if (!ok || (bitQ == 0 && size == X11)) return False;
8632 vassert(size >= 0 && size <= 3);
8633 UInt lanebits = 8 << size;
8634 vassert(shift >= 1 && shift <= lanebits);
8635 IRExpr* src = getQReg128(nn);
8636 IRTemp res = newTempV128();
8637 if (shift == lanebits) {
8638 assign(res, getQReg128(dd));
8639 } else {
8640 assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
8641 IRExpr* nmask = binop(mkVecSHLN(size),
8642 mkV128(0xFFFF), mkU8(lanebits - shift));
8643 IRTemp tmp = newTempV128();
8644 assign(tmp, binop(Iop_OrV128,
8645 mkexpr(res),
8646 binop(Iop_AndV128, getQReg128(dd), nmask)));
8647 res = tmp;
8648 }
8649 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8650 HChar laneCh = "bhsd"[size];
8651 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8652 DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
8653 nameQReg128(dd), nLanes, laneCh,
8654 nameQReg128(nn), nLanes, laneCh, shift);
8655 return True;
8656 }
8657
8658 if (opcode == BITS5(0,1,0,1,0)) {
sewardjdf1628c2014-06-10 22:52:05 +00008659 /* -------- 0,01010 SHL std7_std7_#imm -------- */
sewardj8e91fd42014-07-11 12:05:47 +00008660 /* -------- 1,01010 SLI std7_std7_#imm -------- */
sewardjdf1628c2014-06-10 22:52:05 +00008661 /* laneTy, shift = case immh:immb of
8662 0001:xxx -> B, xxx
8663 001x:xxx -> H, xxxx
8664 01xx:xxx -> S, xxxxx
8665 1xxx:xxx -> D, xxxxxx
8666 other -> invalid
8667 */
sewardjdf1628c2014-06-10 22:52:05 +00008668 UInt size = 0;
8669 UInt shift = 0;
sewardj8e91fd42014-07-11 12:05:47 +00008670 Bool isSLI = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00008671 Bool isQ = bitQ == 1;
8672 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
sewardj8e91fd42014-07-11 12:05:47 +00008673 if (!ok || (bitQ == 0 && size == X11)) return False;
sewardjdf1628c2014-06-10 22:52:05 +00008674 vassert(size >= 0 && size <= 3);
8675 /* The shift encoding has opposite sign for the leftwards case.
8676 Adjust shift to compensate. */
sewardj8e91fd42014-07-11 12:05:47 +00008677 UInt lanebits = 8 << size;
8678 shift = lanebits - shift;
8679 vassert(shift >= 0 && shift < lanebits);
8680 IROp op = mkVecSHLN(size);
8681 IRExpr* src = getQReg128(nn);
8682 IRTemp res = newTempV128();
8683 if (shift == 0) {
8684 assign(res, src);
8685 } else {
sewardjdf9d6d52014-06-27 10:43:22 +00008686 assign(res, binop(op, src, mkU8(shift)));
sewardj8e91fd42014-07-11 12:05:47 +00008687 if (isSLI) {
8688 IRExpr* nmask = binop(mkVecSHRN(size),
8689 mkV128(0xFFFF), mkU8(lanebits - shift));
8690 IRTemp tmp = newTempV128();
8691 assign(tmp, binop(Iop_OrV128,
8692 mkexpr(res),
8693 binop(Iop_AndV128, getQReg128(dd), nmask)));
8694 res = tmp;
8695 }
sewardjdf1628c2014-06-10 22:52:05 +00008696 }
sewardj8e91fd42014-07-11 12:05:47 +00008697 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8698 HChar laneCh = "bhsd"[size];
8699 UInt nLanes = (isQ ? 128 : 64) / lanebits;
8700 const HChar* nm = isSLI ? "sli" : "shl";
8701 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
8702 nameQReg128(dd), nLanes, laneCh,
8703 nameQReg128(nn), nLanes, laneCh, shift);
8704 return True;
sewardjdf1628c2014-06-10 22:52:05 +00008705 }
8706
sewardja97dddf2014-08-14 22:26:52 +00008707 if (opcode == BITS5(0,1,1,1,0)
8708 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
8709 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
8710 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
8711 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
8712 UInt size = 0;
8713 UInt shift = 0;
8714 Bool isQ = bitQ == 1;
8715 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8716 if (!ok || (bitQ == 0 && size == X11)) return False;
8717 vassert(size >= 0 && size <= 3);
8718 /* The shift encoding has opposite sign for the leftwards case.
8719 Adjust shift to compensate. */
8720 UInt lanebits = 8 << size;
8721 shift = lanebits - shift;
8722 vassert(shift >= 0 && shift < lanebits);
8723 const HChar* nm = NULL;
8724 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
8725 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
8726 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
8727 else vassert(0);
8728 IRTemp qDiff1 = IRTemp_INVALID;
8729 IRTemp qDiff2 = IRTemp_INVALID;
8730 IRTemp res = IRTemp_INVALID;
8731 IRTemp src = newTempV128();
8732 assign(src, getQReg128(nn));
8733 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
8734 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
8735 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
sewardjacc29642014-08-15 05:35:35 +00008736 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
sewardja97dddf2014-08-14 22:26:52 +00008737 const HChar* arr = nameArr_Q_SZ(bitQ, size);
8738 DIP("%s %s.%s, %s.%s, #%u\n", nm,
8739 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
8740 return True;
8741 }
8742
sewardj487559e2014-07-10 14:22:45 +00008743 if (bitU == 0
8744 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
8745 /* -------- 0,10000 SHRN{,2} #imm -------- */
8746 /* -------- 0,10001 RSHRN{,2} #imm -------- */
8747 /* Narrows, and size is the narrow size. */
8748 UInt size = 0;
8749 UInt shift = 0;
8750 Bool is2 = bitQ == 1;
8751 Bool isR = opcode == BITS5(1,0,0,0,1);
8752 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8753 if (!ok || size == X11) return False;
8754 vassert(shift >= 1);
sewardj8e91fd42014-07-11 12:05:47 +00008755 IRTemp t1 = newTempV128();
8756 IRTemp t2 = newTempV128();
8757 IRTemp t3 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008758 assign(t1, getQReg128(nn));
8759 assign(t2, isR ? binop(mkVecADD(size+1),
8760 mkexpr(t1),
8761 mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
8762 : mkexpr(t1));
8763 assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
8764 IRTemp t4 = math_NARROW_LANES(t3, t3, size);
8765 putLO64andZUorPutHI64(is2, dd, t4);
8766 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8767 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8768 DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
8769 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
8770 return True;
8771 }
8772
sewardjecedd982014-08-11 14:02:47 +00008773 if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
8774 || (bitU == 1
8775 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
8776 /* -------- 0,10010 SQSHRN{,2} #imm -------- */
8777 /* -------- 1,10010 UQSHRN{,2} #imm -------- */
8778 /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
8779 /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
8780 /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
8781 /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
8782 UInt size = 0;
8783 UInt shift = 0;
8784 Bool is2 = bitQ == 1;
8785 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
8786 if (!ok || size == X11) return False;
8787 vassert(shift >= 1 && shift <= (8 << size));
8788 const HChar* nm = "??";
8789 IROp op = Iop_INVALID;
8790 /* Decide on the name and the operation. */
8791 /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
8792 nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
8793 }
8794 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
8795 nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
8796 }
8797 else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
8798 nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
8799 }
8800 else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
8801 nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
8802 }
8803 else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
8804 nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
8805 }
8806 else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
8807 nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
8808 }
8809 else vassert(0);
8810 /* Compute the result (Q, shifted value) pair. */
8811 IRTemp src128 = newTempV128();
8812 assign(src128, getQReg128(nn));
8813 IRTemp pair = newTempV128();
8814 assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
8815 /* Update the result reg */
8816 IRTemp res64in128 = newTempV128();
8817 assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
8818 putLO64andZUorPutHI64(is2, dd, res64in128);
8819 /* Update the Q flag. */
8820 IRTemp q64q64 = newTempV128();
8821 assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
8822 IRTemp z128 = newTempV128();
8823 assign(z128, mkV128(0x0000));
8824 updateQCFLAGwithDifference(q64q64, z128);
8825 /* */
8826 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8827 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8828 DIP("%s %s.%s, %s.%s, #%u\n", nm,
8829 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
8830 return True;
8831 }
8832
sewardjdf1628c2014-06-10 22:52:05 +00008833 if (opcode == BITS5(1,0,1,0,0)) {
8834 /* -------- 0,10100 SSHLL{,2} #imm -------- */
8835 /* -------- 1,10100 USHLL{,2} #imm -------- */
8836 /* 31 28 22 18 15 9 4
8837 0q0 011110 immh immb 101001 n d SSHLL Vd.Ta, Vn.Tb, #sh
8838 0q1 011110 immh immb 101001 n d USHLL Vd.Ta, Vn.Tb, #sh
8839 where Ta,Tb,sh
8840 = case immh of 1xxx -> invalid
8841 01xx -> 2d, 2s(q0)/4s(q1), immh:immb - 32 (0..31)
8842 001x -> 4s, 4h(q0)/8h(q1), immh:immb - 16 (0..15)
8843 0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8 (0..7)
8844 0000 -> AdvSIMD modified immediate (???)
8845 */
8846 Bool isQ = bitQ == 1;
8847 Bool isU = bitU == 1;
8848 UInt immhb = (immh << 3) | immb;
sewardj8e91fd42014-07-11 12:05:47 +00008849 IRTemp src = newTempV128();
8850 IRTemp zero = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00008851 IRExpr* res = NULL;
8852 UInt sh = 0;
8853 const HChar* ta = "??";
8854 const HChar* tb = "??";
8855 assign(src, getQReg128(nn));
8856 assign(zero, mkV128(0x0000));
8857 if (immh & 8) {
8858 /* invalid; don't assign to res */
8859 }
8860 else if (immh & 4) {
8861 sh = immhb - 32;
8862 vassert(sh < 32); /* so 32-sh is 1..32 */
8863 ta = "2d";
8864 tb = isQ ? "4s" : "2s";
8865 IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
8866 : mk_InterleaveLO32x4(src, zero);
8867 res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
8868 }
8869 else if (immh & 2) {
8870 sh = immhb - 16;
8871 vassert(sh < 16); /* so 16-sh is 1..16 */
8872 ta = "4s";
8873 tb = isQ ? "8h" : "4h";
8874 IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
8875 : mk_InterleaveLO16x8(src, zero);
8876 res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
8877 }
8878 else if (immh & 1) {
8879 sh = immhb - 8;
8880 vassert(sh < 8); /* so 8-sh is 1..8 */
8881 ta = "8h";
8882 tb = isQ ? "16b" : "8b";
8883 IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
8884 : mk_InterleaveLO8x16(src, zero);
8885 res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
8886 } else {
8887 vassert(immh == 0);
8888 /* invalid; don't assign to res */
8889 }
8890 /* */
8891 if (res) {
8892 putQReg128(dd, res);
8893 DIP("%cshll%s %s.%s, %s.%s, #%d\n",
8894 isU ? 'u' : 's', isQ ? "2" : "",
8895 nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
8896 return True;
8897 }
8898 return False;
8899 }
8900
8901# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
8902 return False;
8903# undef INSN
8904}
8905
sewardjfc83d2c2014-06-12 10:15:46 +00008906
sewardjdf1628c2014-06-10 22:52:05 +00008907static
8908Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
8909{
sewardj25523c42014-06-15 19:36:29 +00008910 /* 31 30 29 28 23 21 20 15 11 9 4
8911 0 Q U 01110 size 1 m opcode 00 n d
8912 Decode fields: u,opcode
8913 */
sewardjdf1628c2014-06-10 22:52:05 +00008914# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj25523c42014-06-15 19:36:29 +00008915 if (INSN(31,31) != 0
8916 || INSN(28,24) != BITS5(0,1,1,1,0)
8917 || INSN(21,21) != 1
8918 || INSN(11,10) != BITS2(0,0)) {
8919 return False;
8920 }
8921 UInt bitQ = INSN(30,30);
8922 UInt bitU = INSN(29,29);
8923 UInt size = INSN(23,22);
8924 UInt mm = INSN(20,16);
8925 UInt opcode = INSN(15,12);
8926 UInt nn = INSN(9,5);
8927 UInt dd = INSN(4,0);
8928 vassert(size < 4);
8929 Bool is2 = bitQ == 1;
8930
sewardj6f312d02014-06-28 12:21:37 +00008931 if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
8932 /* -------- 0,0000 SADDL{2} -------- */
8933 /* -------- 1,0000 UADDL{2} -------- */
8934 /* -------- 0,0010 SSUBL{2} -------- */
8935 /* -------- 1,0010 USUBL{2} -------- */
8936 /* Widens, and size refers to the narrowed lanes. */
sewardj6f312d02014-06-28 12:21:37 +00008937 if (size == X11) return False;
8938 vassert(size <= 2);
8939 Bool isU = bitU == 1;
8940 Bool isADD = opcode == BITS4(0,0,0,0);
sewardja5a6b752014-06-30 07:33:56 +00008941 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
8942 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008943 IRTemp res = newTempV128();
sewardj54ffa1d2014-07-22 09:27:49 +00008944 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj6f312d02014-06-28 12:21:37 +00008945 mkexpr(argL), mkexpr(argR)));
8946 putQReg128(dd, mkexpr(res));
8947 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8948 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8949 const HChar* nm = isADD ? (isU ? "uaddl" : "saddl")
8950 : (isU ? "usubl" : "ssubl");
8951 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8952 nameQReg128(dd), arrWide,
8953 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
8954 return True;
8955 }
8956
sewardja5a6b752014-06-30 07:33:56 +00008957 if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
8958 /* -------- 0,0001 SADDW{2} -------- */
8959 /* -------- 1,0001 UADDW{2} -------- */
8960 /* -------- 0,0011 SSUBW{2} -------- */
8961 /* -------- 1,0011 USUBW{2} -------- */
8962 /* Widens, and size refers to the narrowed lanes. */
8963 if (size == X11) return False;
8964 vassert(size <= 2);
8965 Bool isU = bitU == 1;
8966 Bool isADD = opcode == BITS4(0,0,0,1);
8967 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00008968 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00008969 assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
8970 getQReg128(nn), mkexpr(argR)));
8971 putQReg128(dd, mkexpr(res));
8972 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
8973 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
8974 const HChar* nm = isADD ? (isU ? "uaddw" : "saddw")
8975 : (isU ? "usubw" : "ssubw");
8976 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
8977 nameQReg128(dd), arrWide,
8978 nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
8979 return True;
8980 }
8981
sewardj25523c42014-06-15 19:36:29 +00008982 if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
8983 /* -------- 0,0100 ADDHN{2} -------- */
8984 /* -------- 1,0100 RADDHN{2} -------- */
8985 /* -------- 0,0110 SUBHN{2} -------- */
8986 /* -------- 1,0110 RSUBHN{2} -------- */
8987 /* Narrows, and size refers to the narrowed lanes. */
8988 if (size == X11) return False;
8989 vassert(size <= 2);
sewardj487559e2014-07-10 14:22:45 +00008990 const UInt shift[3] = { 8, 16, 32 };
sewardj25523c42014-06-15 19:36:29 +00008991 Bool isADD = opcode == BITS4(0,1,0,0);
8992 Bool isR = bitU == 1;
8993 /* Combined elements in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00008994 IRTemp wide = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00008995 IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
sewardj25523c42014-06-15 19:36:29 +00008996 getQReg128(nn), getQReg128(mm));
8997 if (isR) {
sewardj487559e2014-07-10 14:22:45 +00008998 wideE = binop(mkVecADD(size+1),
8999 wideE,
9000 mkexpr(math_VEC_DUP_IMM(size+1,
9001 1ULL << (shift[size]-1))));
sewardj25523c42014-06-15 19:36:29 +00009002 }
9003 assign(wide, wideE);
9004 /* Top halves of elements, still in wide lanes */
sewardj8e91fd42014-07-11 12:05:47 +00009005 IRTemp shrd = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00009006 assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
sewardj25523c42014-06-15 19:36:29 +00009007 /* Elements now compacted into lower 64 bits */
sewardj8e91fd42014-07-11 12:05:47 +00009008 IRTemp new64 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00009009 assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
sewardj25523c42014-06-15 19:36:29 +00009010 putLO64andZUorPutHI64(is2, dd, new64);
9011 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9012 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9013 const HChar* nm = isADD ? (isR ? "raddhn" : "addhn")
9014 : (isR ? "rsubhn" : "subhn");
9015 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9016 nameQReg128(dd), arrNarrow,
9017 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
9018 return True;
9019 }
9020
sewardj6f312d02014-06-28 12:21:37 +00009021 if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
9022 /* -------- 0,0101 SABAL{2} -------- */
9023 /* -------- 1,0101 UABAL{2} -------- */
9024 /* -------- 0,0111 SABDL{2} -------- */
9025 /* -------- 1,0111 UABDL{2} -------- */
9026 /* Widens, and size refers to the narrowed lanes. */
sewardj6f312d02014-06-28 12:21:37 +00009027 if (size == X11) return False;
9028 vassert(size <= 2);
9029 Bool isU = bitU == 1;
9030 Bool isACC = opcode == BITS4(0,1,0,1);
sewardja5a6b752014-06-30 07:33:56 +00009031 IRTemp argL = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
9032 IRTemp argR = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
sewardj6f312d02014-06-28 12:21:37 +00009033 IRTemp abd = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
sewardj8e91fd42014-07-11 12:05:47 +00009034 IRTemp res = newTempV128();
9035 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
sewardj6f312d02014-06-28 12:21:37 +00009036 : mkexpr(abd));
9037 putQReg128(dd, mkexpr(res));
9038 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9039 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9040 const HChar* nm = isACC ? (isU ? "uabal" : "sabal")
9041 : (isU ? "uabdl" : "sabdl");
9042 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9043 nameQReg128(dd), arrWide,
9044 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9045 return True;
9046 }
9047
9048 if (opcode == BITS4(1,1,0,0)
9049 || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
sewardj487559e2014-07-10 14:22:45 +00009050 /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
sewardj6f312d02014-06-28 12:21:37 +00009051 /* -------- 1,1100 UMULL{2} -------- */ // 0
9052 /* -------- 0,1000 SMLAL{2} -------- */ // 1
9053 /* -------- 1,1000 UMLAL{2} -------- */ // 1
9054 /* -------- 0,1010 SMLSL{2} -------- */ // 2
9055 /* -------- 1,1010 UMLSL{2} -------- */ // 2
9056 /* Widens, and size refers to the narrowed lanes. */
sewardj487559e2014-07-10 14:22:45 +00009057 UInt ks = 3;
sewardj6f312d02014-06-28 12:21:37 +00009058 switch (opcode) {
sewardj487559e2014-07-10 14:22:45 +00009059 case BITS4(1,1,0,0): ks = 0; break;
9060 case BITS4(1,0,0,0): ks = 1; break;
9061 case BITS4(1,0,1,0): ks = 2; break;
sewardj6f312d02014-06-28 12:21:37 +00009062 default: vassert(0);
9063 }
sewardj487559e2014-07-10 14:22:45 +00009064 vassert(ks >= 0 && ks <= 2);
sewardj6f312d02014-06-28 12:21:37 +00009065 if (size == X11) return False;
9066 vassert(size <= 2);
sewardj51d012a2014-07-21 09:19:50 +00009067 Bool isU = bitU == 1;
9068 IRTemp vecN = newTempV128();
9069 IRTemp vecM = newTempV128();
9070 IRTemp vecD = newTempV128();
9071 assign(vecN, getQReg128(nn));
9072 assign(vecM, getQReg128(mm));
9073 assign(vecD, getQReg128(dd));
9074 IRTemp res = IRTemp_INVALID;
9075 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
9076 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj6f312d02014-06-28 12:21:37 +00009077 putQReg128(dd, mkexpr(res));
9078 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9079 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
sewardj487559e2014-07-10 14:22:45 +00009080 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
sewardj6f312d02014-06-28 12:21:37 +00009081 DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
9082 nameQReg128(dd), arrWide,
9083 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9084 return True;
9085 }
9086
sewardj54ffa1d2014-07-22 09:27:49 +00009087 if (bitU == 0
9088 && (opcode == BITS4(1,1,0,1)
9089 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9090 /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
9091 /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
9092 /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
9093 /* Widens, and size refers to the narrowed lanes. */
9094 UInt ks = 3;
9095 switch (opcode) {
9096 case BITS4(1,1,0,1): ks = 0; break;
9097 case BITS4(1,0,0,1): ks = 1; break;
9098 case BITS4(1,0,1,1): ks = 2; break;
9099 default: vassert(0);
9100 }
9101 vassert(ks >= 0 && ks <= 2);
9102 if (size == X00 || size == X11) return False;
9103 vassert(size <= 2);
9104 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9105 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9106 newTempsV128_3(&vecN, &vecM, &vecD);
9107 assign(vecN, getQReg128(nn));
9108 assign(vecM, getQReg128(mm));
9109 assign(vecD, getQReg128(dd));
9110 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9111 is2, size, "mas"[ks],
9112 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
9113 putQReg128(dd, mkexpr(res));
9114 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
9115 updateQCFLAGwithDifference(sat1q, sat1n);
9116 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
9117 updateQCFLAGwithDifference(sat2q, sat2n);
9118 }
9119 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9120 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9121 const HChar* nm = ks == 0 ? "sqdmull"
9122 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
9123 DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
9124 nameQReg128(dd), arrWide,
9125 nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
9126 return True;
9127 }
9128
sewardj31b5a952014-06-26 07:41:14 +00009129 if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
9130 /* -------- 0,1110 PMULL{2} -------- */
sewardj6f312d02014-06-28 12:21:37 +00009131 /* Widens, and size refers to the narrowed lanes. */
sewardj31b5a952014-06-26 07:41:14 +00009132 if (size != X00) return False;
9133 IRTemp res
9134 = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
9135 getQReg128(nn), getQReg128(mm));
9136 putQReg128(dd, mkexpr(res));
9137 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9138 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
9139 DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
9140 nameQReg128(dd), arrNarrow,
9141 nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
9142 return True;
9143 }
9144
sewardjdf1628c2014-06-10 22:52:05 +00009145 return False;
9146# undef INSN
9147}
9148
9149
9150static
9151Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
9152{
9153 /* 31 30 29 28 23 21 20 15 10 9 4
9154 0 Q U 01110 size 1 m opcode 1 n d
9155 Decode fields: u,size,opcode
9156 */
9157# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9158 if (INSN(31,31) != 0
9159 || INSN(28,24) != BITS5(0,1,1,1,0)
9160 || INSN(21,21) != 1
9161 || INSN(10,10) != 1) {
9162 return False;
9163 }
9164 UInt bitQ = INSN(30,30);
9165 UInt bitU = INSN(29,29);
9166 UInt size = INSN(23,22);
9167 UInt mm = INSN(20,16);
9168 UInt opcode = INSN(15,11);
9169 UInt nn = INSN(9,5);
9170 UInt dd = INSN(4,0);
9171 vassert(size < 4);
9172
sewardja5a6b752014-06-30 07:33:56 +00009173 if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
9174 /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
9175 /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
9176 /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
9177 /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
9178 if (size == X11) return False;
9179 Bool isADD = opcode == BITS5(0,0,0,0,0);
9180 Bool isU = bitU == 1;
9181 /* Widen both args out, do the math, narrow to final result. */
sewardj8e91fd42014-07-11 12:05:47 +00009182 IRTemp argL = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009183 IRTemp argLhi = IRTemp_INVALID;
9184 IRTemp argLlo = IRTemp_INVALID;
sewardj8e91fd42014-07-11 12:05:47 +00009185 IRTemp argR = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009186 IRTemp argRhi = IRTemp_INVALID;
9187 IRTemp argRlo = IRTemp_INVALID;
sewardj8e91fd42014-07-11 12:05:47 +00009188 IRTemp resHi = newTempV128();
9189 IRTemp resLo = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009190 IRTemp res = IRTemp_INVALID;
9191 assign(argL, getQReg128(nn));
9192 argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
9193 argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argL));
9194 assign(argR, getQReg128(mm));
9195 argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
9196 argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True, size, mkexpr(argR));
9197 IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
9198 IROp opSxR = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
9199 assign(resHi, binop(opSxR,
9200 binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
9201 mkU8(1)));
9202 assign(resLo, binop(opSxR,
9203 binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
9204 mkU8(1)));
9205 res = math_NARROW_LANES ( resHi, resLo, size );
9206 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9207 const HChar* nm = isADD ? (isU ? "uhadd" : "shadd")
9208 : (isU ? "uhsub" : "shsub");
9209 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9210 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9211 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9212 return True;
9213 }
9214
sewardj62ece662014-08-17 19:59:09 +00009215 if (opcode == BITS5(0,0,0,1,0)) {
9216 /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
9217 /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
9218 if (bitQ == 0 && size == X11) return False; // implied 1d case
9219 Bool isU = bitU == 1;
9220 IRTemp argL = newTempV128();
9221 IRTemp argR = newTempV128();
9222 assign(argL, getQReg128(nn));
9223 assign(argR, getQReg128(mm));
9224 IRTemp res = math_RHADD(size, isU, argL, argR);
9225 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9226 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9227 DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
9228 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9229 return True;
9230 }
9231
sewardja5a6b752014-06-30 07:33:56 +00009232 if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
9233 /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
9234 /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
9235 /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
9236 /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
9237 if (bitQ == 0 && size == X11) return False; // implied 1d case
9238 Bool isADD = opcode == BITS5(0,0,0,0,1);
9239 Bool isU = bitU == 1;
9240 IROp qop = Iop_INVALID;
9241 IROp nop = Iop_INVALID;
9242 if (isADD) {
9243 qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
9244 nop = mkVecADD(size);
9245 } else {
9246 qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
9247 nop = mkVecSUB(size);
9248 }
sewardj8e91fd42014-07-11 12:05:47 +00009249 IRTemp argL = newTempV128();
9250 IRTemp argR = newTempV128();
9251 IRTemp qres = newTempV128();
9252 IRTemp nres = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009253 assign(argL, getQReg128(nn));
9254 assign(argR, getQReg128(mm));
9255 assign(qres, math_MAYBE_ZERO_HI64_fromE(
9256 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
9257 assign(nres, math_MAYBE_ZERO_HI64_fromE(
9258 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
9259 putQReg128(dd, mkexpr(qres));
sewardj8e91fd42014-07-11 12:05:47 +00009260 updateQCFLAGwithDifference(qres, nres);
sewardja5a6b752014-06-30 07:33:56 +00009261 const HChar* nm = isADD ? (isU ? "uqadd" : "sqadd")
9262 : (isU ? "uqsub" : "sqsub");
9263 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9264 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9265 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9266 return True;
9267 }
9268
sewardjdf1628c2014-06-10 22:52:05 +00009269 if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
9270 /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
9271 /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
9272 /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
9273 /* -------- 0,10,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
sewardjdf9d6d52014-06-27 10:43:22 +00009274 Bool isORx = (size & 2) == 2;
sewardjdf1628c2014-06-10 22:52:05 +00009275 Bool invert = (size & 1) == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009276 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009277 assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
sewardjdf1628c2014-06-10 22:52:05 +00009278 getQReg128(nn),
9279 invert ? unop(Iop_NotV128, getQReg128(mm))
9280 : getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009281 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009282 const HChar* names[4] = { "and", "bic", "orr", "orn" };
sewardjdf9d6d52014-06-27 10:43:22 +00009283 const HChar* ar = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00009284 DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
9285 nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
9286 return True;
9287 }
9288
9289 if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
9290 /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
9291 /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
9292 /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
9293 /* -------- 1,10,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009294 IRTemp argD = newTempV128();
9295 IRTemp argN = newTempV128();
9296 IRTemp argM = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009297 assign(argD, getQReg128(dd));
9298 assign(argN, getQReg128(nn));
9299 assign(argM, getQReg128(mm));
9300 const IROp opXOR = Iop_XorV128;
9301 const IROp opAND = Iop_AndV128;
9302 const IROp opNOT = Iop_NotV128;
sewardj8e91fd42014-07-11 12:05:47 +00009303 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009304 switch (size) {
9305 case BITS2(0,0): /* EOR */
sewardjdf9d6d52014-06-27 10:43:22 +00009306 assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
sewardjdf1628c2014-06-10 22:52:05 +00009307 break;
9308 case BITS2(0,1): /* BSL */
sewardjdf9d6d52014-06-27 10:43:22 +00009309 assign(res, binop(opXOR, mkexpr(argM),
9310 binop(opAND,
9311 binop(opXOR, mkexpr(argM), mkexpr(argN)),
9312 mkexpr(argD))));
sewardjdf1628c2014-06-10 22:52:05 +00009313 break;
9314 case BITS2(1,0): /* BIT */
sewardjdf9d6d52014-06-27 10:43:22 +00009315 assign(res, binop(opXOR, mkexpr(argD),
9316 binop(opAND,
9317 binop(opXOR, mkexpr(argD), mkexpr(argN)),
9318 mkexpr(argM))));
sewardjdf1628c2014-06-10 22:52:05 +00009319 break;
9320 case BITS2(1,1): /* BIF */
sewardjdf9d6d52014-06-27 10:43:22 +00009321 assign(res, binop(opXOR, mkexpr(argD),
9322 binop(opAND,
9323 binop(opXOR, mkexpr(argD), mkexpr(argN)),
9324 unop(opNOT, mkexpr(argM)))));
sewardjdf1628c2014-06-10 22:52:05 +00009325 break;
9326 default:
9327 vassert(0);
9328 }
sewardjdf9d6d52014-06-27 10:43:22 +00009329 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009330 const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
sewardjdf9d6d52014-06-27 10:43:22 +00009331 const HChar* arr = bitQ == 1 ? "16b" : "8b";
sewardjdf1628c2014-06-10 22:52:05 +00009332 DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
9333 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9334 return True;
9335 }
9336
9337 if (opcode == BITS5(0,0,1,1,0)) {
9338 /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
9339 /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
9340 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009341 Bool isGT = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00009342 IRExpr* argL = getQReg128(nn);
9343 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009344 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009345 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009346 isGT ? binop(mkVecCMPGTS(size), argL, argR)
9347 : binop(mkVecCMPGTU(size), argL, argR));
sewardjdf9d6d52014-06-27 10:43:22 +00009348 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009349 const HChar* nm = isGT ? "cmgt" : "cmhi";
9350 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9351 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9352 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9353 return True;
9354 }
9355
9356 if (opcode == BITS5(0,0,1,1,1)) {
9357 /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
9358 /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
9359 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009360 Bool isGE = bitU == 0;
sewardjdf1628c2014-06-10 22:52:05 +00009361 IRExpr* argL = getQReg128(nn);
9362 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009363 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009364 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009365 isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
9366 : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
sewardjdf9d6d52014-06-27 10:43:22 +00009367 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009368 const HChar* nm = isGE ? "cmge" : "cmhs";
9369 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9370 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9371 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9372 return True;
9373 }
9374
sewardja6b61f02014-08-17 18:32:14 +00009375 if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
9376 /* -------- 0,xx,01000 SSHL std7_std7_std7 -------- */
9377 /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
9378 /* -------- 1,xx,01000 USHL std7_std7_std7 -------- */
9379 /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
9380 if (bitQ == 0 && size == X11) return False; // implied 1d case
9381 Bool isU = bitU == 1;
9382 Bool isR = opcode == BITS5(0,1,0,1,0);
9383 IROp op = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
9384 : (isU ? mkVecSHU(size) : mkVecSHS(size));
9385 IRTemp res = newTempV128();
9386 assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
9387 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9388 const HChar* nm = isR ? (isU ? "urshl" : "srshl")
9389 : (isU ? "ushl" : "sshl");
9390 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9391 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9392 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9393 return True;
9394 }
9395
sewardj12972182014-08-04 08:09:47 +00009396 if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
9397 /* -------- 0,xx,01001 SQSHL std7_std7_std7 -------- */
9398 /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
9399 /* -------- 1,xx,01001 UQSHL std7_std7_std7 -------- */
9400 /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
9401 if (bitQ == 0 && size == X11) return False; // implied 1d case
9402 Bool isU = bitU == 1;
9403 Bool isR = opcode == BITS5(0,1,0,1,1);
9404 IROp op = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
9405 : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
9406 /* This is a bit tricky. If we're only interested in the lowest 64 bits
9407 of the result (viz, bitQ == 0), then we must adjust the operands to
9408 ensure that the upper part of the result, that we don't care about,
9409 doesn't pollute the returned Q value. To do this, zero out the upper
9410 operand halves beforehand. This works because it means, for the
9411 lanes we don't care about, we are shifting zero by zero, which can
9412 never saturate. */
9413 IRTemp res256 = newTemp(Ity_V256);
9414 IRTemp resSH = newTempV128();
9415 IRTemp resQ = newTempV128();
9416 IRTemp zero = newTempV128();
9417 assign(res256, binop(op,
9418 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
9419 math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
9420 assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
9421 assign(resQ, unop(Iop_V256toV128_1, mkexpr(res256)));
9422 assign(zero, mkV128(0x0000));
9423 putQReg128(dd, mkexpr(resSH));
9424 updateQCFLAGwithDifference(resQ, zero);
9425 const HChar* nm = isR ? (isU ? "uqrshl" : "sqrshl")
9426 : (isU ? "uqshl" : "sqshl");
9427 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9428 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9429 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9430 return True;
9431 }
9432
sewardjdf1628c2014-06-10 22:52:05 +00009433 if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
9434 /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
9435 /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
9436 /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
9437 /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
9438 if (bitQ == 0 && size == X11) return False; // implied 1d case
9439 Bool isU = bitU == 1;
9440 Bool isMAX = (opcode & 1) == 0;
sewardj8e91fd42014-07-11 12:05:47 +00009441 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
9442 : (isU ? mkVecMINU(size) : mkVecMINS(size));
9443 IRTemp t = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009444 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009445 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
sewardjdf1628c2014-06-10 22:52:05 +00009446 const HChar* nm = isMAX ? (isU ? "umax" : "smax")
9447 : (isU ? "umin" : "smin");
9448 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9449 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9450 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9451 return True;
9452 }
9453
sewardjdf9d6d52014-06-27 10:43:22 +00009454 if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
9455 /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
9456 /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
9457 /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
9458 /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
9459 if (size == X11) return False; // 1d/2d cases not allowed
9460 Bool isU = bitU == 1;
9461 Bool isACC = opcode == BITS5(0,1,1,1,1);
sewardjdf9d6d52014-06-27 10:43:22 +00009462 vassert(size <= 2);
9463 IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009464 IRTemp t2 = newTempV128();
9465 assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
sewardjdf9d6d52014-06-27 10:43:22 +00009466 : mkexpr(t1));
9467 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
9468 const HChar* nm = isACC ? (isU ? "uaba" : "saba")
9469 : (isU ? "uabd" : "sabd");
9470 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9471 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9472 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9473 return True;
9474 }
9475
sewardjdf1628c2014-06-10 22:52:05 +00009476 if (opcode == BITS5(1,0,0,0,0)) {
9477 /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
9478 /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
9479 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009480 Bool isSUB = bitU == 1;
9481 IROp op = isSUB ? mkVecSUB(size) : mkVecADD(size);
9482 IRTemp t = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009483 assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009484 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
sewardjdf1628c2014-06-10 22:52:05 +00009485 const HChar* nm = isSUB ? "sub" : "add";
9486 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9487 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9488 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9489 return True;
9490 }
9491
9492 if (opcode == BITS5(1,0,0,0,1)) {
9493 /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
9494 /* -------- 1,xx,10001 CMEQ std7_std7_std7 -------- */ // ==
9495 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009496 Bool isEQ = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009497 IRExpr* argL = getQReg128(nn);
9498 IRExpr* argR = getQReg128(mm);
sewardj8e91fd42014-07-11 12:05:47 +00009499 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009500 assign(res,
sewardj8e91fd42014-07-11 12:05:47 +00009501 isEQ ? binop(mkVecCMPEQ(size), argL, argR)
9502 : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
sewardjdf1628c2014-06-10 22:52:05 +00009503 binop(Iop_AndV128, argL, argR),
9504 mkV128(0x0000))));
sewardjdf9d6d52014-06-27 10:43:22 +00009505 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009506 const HChar* nm = isEQ ? "cmeq" : "cmtst";
9507 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9508 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9509 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9510 return True;
9511 }
9512
9513 if (opcode == BITS5(1,0,0,1,0)) {
9514 /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
9515 /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
9516 if (bitQ == 0 && size == X11) return False; // implied 1d case
9517 Bool isMLS = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009518 IROp opMUL = mkVecMUL(size);
9519 IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
9520 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009521 if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
9522 assign(res, binop(opADDSUB,
9523 getQReg128(dd),
9524 binop(opMUL, getQReg128(nn), getQReg128(mm))));
sewardjdf9d6d52014-06-27 10:43:22 +00009525 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009526 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9527 DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
9528 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9529 return True;
9530 }
9531 return False;
9532 }
9533
9534 if (opcode == BITS5(1,0,0,1,1)) {
9535 /* -------- 0,xx,10011 MUL std7_std7_std7 -------- */
9536 /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
9537 if (bitQ == 0 && size == X11) return False; // implied 1d case
9538 Bool isPMUL = bitU == 1;
sewardjdf1628c2014-06-10 22:52:05 +00009539 const IROp opsPMUL[4]
9540 = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
sewardj8e91fd42014-07-11 12:05:47 +00009541 IROp opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
9542 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009543 if (opMUL != Iop_INVALID) {
9544 assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009545 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009546 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9547 DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
9548 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9549 return True;
9550 }
9551 return False;
9552 }
9553
sewardja5a6b752014-06-30 07:33:56 +00009554 if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
9555 /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
9556 /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
9557 /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
9558 /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
9559 if (size == X11) return False;
9560 Bool isU = bitU == 1;
9561 Bool isMAX = opcode == BITS5(1,0,1,0,0);
sewardj8e91fd42014-07-11 12:05:47 +00009562 IRTemp vN = newTempV128();
9563 IRTemp vM = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009564 IROp op = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
9565 : (isU ? mkVecMINU(size) : mkVecMINS(size));
9566 assign(vN, getQReg128(nn));
9567 assign(vM, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009568 IRTemp res128 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009569 assign(res128,
9570 binop(op,
9571 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
9572 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
9573 /* In the half-width case, use CatEL32x4 to extract the half-width
9574 result from the full-width result. */
9575 IRExpr* res
9576 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
9577 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
9578 mkexpr(res128)))
9579 : mkexpr(res128);
9580 putQReg128(dd, res);
9581 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9582 const HChar* nm = isMAX ? (isU ? "umaxp" : "smaxp")
9583 : (isU ? "uminp" : "sminp");
9584 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9585 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9586 return True;
9587 }
9588
sewardj54ffa1d2014-07-22 09:27:49 +00009589 if (opcode == BITS5(1,0,1,1,0)) {
9590 /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
9591 /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
9592 if (size == X00 || size == X11) return False;
9593 Bool isR = bitU == 1;
9594 IRTemp res, sat1q, sat1n, vN, vM;
9595 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
9596 newTempsV128_2(&vN, &vM);
9597 assign(vN, getQReg128(nn));
9598 assign(vM, getQReg128(mm));
9599 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
9600 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9601 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
9602 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
9603 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9604 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
9605 DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
9606 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9607 return True;
9608 }
9609
sewardja5a6b752014-06-30 07:33:56 +00009610 if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
9611 /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
9612 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009613 IRTemp vN = newTempV128();
9614 IRTemp vM = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009615 assign(vN, getQReg128(nn));
9616 assign(vM, getQReg128(mm));
sewardj8e91fd42014-07-11 12:05:47 +00009617 IRTemp res128 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009618 assign(res128,
9619 binop(mkVecADD(size),
9620 binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
9621 binop(mkVecCATODDLANES(size), mkexpr(vM), mkexpr(vN))));
9622 /* In the half-width case, use CatEL32x4 to extract the half-width
9623 result from the full-width result. */
9624 IRExpr* res
9625 = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
9626 binop(Iop_CatEvenLanes32x4, mkexpr(res128),
9627 mkexpr(res128)))
9628 : mkexpr(res128);
9629 putQReg128(dd, res);
9630 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9631 DIP("addp %s.%s, %s.%s, %s.%s\n",
9632 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9633 return True;
9634 }
9635
sewardjdf1628c2014-06-10 22:52:05 +00009636 if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
9637 /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9638 /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9639 Bool isD = (size & 1) == 1;
9640 Bool isSUB = (size & 2) == 2;
9641 if (bitQ == 0 && isD) return False; // implied 1d case
9642 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
9643 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
9644 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
9645 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009646 IRTemp t1 = newTempV128();
9647 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009648 // FIXME: double rounding; use FMA primops instead
9649 assign(t1, triop(opMUL,
9650 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
9651 assign(t2, triop(isSUB ? opSUB : opADD,
9652 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
sewardjdf9d6d52014-06-27 10:43:22 +00009653 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
sewardjdf1628c2014-06-10 22:52:05 +00009654 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9655 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
9656 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9657 return True;
9658 }
9659
9660 if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
9661 /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9662 /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9663 Bool isD = (size & 1) == 1;
9664 Bool isSUB = (size & 2) == 2;
9665 if (bitQ == 0 && isD) return False; // implied 1d case
9666 const IROp ops[4]
9667 = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
9668 IROp op = ops[size];
9669 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009670 IRTemp t1 = newTempV128();
9671 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009672 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009673 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009674 putQReg128(dd, mkexpr(t2));
9675 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9676 DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
9677 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9678 return True;
9679 }
9680
9681 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
9682 /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9683 Bool isD = (size & 1) == 1;
9684 if (bitQ == 0 && isD) return False; // implied 1d case
9685 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
9686 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
9687 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009688 IRTemp t1 = newTempV128();
9689 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009690 // FIXME: use Abd primop instead?
sewardjdf9d6d52014-06-27 10:43:22 +00009691 assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf1628c2014-06-10 22:52:05 +00009692 assign(t2, unop(opABS, mkexpr(t1)));
sewardjdf9d6d52014-06-27 10:43:22 +00009693 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
sewardjdf1628c2014-06-10 22:52:05 +00009694 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9695 DIP("fabd %s.%s, %s.%s, %s.%s\n",
9696 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9697 return True;
9698 }
9699
9700 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
9701 /* -------- 1,0x,11011 FMUL 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9702 Bool isD = (size & 1) == 1;
9703 if (bitQ == 0 && isD) return False; // implied 1d case
9704 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009705 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009706 assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
9707 mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009708 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009709 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9710 DIP("fmul %s.%s, %s.%s, %s.%s\n",
9711 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9712 return True;
9713 }
9714
9715 if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
9716 /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9717 /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9718 Bool isD = (size & 1) == 1;
9719 if (bitQ == 0 && isD) return False; // implied 1d case
9720 Bool isGE = bitU == 1;
9721 IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
9722 : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
sewardj8e91fd42014-07-11 12:05:47 +00009723 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009724 assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
9725 : binop(opCMP, getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009726 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009727 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9728 DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
9729 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9730 return True;
9731 }
9732
9733 if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
9734 /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9735 Bool isD = (size & 1) == 1;
9736 if (bitQ == 0 && isD) return False; // implied 1d case
9737 IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
sewardj8e91fd42014-07-11 12:05:47 +00009738 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009739 assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
sewardjdf9d6d52014-06-27 10:43:22 +00009740 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009741 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9742 DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
9743 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9744 return True;
9745 }
9746
9747 if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
9748 /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9749 /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9750 Bool isD = (size & 1) == 1;
9751 Bool isGT = (size & 2) == 2;
9752 if (bitQ == 0 && isD) return False; // implied 1d case
9753 IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
9754 : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
9755 IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
sewardj8e91fd42014-07-11 12:05:47 +00009756 IRTemp t1 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009757 assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
9758 unop(opABS, getQReg128(nn)))); // swapd
sewardjdf9d6d52014-06-27 10:43:22 +00009759 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009760 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9761 DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
9762 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9763 return True;
9764 }
9765
9766 if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
9767 /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
9768 Bool isD = (size & 1) == 1;
9769 if (bitQ == 0 && isD) return False; // implied 1d case
9770 vassert(size <= 1);
9771 const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
9772 IROp op = ops[size];
9773 IRTemp rm = mk_get_IR_rounding_mode();
sewardj8e91fd42014-07-11 12:05:47 +00009774 IRTemp t1 = newTempV128();
9775 IRTemp t2 = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +00009776 assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
sewardjdf9d6d52014-06-27 10:43:22 +00009777 assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
sewardjdf1628c2014-06-10 22:52:05 +00009778 putQReg128(dd, mkexpr(t2));
9779 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
9780 DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
9781 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
9782 return True;
9783 }
9784
9785 return False;
9786# undef INSN
9787}
9788
9789
9790static
9791Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
9792{
9793 /* 31 30 29 28 23 21 16 11 9 4
9794 0 Q U 01110 size 10000 opcode 10 n d
9795 Decode fields: U,size,opcode
9796 */
9797# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9798 if (INSN(31,31) != 0
9799 || INSN(28,24) != BITS5(0,1,1,1,0)
9800 || INSN(21,17) != BITS5(1,0,0,0,0)
9801 || INSN(11,10) != BITS2(1,0)) {
9802 return False;
9803 }
9804 UInt bitQ = INSN(30,30);
9805 UInt bitU = INSN(29,29);
9806 UInt size = INSN(23,22);
9807 UInt opcode = INSN(16,12);
9808 UInt nn = INSN(9,5);
9809 UInt dd = INSN(4,0);
9810 vassert(size < 4);
9811
sewardjdf9d6d52014-06-27 10:43:22 +00009812 if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
9813 /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
9814 /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
9815 /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
9816 const IROp iops[3] = { Iop_Reverse8sIn64_x2,
9817 Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
9818 vassert(size <= 2);
sewardj8e91fd42014-07-11 12:05:47 +00009819 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009820 assign(res, unop(iops[size], getQReg128(nn)));
9821 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9822 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9823 DIP("%s %s.%s, %s.%s\n", "rev64",
9824 nameQReg128(dd), arr, nameQReg128(nn), arr);
9825 return True;
9826 }
9827
9828 if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
9829 /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
9830 /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
9831 Bool isH = size == X01;
sewardj8e91fd42014-07-11 12:05:47 +00009832 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00009833 IROp iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
9834 assign(res, unop(iop, getQReg128(nn)));
9835 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9836 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9837 DIP("%s %s.%s, %s.%s\n", "rev32",
9838 nameQReg128(dd), arr, nameQReg128(nn), arr);
9839 return True;
9840 }
9841
sewardj715d1622014-06-26 12:39:05 +00009842 if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
9843 /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009844 IRTemp res = newTempV128();
sewardj715d1622014-06-26 12:39:05 +00009845 assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
9846 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf9d6d52014-06-27 10:43:22 +00009847 const HChar* arr = nameArr_Q_SZ(bitQ, size);
sewardj715d1622014-06-26 12:39:05 +00009848 DIP("%s %s.%s, %s.%s\n", "rev16",
9849 nameQReg128(dd), arr, nameQReg128(nn), arr);
9850 return True;
9851 }
9852
sewardja5a6b752014-06-30 07:33:56 +00009853 if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
9854 /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
9855 /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
9856 /* -------- 0,xx,00110: SADALP std6_std6 -------- */
9857 /* -------- 1,xx,00110: UADALP std6_std6 -------- */
9858 /* Widens, and size refers to the narrow size. */
9859 if (size == X11) return False; // no 1d or 2d cases
9860 Bool isU = bitU == 1;
9861 Bool isACC = opcode == BITS5(0,0,1,1,0);
sewardj8e91fd42014-07-11 12:05:47 +00009862 IRTemp src = newTempV128();
9863 IRTemp sum = newTempV128();
9864 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00009865 assign(src, getQReg128(nn));
9866 assign(sum,
9867 binop(mkVecADD(size+1),
9868 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9869 isU, True/*fromOdd*/, size, mkexpr(src))),
9870 mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
9871 isU, False/*!fromOdd*/, size, mkexpr(src)))));
9872 assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
9873 : mkexpr(sum));
9874 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
9875 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
9876 const HChar* arrWide = nameArr_Q_SZ(bitQ, size+1);
9877 DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
9878 : (isU ? "uaddlp" : "saddlp"),
9879 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
9880 return True;
9881 }
9882
sewardjf7003bc2014-08-18 12:28:02 +00009883 if (opcode == BITS5(0,0,0,1,1)) {
9884 /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
9885 /* -------- 1,xx,00011: USQADD std7_std7 -------- */
9886 if (bitQ == 0 && size == X11) return False; // implied 1d case
9887 Bool isUSQADD = bitU == 1;
9888 /* This is switched (in the US vs SU sense) deliberately.
9889 SUQADD corresponds to the ExtUSsatSS variants and
9890 USQADD corresponds to the ExtSUsatUU variants.
9891 See libvex_ir for more details. */
9892 IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
9893 : mkVecQADDEXTUSSATSS(size);
9894 IROp nop = mkVecADD(size);
9895 IRTemp argL = newTempV128();
9896 IRTemp argR = newTempV128();
9897 IRTemp qres = newTempV128();
9898 IRTemp nres = newTempV128();
9899 /* Because the two arguments to the addition are implicitly
9900 extended differently (one signedly, the other unsignedly) it is
9901 important to present them to the primop in the correct order. */
9902 assign(argL, getQReg128(nn));
9903 assign(argR, getQReg128(dd));
9904 assign(qres, math_MAYBE_ZERO_HI64_fromE(
9905 bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
9906 assign(nres, math_MAYBE_ZERO_HI64_fromE(
9907 bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
9908 putQReg128(dd, mkexpr(qres));
9909 updateQCFLAGwithDifference(qres, nres);
9910 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9911 DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
9912 nameQReg128(dd), arr, nameQReg128(nn), arr);
9913 return True;
9914 }
9915
sewardj2b6fd5e2014-06-19 14:21:37 +00009916 if (opcode == BITS5(0,0,1,0,0)) {
9917 /* -------- 0,xx,00100: CLS std6_std6 -------- */
9918 /* -------- 1,xx,00100: CLZ std6_std6 -------- */
9919 if (size == X11) return False; // no 1d or 2d cases
sewardja8c7b0f2014-06-26 08:18:08 +00009920 const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
9921 const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
sewardj2b6fd5e2014-06-19 14:21:37 +00009922 Bool isCLZ = bitU == 1;
sewardj8e91fd42014-07-11 12:05:47 +00009923 IRTemp res = newTempV128();
sewardj2b6fd5e2014-06-19 14:21:37 +00009924 vassert(size <= 2);
9925 assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009926 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj2b6fd5e2014-06-19 14:21:37 +00009927 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9928 DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
9929 nameQReg128(dd), arr, nameQReg128(nn), arr);
9930 return True;
9931 }
9932
sewardj787a67f2014-06-23 09:09:41 +00009933 if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
sewardj2b6fd5e2014-06-19 14:21:37 +00009934 /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
sewardj787a67f2014-06-23 09:09:41 +00009935 /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009936 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +00009937 assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009938 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj715d1622014-06-26 12:39:05 +00009939 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
sewardj787a67f2014-06-23 09:09:41 +00009940 DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
sewardj2b6fd5e2014-06-19 14:21:37 +00009941 nameQReg128(dd), arr, nameQReg128(nn), arr);
9942 return True;
9943 }
9944
sewardj715d1622014-06-26 12:39:05 +00009945 if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
9946 /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009947 IRTemp res = newTempV128();
sewardj715d1622014-06-26 12:39:05 +00009948 assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +00009949 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj715d1622014-06-26 12:39:05 +00009950 const HChar* arr = nameArr_Q_SZ(bitQ, 0);
9951 DIP("%s %s.%s, %s.%s\n", "rbit",
9952 nameQReg128(dd), arr, nameQReg128(nn), arr);
9953 return True;
9954 }
9955
sewardj51d012a2014-07-21 09:19:50 +00009956 if (opcode == BITS5(0,0,1,1,1)) {
sewardj8e91fd42014-07-11 12:05:47 +00009957 /* -------- 0,xx,00111 SQABS std7_std7 -------- */
sewardj51d012a2014-07-21 09:19:50 +00009958 /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
sewardj8e91fd42014-07-11 12:05:47 +00009959 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj51d012a2014-07-21 09:19:50 +00009960 Bool isNEG = bitU == 1;
9961 IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
9962 (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
9963 getQReg128(nn), size );
sewardj8e91fd42014-07-11 12:05:47 +00009964 IRTemp qres = newTempV128(), nres = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00009965 assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
9966 assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
sewardj8e91fd42014-07-11 12:05:47 +00009967 putQReg128(dd, mkexpr(qres));
9968 updateQCFLAGwithDifference(qres, nres);
9969 const HChar* arr = nameArr_Q_SZ(bitQ, size);
sewardj51d012a2014-07-21 09:19:50 +00009970 DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
sewardj8e91fd42014-07-11 12:05:47 +00009971 nameQReg128(dd), arr, nameQReg128(nn), arr);
9972 return True;
9973 }
9974
sewardjdf1628c2014-06-10 22:52:05 +00009975 if (opcode == BITS5(0,1,0,0,0)) {
9976 /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
9977 /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
9978 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +00009979 Bool isGT = bitU == 0;
9980 IRExpr* argL = getQReg128(nn);
9981 IRExpr* argR = mkV128(0x0000);
9982 IRTemp res = newTempV128();
9983 IROp opGTS = mkVecCMPGTS(size);
9984 assign(res, isGT ? binop(opGTS, argL, argR)
9985 : unop(Iop_NotV128, binop(opGTS, argR, argL)));
sewardjdf9d6d52014-06-27 10:43:22 +00009986 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +00009987 const HChar* arr = nameArr_Q_SZ(bitQ, size);
9988 DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
9989 nameQReg128(dd), arr, nameQReg128(nn), arr);
9990 return True;
9991 }
9992
9993 if (opcode == BITS5(0,1,0,0,1)) {
9994 /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
9995 /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
9996 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjdf1628c2014-06-10 22:52:05 +00009997 Bool isEQ = bitU == 0;
9998 IRExpr* argL = getQReg128(nn);
9999 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010000 IRTemp res = newTempV128();
10001 assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
sewardjdf1628c2014-06-10 22:52:05 +000010002 : unop(Iop_NotV128,
sewardj8e91fd42014-07-11 12:05:47 +000010003 binop(mkVecCMPGTS(size), argL, argR)));
sewardjdf9d6d52014-06-27 10:43:22 +000010004 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010005 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10006 DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
10007 nameQReg128(dd), arr, nameQReg128(nn), arr);
10008 return True;
10009 }
10010
10011 if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
10012 /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
10013 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjdf1628c2014-06-10 22:52:05 +000010014 IRExpr* argL = getQReg128(nn);
10015 IRExpr* argR = mkV128(0x0000);
sewardj8e91fd42014-07-11 12:05:47 +000010016 IRTemp res = newTempV128();
10017 assign(res, binop(mkVecCMPGTS(size), argR, argL));
sewardjdf9d6d52014-06-27 10:43:22 +000010018 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010019 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10020 DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
10021 nameQReg128(dd), arr, nameQReg128(nn), arr);
10022 return True;
10023 }
10024
sewardj25523c42014-06-15 19:36:29 +000010025 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
10026 /* -------- 0,xx,01011: ABS std7_std7 -------- */
10027 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000010028 IRTemp res = newTempV128();
10029 assign(res, unop(mkVecABS(size), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010030 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj25523c42014-06-15 19:36:29 +000010031 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10032 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
10033 return True;
10034 }
10035
sewardjdf1628c2014-06-10 22:52:05 +000010036 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
10037 /* -------- 1,xx,01011: NEG std7_std7 -------- */
10038 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000010039 IRTemp res = newTempV128();
10040 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010041 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010042 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10043 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
10044 return True;
10045 }
10046
10047 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
10048 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
10049 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
10050 if (bitQ == 0 && size == X11) return False; // implied 1d case
10051 Bool isFNEG = bitU == 1;
10052 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
10053 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
sewardj8e91fd42014-07-11 12:05:47 +000010054 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +000010055 assign(res, unop(op, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000010056 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000010057 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
10058 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
10059 nameQReg128(dd), arr, nameQReg128(nn), arr);
10060 return True;
10061 }
10062
10063 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
10064 /* -------- 0,xx,10010: XTN{,2} -------- */
sewardjecedd982014-08-11 14:02:47 +000010065 if (size == X11) return False;
10066 vassert(size < 3);
10067 Bool is2 = bitQ == 1;
10068 IROp opN = mkVecNARROWUN(size);
10069 IRTemp resN = newTempV128();
10070 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
10071 putLO64andZUorPutHI64(is2, dd, resN);
10072 const HChar* nm = "xtn";
10073 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10074 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10075 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
10076 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
10077 return True;
10078 }
10079
10080 if (opcode == BITS5(1,0,1,0,0)
10081 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
10082 /* -------- 0,xx,10100: SQXTN{,2} -------- */
10083 /* -------- 1,xx,10100: UQXTN{,2} -------- */
10084 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
10085 if (size == X11) return False;
10086 vassert(size < 3);
10087 Bool is2 = bitQ == 1;
10088 IROp opN = Iop_INVALID;
10089 Bool zWiden = True;
10090 const HChar* nm = "??";
10091 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
10092 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
sewardjdf1628c2014-06-10 22:52:05 +000010093 }
sewardjecedd982014-08-11 14:02:47 +000010094 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
10095 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
sewardjdf1628c2014-06-10 22:52:05 +000010096 }
sewardjecedd982014-08-11 14:02:47 +000010097 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
10098 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
10099 }
10100 else vassert(0);
10101 IRTemp src = newTempV128();
10102 assign(src, getQReg128(nn));
10103 IRTemp resN = newTempV128();
10104 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
10105 putLO64andZUorPutHI64(is2, dd, resN);
10106 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
10107 size, mkexpr(resN));
10108 updateQCFLAGwithDifference(src, resW);
10109 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10110 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10111 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
10112 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
10113 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010114 }
10115
sewardj487559e2014-07-10 14:22:45 +000010116 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
10117 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
10118 /* Widens, and size is the narrow size. */
10119 if (size == X11) return False;
10120 Bool is2 = bitQ == 1;
10121 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
10122 IROp opSHL = mkVecSHLN(size+1);
sewardj8e91fd42014-07-11 12:05:47 +000010123 IRTemp src = newTempV128();
10124 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010125 assign(src, getQReg128(nn));
10126 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
10127 mkU8(8 << size)));
10128 putQReg128(dd, mkexpr(res));
10129 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10130 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10131 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
10132 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
10133 return True;
10134 }
10135
sewardjdf1628c2014-06-10 22:52:05 +000010136 if (bitU == 0 && size == X01 && opcode == BITS5(1,0,1,1,0)) {
10137 /* -------- 0,01,10110: FCVTN 2s/4s_2d -------- */
10138 IRTemp rm = mk_get_IR_rounding_mode();
10139 IRExpr* srcLo = getQRegLane(nn, 0, Ity_F64);
10140 IRExpr* srcHi = getQRegLane(nn, 1, Ity_F64);
10141 putQRegLane(dd, 2 * bitQ + 0, binop(Iop_F64toF32, mkexpr(rm), srcLo));
10142 putQRegLane(dd, 2 * bitQ + 1, binop(Iop_F64toF32, mkexpr(rm), srcHi));
10143 if (bitQ == 0) {
10144 putQRegLane(dd, 1, mkU64(0));
10145 }
10146 DIP("fcvtn%s %s.%s, %s.2d\n", bitQ ? "2" : "",
10147 nameQReg128(dd), bitQ ? "4s" : "2s", nameQReg128(nn));
10148 return True;
10149 }
10150
sewardjfc261d92014-08-24 20:36:14 +000010151 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
10152 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
10153 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
10154 Bool isREC = bitU == 0;
10155 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
10156 IRTemp res = newTempV128();
10157 assign(res, unop(op, getQReg128(nn)));
10158 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10159 const HChar* nm = isREC ? "urecpe" : "ursqrte";
10160 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10161 DIP("%s %s.%s, %s.%s\n", nm,
10162 nameQReg128(dd), arr, nameQReg128(nn), arr);
10163 return True;
10164 }
10165
sewardj5747c4a2014-06-11 20:57:23 +000010166 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
10167 /* -------- 0,0x,11101: SCVTF -------- */
10168 /* -------- 1,0x,11101: UCVTF -------- */
10169 /* 31 28 22 21 15 9 4
10170 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
10171 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
10172 with laneage:
10173 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
10174 */
10175 Bool isQ = bitQ == 1;
10176 Bool isU = bitU == 1;
10177 Bool isF64 = (size & 1) == 1;
10178 if (isQ || !isF64) {
10179 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
10180 UInt nLanes = 0;
10181 Bool zeroHI = False;
10182 const HChar* arrSpec = NULL;
10183 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
10184 isQ, isF64 );
10185 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
10186 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
10187 IRTemp rm = mk_get_IR_rounding_mode();
10188 UInt i;
10189 vassert(ok); /* the 'if' above should ensure this */
10190 for (i = 0; i < nLanes; i++) {
10191 putQRegLane(dd, i,
10192 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
10193 }
10194 if (zeroHI) {
10195 putQRegLane(dd, 1, mkU64(0));
10196 }
10197 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
10198 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
10199 return True;
10200 }
10201 /* else fall through */
10202 }
10203
sewardjdf1628c2014-06-10 22:52:05 +000010204 return False;
10205# undef INSN
10206}
10207
sewardjfc83d2c2014-06-12 10:15:46 +000010208
sewardjdf1628c2014-06-10 22:52:05 +000010209static
10210Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
10211{
sewardj85fbb022014-06-12 13:16:01 +000010212 /* 31 28 23 21 20 19 15 11 9 4
10213 0 Q U 01111 size L M m opcode H 0 n d
10214 Decode fields are: u,size,opcode
sewardj787a67f2014-06-23 09:09:41 +000010215 M is really part of the mm register number. Individual
10216 cases need to inspect L and H though.
sewardj85fbb022014-06-12 13:16:01 +000010217 */
sewardjdf1628c2014-06-10 22:52:05 +000010218# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj85fbb022014-06-12 13:16:01 +000010219 if (INSN(31,31) != 0
sewardj8e91fd42014-07-11 12:05:47 +000010220 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
sewardj85fbb022014-06-12 13:16:01 +000010221 return False;
10222 }
10223 UInt bitQ = INSN(30,30);
10224 UInt bitU = INSN(29,29);
10225 UInt size = INSN(23,22);
10226 UInt bitL = INSN(21,21);
10227 UInt bitM = INSN(20,20);
10228 UInt mmLO4 = INSN(19,16);
10229 UInt opcode = INSN(15,12);
10230 UInt bitH = INSN(11,11);
10231 UInt nn = INSN(9,5);
10232 UInt dd = INSN(4,0);
sewardj85fbb022014-06-12 13:16:01 +000010233 vassert(size < 4);
sewardj787a67f2014-06-23 09:09:41 +000010234 vassert(bitH < 2 && bitM < 2 && bitL < 2);
sewardj85fbb022014-06-12 13:16:01 +000010235
10236 if (bitU == 0 && size >= X10 && opcode == BITS4(1,0,0,1)) {
10237 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
10238 if (bitQ == 0 && size == X11) return False; // implied 1d case
10239 Bool isD = (size & 1) == 1;
10240 UInt index;
10241 if (!isD) index = (bitH << 1) | bitL;
10242 else if (isD && bitL == 0) index = bitH;
10243 else return False; // sz:L == x11 => unallocated encoding
10244 vassert(index < (isD ? 2 : 4));
10245 IRType ity = isD ? Ity_F64 : Ity_F32;
10246 IRTemp elem = newTemp(ity);
sewardj787a67f2014-06-23 09:09:41 +000010247 UInt mm = (bitM << 4) | mmLO4;
sewardj85fbb022014-06-12 13:16:01 +000010248 assign(elem, getQRegLane(mm, index, ity));
10249 IRTemp dupd = math_DUP_TO_V128(elem, ity);
sewardj8e91fd42014-07-11 12:05:47 +000010250 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +000010251 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
10252 mkexpr(mk_get_IR_rounding_mode()),
10253 getQReg128(nn), mkexpr(dupd)));
sewardjdf9d6d52014-06-27 10:43:22 +000010254 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj85fbb022014-06-12 13:16:01 +000010255 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
10256 DIP("fmul %s.%s, %s.%s, %s.%c[%u]\n", nameQReg128(dd), arr,
10257 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
10258 return True;
10259 }
10260
sewardj787a67f2014-06-23 09:09:41 +000010261 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
10262 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
10263 /* -------- 1,xx,0000 MLA s/h variants only -------- */
10264 /* -------- 1,xx,0100 MLS s/h variants only -------- */
10265 /* -------- 0,xx,1000 MUL s/h variants only -------- */
10266 Bool isMLA = opcode == BITS4(0,0,0,0);
10267 Bool isMLS = opcode == BITS4(0,1,0,0);
10268 UInt mm = 32; // invalid
10269 UInt ix = 16; // invalid
10270 switch (size) {
10271 case X00:
10272 return False; // b case is not allowed
10273 case X01:
10274 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10275 case X10:
10276 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10277 case X11:
10278 return False; // d case is not allowed
10279 default:
10280 vassert(0);
10281 }
10282 vassert(mm < 32 && ix < 16);
sewardj487559e2014-07-10 14:22:45 +000010283 IROp opMUL = mkVecMUL(size);
10284 IROp opADD = mkVecADD(size);
10285 IROp opSUB = mkVecSUB(size);
sewardj787a67f2014-06-23 09:09:41 +000010286 HChar ch = size == X01 ? 'h' : 's';
sewardj487559e2014-07-10 14:22:45 +000010287 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000010288 IRTemp vecD = newTempV128();
10289 IRTemp vecN = newTempV128();
10290 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +000010291 assign(vecD, getQReg128(dd));
10292 assign(vecN, getQReg128(nn));
10293 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
10294 if (isMLA || isMLS) {
10295 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
10296 } else {
10297 assign(res, prod);
10298 }
sewardjdf9d6d52014-06-27 10:43:22 +000010299 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj787a67f2014-06-23 09:09:41 +000010300 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10301 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
10302 : (isMLS ? "mls" : "mul"),
10303 nameQReg128(dd), arr,
10304 nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
10305 return True;
10306 }
10307
sewardj487559e2014-07-10 14:22:45 +000010308 if (opcode == BITS4(1,0,1,0)
10309 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
10310 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
10311 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
10312 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
10313 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
10314 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
10315 /* -------- 1,xx,0110 SMLSL s/h variants only -------- */ // 2
10316 /* Widens, and size refers to the narrowed lanes. */
10317 UInt ks = 3;
10318 switch (opcode) {
10319 case BITS4(1,0,1,0): ks = 0; break;
10320 case BITS4(0,0,1,0): ks = 1; break;
10321 case BITS4(0,1,1,0): ks = 2; break;
10322 default: vassert(0);
10323 }
10324 vassert(ks >= 0 && ks <= 2);
10325 Bool isU = bitU == 1;
10326 Bool is2 = bitQ == 1;
10327 UInt mm = 32; // invalid
10328 UInt ix = 16; // invalid
10329 switch (size) {
10330 case X00:
10331 return False; // h_b_b[] case is not allowed
10332 case X01:
10333 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10334 case X10:
10335 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10336 case X11:
10337 return False; // q_d_d[] case is not allowed
10338 default:
10339 vassert(0);
10340 }
10341 vassert(mm < 32 && ix < 16);
sewardj51d012a2014-07-21 09:19:50 +000010342 IRTemp vecN = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010343 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000010344 IRTemp vecD = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010345 assign(vecN, getQReg128(nn));
sewardj51d012a2014-07-21 09:19:50 +000010346 assign(vecD, getQReg128(dd));
10347 IRTemp res = IRTemp_INVALID;
10348 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
10349 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj487559e2014-07-10 14:22:45 +000010350 putQReg128(dd, mkexpr(res));
10351 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
10352 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10353 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10354 HChar ch = size == X01 ? 'h' : 's';
10355 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
10356 isU ? 'u' : 's', nm, is2 ? "2" : "",
10357 nameQReg128(dd), arrWide,
10358 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
10359 return True;
10360 }
10361
sewardj51d012a2014-07-21 09:19:50 +000010362 if (bitU == 0
10363 && (opcode == BITS4(1,0,1,1)
10364 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
10365 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
10366 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
10367 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
10368 /* Widens, and size refers to the narrowed lanes. */
10369 UInt ks = 3;
10370 switch (opcode) {
10371 case BITS4(1,0,1,1): ks = 0; break;
10372 case BITS4(0,0,1,1): ks = 1; break;
10373 case BITS4(0,1,1,1): ks = 2; break;
10374 default: vassert(0);
10375 }
10376 vassert(ks >= 0 && ks <= 2);
10377 Bool is2 = bitQ == 1;
10378 UInt mm = 32; // invalid
10379 UInt ix = 16; // invalid
10380 switch (size) {
10381 case X00:
10382 return False; // h_b_b[] case is not allowed
10383 case X01:
10384 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10385 case X10:
10386 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10387 case X11:
10388 return False; // q_d_d[] case is not allowed
10389 default:
10390 vassert(0);
10391 }
10392 vassert(mm < 32 && ix < 16);
10393 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
10394 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
10395 newTempsV128_2(&vecN, &vecD);
10396 assign(vecN, getQReg128(nn));
10397 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10398 assign(vecD, getQReg128(dd));
10399 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
10400 is2, size, "mas"[ks],
10401 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
10402 putQReg128(dd, mkexpr(res));
10403 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
10404 updateQCFLAGwithDifference(sat1q, sat1n);
10405 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
10406 updateQCFLAGwithDifference(sat2q, sat2n);
10407 }
sewardj54ffa1d2014-07-22 09:27:49 +000010408 const HChar* nm = ks == 0 ? "sqdmull"
sewardj51d012a2014-07-21 09:19:50 +000010409 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
10410 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
10411 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
10412 HChar ch = size == X01 ? 'h' : 's';
10413 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
10414 nm, is2 ? "2" : "",
10415 nameQReg128(dd), arrWide,
10416 nameQReg128(nn), arrNarrow, nameQReg128(dd), ch, ix);
10417 return True;
10418 }
10419
sewardj257e99f2014-08-03 12:45:19 +000010420 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
10421 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
10422 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
10423 UInt mm = 32; // invalid
10424 UInt ix = 16; // invalid
10425 switch (size) {
10426 case X00:
10427 return False; // b case is not allowed
10428 case X01:
10429 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
10430 case X10:
10431 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
10432 case X11:
10433 return False; // q case is not allowed
10434 default:
10435 vassert(0);
10436 }
10437 vassert(mm < 32 && ix < 16);
10438 Bool isR = opcode == BITS4(1,1,0,1);
10439 IRTemp res, sat1q, sat1n, vN, vM;
10440 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
10441 vN = newTempV128();
10442 assign(vN, getQReg128(nn));
10443 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
10444 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
10445 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10446 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
10447 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
10448 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
10449 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10450 HChar ch = size == X01 ? 'h' : 's';
10451 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
10452 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(dd), ch, ix);
10453 return True;
10454 }
10455
sewardjdf1628c2014-06-10 22:52:05 +000010456 return False;
10457# undef INSN
10458}
10459
sewardjfc83d2c2014-06-12 10:15:46 +000010460
sewardjdf1628c2014-06-10 22:52:05 +000010461static
10462Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
10463{
10464# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10465 return False;
10466# undef INSN
10467}
10468
sewardjfc83d2c2014-06-12 10:15:46 +000010469
sewardjdf1628c2014-06-10 22:52:05 +000010470static
10471Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
10472{
10473# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10474 return False;
10475# undef INSN
10476}
10477
sewardjfc83d2c2014-06-12 10:15:46 +000010478
sewardjdf1628c2014-06-10 22:52:05 +000010479static
10480Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
10481{
10482# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10483 return False;
10484# undef INSN
10485}
10486
sewardj5747c4a2014-06-11 20:57:23 +000010487
sewardjdf1628c2014-06-10 22:52:05 +000010488static
10489Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
10490{
sewardj5747c4a2014-06-11 20:57:23 +000010491 /* 31 28 23 21 20 15 13 9 4
10492 000 11110 ty 1 m op 1000 n opcode2
10493 The first 3 bits are really "M 0 S", but M and S are always zero.
10494 Decode fields are: ty,op,opcode2
10495 */
sewardjdf1628c2014-06-10 22:52:05 +000010496# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000010497 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10498 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
10499 return False;
10500 }
10501 UInt ty = INSN(23,22);
10502 UInt mm = INSN(20,16);
10503 UInt op = INSN(15,14);
10504 UInt nn = INSN(9,5);
10505 UInt opcode2 = INSN(4,0);
10506 vassert(ty < 4);
10507
10508 if (ty <= X01 && op == X00
10509 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
10510 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
10511 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
10512 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
10513 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
10514 /* 31 23 20 15 9 4
10515 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
10516 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
10517 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
10518 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
10519
10520 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
10521 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
10522 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
10523 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
10524
10525 FCMPE generates Invalid Operation exn if either arg is any kind
10526 of NaN. FCMP generates Invalid Operation exn if either arg is a
10527 signalling NaN. We ignore this detail here and produce the same
10528 IR for both.
10529 */
10530 Bool isD = (ty & 1) == 1;
10531 Bool isCMPE = (opcode2 & 16) == 16;
10532 Bool cmpZero = (opcode2 & 8) == 8;
10533 IRType ity = isD ? Ity_F64 : Ity_F32;
10534 Bool valid = True;
10535 if (cmpZero && mm != 0) valid = False;
10536 if (valid) {
10537 IRTemp argL = newTemp(ity);
10538 IRTemp argR = newTemp(ity);
10539 IRTemp irRes = newTemp(Ity_I32);
10540 assign(argL, getQRegLO(nn, ity));
10541 assign(argR,
10542 cmpZero
10543 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
10544 : getQRegLO(mm, ity));
10545 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
10546 mkexpr(argL), mkexpr(argR)));
10547 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
10548 IRTemp nzcv_28x0 = newTemp(Ity_I64);
10549 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
10550 setFlags_COPY(nzcv_28x0);
10551 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
10552 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
10553 return True;
10554 }
10555 return False;
10556 }
10557
sewardjdf1628c2014-06-10 22:52:05 +000010558 return False;
10559# undef INSN
10560}
10561
sewardj5747c4a2014-06-11 20:57:23 +000010562
sewardjdf1628c2014-06-10 22:52:05 +000010563static
10564Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
10565{
10566# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10567 return False;
10568# undef INSN
10569}
10570
sewardjfc83d2c2014-06-12 10:15:46 +000010571
sewardjdf1628c2014-06-10 22:52:05 +000010572static
10573Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
10574{
10575# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10576 return False;
10577# undef INSN
10578}
10579
sewardj5747c4a2014-06-11 20:57:23 +000010580
sewardjdf1628c2014-06-10 22:52:05 +000010581static
10582Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
10583{
10584 /* 31 28 23 21 20 14 9 4
10585 000 11110 ty 1 opcode 10000 n d
10586 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000010587 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000010588 */
10589# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10590 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10591 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
10592 return False;
10593 }
10594 UInt ty = INSN(23,22);
10595 UInt opcode = INSN(20,15);
10596 UInt nn = INSN(9,5);
10597 UInt dd = INSN(4,0);
10598
10599 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
10600 /* -------- 0x,000000: FMOV d_d, s_s -------- */
10601 /* -------- 0x,000001: FABS d_d, s_s -------- */
10602 /* -------- 0x,000010: FNEG d_d, s_s -------- */
10603 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
10604 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
10605 IRTemp src = newTemp(ity);
10606 IRTemp res = newTemp(ity);
10607 const HChar* nm = "??";
10608 assign(src, getQRegLO(nn, ity));
10609 switch (opcode) {
10610 case BITS6(0,0,0,0,0,0):
10611 nm = "fmov"; assign(res, mkexpr(src)); break;
10612 case BITS6(0,0,0,0,0,1):
10613 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
10614 case BITS6(0,0,0,0,1,0):
10615 nm = "fabs"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
10616 case BITS6(0,0,0,0,1,1):
10617 nm = "fsqrt";
10618 assign(res, binop(mkSQRTF(ity),
10619 mkexpr(mk_get_IR_rounding_mode()),
10620 mkexpr(src))); break;
10621 default:
10622 vassert(0);
10623 }
10624 putQReg128(dd, mkV128(0x0000));
10625 putQRegLO(dd, mkexpr(res));
10626 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10627 return True;
10628 }
10629
sewardj5747c4a2014-06-11 20:57:23 +000010630 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
10631 || opcode == BITS6(0,0,0,1,0,1)))
10632 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
10633 || opcode == BITS6(0,0,0,1,0,1)))
10634 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
10635 || opcode == BITS6(0,0,0,1,0,0)))) {
10636 /* -------- 11,000100: FCVT s_h -------- */
10637 /* -------- 11,000101: FCVT d_h -------- */
10638 /* -------- 00,000111: FCVT h_s -------- */
10639 /* -------- 00,000101: FCVT d_s -------- */
10640 /* -------- 01,000111: FCVT h_d -------- */
10641 /* -------- 01,000100: FCVT s_d -------- */
10642 /* 31 23 21 16 14 9 4
10643 000 11110 11 10001 00 10000 n d FCVT Sd, Hn (unimp)
10644 --------- 11 ----- 01 --------- FCVT Dd, Hn (unimp)
10645 --------- 00 ----- 11 --------- FCVT Hd, Sn (unimp)
10646 --------- 00 ----- 01 --------- FCVT Dd, Sn
10647 --------- 01 ----- 11 --------- FCVT Hd, Dn (unimp)
10648 --------- 01 ----- 00 --------- FCVT Sd, Dn
10649 Rounding, when dst is smaller than src, is per the FPCR.
10650 */
10651 UInt b2322 = ty;
10652 UInt b1615 = opcode & BITS2(1,1);
10653 if (b2322 == BITS2(0,0) && b1615 == BITS2(0,1)) {
10654 /* Convert S to D */
10655 IRTemp res = newTemp(Ity_F64);
10656 assign(res, unop(Iop_F32toF64, getQRegLO(nn, Ity_F32)));
10657 putQReg128(dd, mkV128(0x0000));
10658 putQRegLO(dd, mkexpr(res));
10659 DIP("fcvt %s, %s\n",
10660 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, Ity_F32));
10661 return True;
10662 }
10663 if (b2322 == BITS2(0,1) && b1615 == BITS2(0,0)) {
10664 /* Convert D to S */
10665 IRTemp res = newTemp(Ity_F32);
10666 assign(res, binop(Iop_F64toF32, mkexpr(mk_get_IR_rounding_mode()),
10667 getQRegLO(nn, Ity_F64)));
10668 putQReg128(dd, mkV128(0x0000));
10669 putQRegLO(dd, mkexpr(res));
10670 DIP("fcvt %s, %s\n",
10671 nameQRegLO(dd, Ity_F32), nameQRegLO(nn, Ity_F64));
10672 return True;
10673 }
10674 /* else unhandled */
10675 return False;
10676 }
10677
10678 if (ty <= X01
10679 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
10680 && opcode != BITS6(0,0,1,1,0,1)) {
10681 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
10682 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
10683 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
10684 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
10685 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
10686 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
10687 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
10688 /* 31 23 21 17 14 9 4
10689 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
10690 rm
10691 x==0 => S-registers, x==1 => D-registers
10692 rm (17:15) encodings:
10693 111 per FPCR (FRINTI)
10694 001 +inf (FRINTP)
10695 010 -inf (FRINTM)
10696 011 zero (FRINTZ)
10697 000 tieeven
10698 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
10699 110 per FPCR + "exact = TRUE"
10700 101 unallocated
10701 */
10702 Bool isD = (ty & 1) == 1;
10703 UInt rm = opcode & BITS6(0,0,0,1,1,1);
10704 IRType ity = isD ? Ity_F64 : Ity_F32;
10705 IRExpr* irrmE = NULL;
10706 UChar ch = '?';
10707 switch (rm) {
10708 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
10709 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
10710 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
10711 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
10712 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
10713 default: break;
10714 }
10715 if (irrmE) {
10716 IRTemp src = newTemp(ity);
10717 IRTemp dst = newTemp(ity);
10718 assign(src, getQRegLO(nn, ity));
10719 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
10720 irrmE, mkexpr(src)));
10721 putQReg128(dd, mkV128(0x0000));
10722 putQRegLO(dd, mkexpr(dst));
10723 DIP("frint%c %s, %s\n",
10724 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
10725 return True;
10726 }
10727 return False;
10728 }
10729
sewardjdf1628c2014-06-10 22:52:05 +000010730 return False;
10731# undef INSN
10732}
10733
10734
10735static
10736Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
10737{
10738 /* 31 28 23 21 20 15 11 9 4
10739 000 11110 ty 1 m opcode 10 n d
10740 The first 3 bits are really "M 0 S", but M and S are always zero.
10741 */
10742# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10743 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10744 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
10745 return False;
10746 }
10747 UInt ty = INSN(23,22);
10748 UInt mm = INSN(20,16);
10749 UInt opcode = INSN(15,12);
10750 UInt nn = INSN(9,5);
10751 UInt dd = INSN(4,0);
10752
10753 if (ty <= X01 && opcode <= BITS4(0,0,1,1)) {
10754 /* ------- 0x,0000: FMUL d_d, s_s ------- */
10755 /* ------- 0x,0001: FDIV d_d, s_s ------- */
10756 /* ------- 0x,0010: FADD d_d, s_s ------- */
10757 /* ------- 0x,0011: FSUB d_d, s_s ------- */
10758 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
10759 IROp iop = Iop_INVALID;
10760 const HChar* nm = "???";
10761 switch (opcode) {
10762 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
10763 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
10764 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
10765 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
10766 default: vassert(0);
10767 }
10768 IRExpr* resE = triop(iop, mkexpr(mk_get_IR_rounding_mode()),
10769 getQRegLO(nn, ity), getQRegLO(mm, ity));
10770 IRTemp res = newTemp(ity);
10771 assign(res, resE);
10772 putQReg128(dd, mkV128(0));
10773 putQRegLO(dd, mkexpr(res));
10774 DIP("%s %s, %s, %s\n",
10775 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10776 return True;
10777 }
10778
10779 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
10780 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
10781 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
10782 IROp iop = mkMULF(ity);
10783 IROp iopn = mkNEGF(ity);
10784 const HChar* nm = "fnmul";
10785 IRExpr* resE = unop(iopn,
10786 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
10787 getQRegLO(nn, ity), getQRegLO(mm, ity)));
10788 IRTemp res = newTemp(ity);
10789 assign(res, resE);
10790 putQReg128(dd, mkV128(0));
10791 putQRegLO(dd, mkexpr(res));
10792 DIP("%s %s, %s, %s\n",
10793 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
10794 return True;
10795 }
10796
sewardjdf1628c2014-06-10 22:52:05 +000010797 return False;
10798# undef INSN
10799}
10800
10801
10802static
10803Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
10804{
sewardj5747c4a2014-06-11 20:57:23 +000010805 /* 31 28 23 21 20 15 14 9 4
10806 000 11111 ty o1 m o0 a n d
10807 The first 3 bits are really "M 0 S", but M and S are always zero.
10808 Decode fields: ty,o1,o0
10809 */
sewardjdf1628c2014-06-10 22:52:05 +000010810# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000010811 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
10812 return False;
10813 }
10814 UInt ty = INSN(23,22);
10815 UInt bitO1 = INSN(21,21);
10816 UInt mm = INSN(20,16);
10817 UInt bitO0 = INSN(15,15);
10818 UInt aa = INSN(14,10);
10819 UInt nn = INSN(9,5);
10820 UInt dd = INSN(4,0);
10821 vassert(ty < 4);
10822
10823 if (ty <= X01) {
10824 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
10825 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
10826 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
10827 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
10828 /* -------------------- F{N}M{ADD,SUB} -------------------- */
10829 /* 31 22 20 15 14 9 4 ix
10830 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
10831 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
10832 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
10833 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
10834 where Fx=Dx when sz=1, Fx=Sx when sz=0
10835
10836 -----SPEC------ ----IMPL----
10837 fmadd a + n * m a + n * m
10838 fmsub a + (-n) * m a - n * m
10839 fnmadd (-a) + (-n) * m -(a + n * m)
10840 fnmsub (-a) + n * m -(a - n * m)
10841 */
10842 Bool isD = (ty & 1) == 1;
10843 UInt ix = (bitO1 << 1) | bitO0;
10844 IRType ity = isD ? Ity_F64 : Ity_F32;
10845 IROp opADD = mkADDF(ity);
10846 IROp opSUB = mkSUBF(ity);
10847 IROp opMUL = mkMULF(ity);
10848 IROp opNEG = mkNEGF(ity);
10849 IRTemp res = newTemp(ity);
10850 IRExpr* eA = getQRegLO(aa, ity);
10851 IRExpr* eN = getQRegLO(nn, ity);
10852 IRExpr* eM = getQRegLO(mm, ity);
10853 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
10854 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
10855 switch (ix) {
10856 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
10857 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
10858 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
10859 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
10860 default: vassert(0);
10861 }
10862 putQReg128(dd, mkV128(0x0000));
10863 putQRegLO(dd, mkexpr(res));
10864 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
10865 DIP("%s %s, %s, %s, %s\n",
10866 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
10867 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
10868 return True;
10869 }
10870
sewardjdf1628c2014-06-10 22:52:05 +000010871 return False;
10872# undef INSN
10873}
10874
10875
10876static
10877Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
10878{
10879 /* 31 28 23 21 20 12 9 4
10880 000 11110 ty 1 imm8 100 imm5 d
10881 The first 3 bits are really "M 0 S", but M and S are always zero.
10882 */
10883# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10884 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
10885 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
10886 return False;
10887 }
10888 UInt ty = INSN(23,22);
10889 UInt imm8 = INSN(20,13);
10890 UInt imm5 = INSN(9,5);
10891 UInt dd = INSN(4,0);
10892
10893 /* ------- 00,00000: FMOV s_imm ------- */
10894 /* ------- 01,00000: FMOV d_imm ------- */
10895 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
10896 Bool isD = (ty & 1) == 1;
10897 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
10898 if (!isD) {
10899 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
10900 }
10901 putQReg128(dd, mkV128(0));
10902 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
10903 DIP("fmov %s, #0x%llx\n",
10904 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
10905 return True;
10906 }
10907
10908 return False;
10909# undef INSN
10910}
10911
10912
10913static
10914Bool dis_AdvSIMD_fp_to_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
10915{
10916# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
10917 return False;
10918# undef INSN
10919}
10920
10921
static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Sub-groups handled below:
         op = 00x : FCVT{N,P,M,Z}{S,U}  FP -> integer, explicit rounding
         op = 01x : {S,U}CVTF           integer -> FP, rounded per FPCR
         op = 11x : FMOV (general)      raw bit moves, no conversion
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
   if (ty <= X01 && (op == BITS3(0,0,0) || op == BITS3(0,0,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use.  8 is a sentinel that
         is not a valid IRRoundingMode; the switch below must replace
         it in every reachable case. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (rm) {
         case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
         case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
         case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
         case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
         default: vassert(0);
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      // Combinations not on this whitelist are valid architecturally but
      // are rejected here until a test case exercises them.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src = newTemp(srcTy);
      IRTemp dst = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm op  15     9 4
      0    0 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1 0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1 0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1 0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1 0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversion from integer registers to
      FP registers, all 4 32/64-bit combinations, rounded per FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
        = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
            Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      /* I32 -> F64 is exact, so Iop_I32{S,U}toF64 are unops taking no
         rounding mode; every other combination can round and takes the
         FPCR-derived mode. */
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix], mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* Raw bit moves between integer and FP/vector registers; no value
      conversion is performed.  Cases 3 and 6 access the upper 64-bit
      lane of the vector register (Vd.D[1]).
      case sf  S     ty   rm  op  15     9 4
       (1) 0 0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1 0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1 0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0 0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1 0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1 0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
   if (1) {
      UInt ix = 0; // case; 0 means "not an FMOV (general)"
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               /* Int -> vector moves zero the destination first, then
                  write the low lane. */
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               /* Writes only the upper lane; the lower lane of Vd is
                  left unchanged. */
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}
11140
11141
11142static
11143Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
11144{
11145 Bool ok;
11146 ok = dis_AdvSIMD_EXT(dres, insn);
11147 if (UNLIKELY(ok)) return True;
11148 ok = dis_AdvSIMD_TBL_TBX(dres, insn);
11149 if (UNLIKELY(ok)) return True;
11150 ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
11151 if (UNLIKELY(ok)) return True;
11152 ok = dis_AdvSIMD_across_lanes(dres, insn);
11153 if (UNLIKELY(ok)) return True;
11154 ok = dis_AdvSIMD_copy(dres, insn);
11155 if (UNLIKELY(ok)) return True;
11156 ok = dis_AdvSIMD_modified_immediate(dres, insn);
11157 if (UNLIKELY(ok)) return True;
11158 ok = dis_AdvSIMD_scalar_copy(dres, insn);
11159 if (UNLIKELY(ok)) return True;
11160 ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
11161 if (UNLIKELY(ok)) return True;
11162 ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
11163 if (UNLIKELY(ok)) return True;
11164 ok = dis_AdvSIMD_scalar_three_different(dres, insn);
11165 if (UNLIKELY(ok)) return True;
11166 ok = dis_AdvSIMD_scalar_three_same(dres, insn);
11167 if (UNLIKELY(ok)) return True;
11168 ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
11169 if (UNLIKELY(ok)) return True;
11170 ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
11171 if (UNLIKELY(ok)) return True;
11172 ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
11173 if (UNLIKELY(ok)) return True;
11174 ok = dis_AdvSIMD_three_different(dres, insn);
11175 if (UNLIKELY(ok)) return True;
11176 ok = dis_AdvSIMD_three_same(dres, insn);
11177 if (UNLIKELY(ok)) return True;
11178 ok = dis_AdvSIMD_two_reg_misc(dres, insn);
11179 if (UNLIKELY(ok)) return True;
11180 ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
11181 if (UNLIKELY(ok)) return True;
11182 ok = dis_AdvSIMD_crypto_aes(dres, insn);
11183 if (UNLIKELY(ok)) return True;
11184 ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
11185 if (UNLIKELY(ok)) return True;
11186 ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
11187 if (UNLIKELY(ok)) return True;
11188 ok = dis_AdvSIMD_fp_compare(dres, insn);
11189 if (UNLIKELY(ok)) return True;
11190 ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
11191 if (UNLIKELY(ok)) return True;
11192 ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
11193 if (UNLIKELY(ok)) return True;
11194 ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
11195 if (UNLIKELY(ok)) return True;
11196 ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
11197 if (UNLIKELY(ok)) return True;
11198 ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
11199 if (UNLIKELY(ok)) return True;
11200 ok = dis_AdvSIMD_fp_immediate(dres, insn);
11201 if (UNLIKELY(ok)) return True;
11202 ok = dis_AdvSIMD_fp_to_fixedp_conv(dres, insn);
11203 if (UNLIKELY(ok)) return True;
sewardj5747c4a2014-06-11 20:57:23 +000011204 ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
sewardjdf1628c2014-06-10 22:52:05 +000011205 if (UNLIKELY(ok)) return True;
11206 return False;
11207}
11208
sewardjbbcf1882014-01-12 12:49:10 +000011209
11210/*------------------------------------------------------------*/
11211/*--- Disassemble a single ARM64 instruction ---*/
11212/*------------------------------------------------------------*/
11213
11214/* Disassemble a single ARM64 instruction into IR. The instruction
11215 has is located at |guest_instr| and has guest IP of
11216 |guest_PC_curr_instr|, which will have been set before the call
11217 here. Returns True iff the instruction was decoded, in which case
11218 *dres will be set accordingly, or False, in which case *dres should
11219 be ignored by the caller. */
11220
static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        UChar*       guest_instr,
        VexArchInfo* archinfo,
        VexAbiInfo*  abiinfo
     )
{
   /* NOTE(review): resteerOkFn, resteerCisOk, callback_opaque and
      abiinfo are not referenced in this function body; presumably they
      are reserved for sub-decoders / future use -- confirm. */
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults.  On decode failure these must still hold;
      the asserts at the end of this function check exactly that. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file).
      A special sequence is the 16-byte preamble below followed by a
      4-byte marker ORR, 20 bytes in total -- hence the "+ 20" and
      "code+16" offsets in this section. */
   {
      UChar* code = (UChar*)guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it?
            The marker insn at code+16 is a no-op ORR whose register
            choice encodes the request. */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /*  branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn. The reason is that the IRop we're
            // injecting here can change. In which case the translation has to
            // be redone. For ease of handling, we simply invalidate all the
            // time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}
11393
11394
11395/*------------------------------------------------------------*/
11396/*--- Top-level fn ---*/
11397/*------------------------------------------------------------*/
11398
11399/* Disassemble a single instruction into IR. The instruction
11400 is located in host memory at &guest_code[delta]. */
11401
DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr64 ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           UChar*       guest_code_IN,
                           Long         delta_IN,
                           Addr64       guest_IP,
                           VexArch      guest_arch,
                           VexArchInfo* archinfo,
                           VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15   ===   x >= 2 && x <= 17 (I hope) */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 (UChar*)&guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here.  len is 4 for a normal insn
         and 20 for one of the "Special" preamble sequences. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( (UChar*)&guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         /* Render the insn as a 32-char binary string, with a space
            every 8 bits and a ' every 4, for the diagnostic below. */
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}
11484
sewardjecde6972014-02-05 11:01:19 +000011485
sewardjbbcf1882014-01-12 12:49:10 +000011486/*--------------------------------------------------------------------*/
11487/*--- end guest_arm64_toIR.c ---*/
11488/*--------------------------------------------------------------------*/