/* -*- mode: C; c-basic-offset: 3; -*- */

/*--------------------------------------------------------------------*/
/*--- begin                                     guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2013 OpenWorks
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/* KNOWN LIMITATIONS 2014-Nov-16

   * Correctness: FMAXNM and FMINNM are implemented the same as
     FMAX/FMIN.

     Also, FP comparison "unordered" is implemented as normal FP
     comparison.

     Both should be fixed.  They behave incorrectly in the presence of
     NaNs.

     FMULX is treated the same as FMUL.  That's also not correct.

   * Floating multiply-add (etc) insns are split into a multiply and
     an add, and so suffer double rounding; hence sometimes the least
     significant mantissa bit is incorrect.  Fix: use the IR
     multiply-add IROps instead.

   * FRINTA and FRINTN are kludged: they just round to nearest, with
     no special handling for the "ties" case.  FRINTX might be dubious
     too.
*/
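
/* To make the double-rounding limitation above concrete: for
   FMADD Dd, Dn, Dm, Da the front end currently generates, in sketch
   form,

      t1 = triop(Iop_MulF64, rm, n, m)     // rounded once
      t2 = triop(Iop_AddF64, rm, t1, a)    // rounded again

   whereas the suggested fix is the fused IROp, rounded only once:

      t  = qop(Iop_MAddF64, rm, n, m, a)

   (Iop_MAddF64 is the IR multiply-add op referred to above; 'qop'
   stands for a 4-argument analogue of the binop/triop helpers defined
   later in this file.) */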

/* "Special" instructions.

   This instruction decoder can decode four special instructions
   which mean nothing natively (are no-ops as far as regs/mem are
   concerned) but have meaning for supporting Valgrind.  A special
   instruction is flagged by a 16-byte preamble:

      93CC0D8C 93CC358C 93CCCD8C 93CCF58C
      (ror x12, x12, #3;   ror x12, x12, #13
       ror x12, x12, #51;  ror x12, x12, #61)

   Following that, one of the following 4 is allowed
   (standard interpretation in parentheses):

      AA0A014A (orr x10,x10,x10)   X3 = client_request ( X4 )
      AA0B016B (orr x11,x11,x11)   X3 = guest_NRADDR
      AA0C018C (orr x12,x12,x12)   branch-and-link-to-noredir X8
      AA090129 (orr x9,x9,x9)      IR injection

   Any other bytes following the 16-byte preamble are illegal and
   constitute a failure in instruction decoding.  This all assumes
   that the preamble will never occur except in specific code
   fragments designed for Valgrind to catch.
*/
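
/* A minimal sketch of what a client-side emitter for one such
   sequence (the client-request variant) would produce.  This helper
   is hypothetical and purely illustrative -- it is not part of this
   file -- but the word values are exactly those decoded below. */
#if 0
static void example_emit_client_request_marker ( UInt* out )
{
   out[0] = 0x93CC0D8C;   /* ror x12, x12, #3  */
   out[1] = 0x93CC358C;   /* ror x12, x12, #13 */
   out[2] = 0x93CCCD8C;   /* ror x12, x12, #51 */
   out[3] = 0x93CCF58C;   /* ror x12, x12, #61 */
   out[4] = 0xAA0A014A;   /* orr x10,x10,x10 : X3 = client_request(X4) */
}
#endif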

/* Translates ARM64 code to IR. */

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"
#include "libvex_guest_arm64.h"

#include "main_util.h"
#include "main_globals.h"
#include "guest_generic_bb_to_IR.h"
#include "guest_arm64_defs.h"


/*------------------------------------------------------------*/
/*--- Globals                                               ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction,
   so that we don't have to pass them around endlessly.  CONST means
   does not change during translation of the instruction.
*/

/* CONST: what is the host's endianness?  We need to know this in
   order to do sub-register accesses to the SIMD/FP registers
   correctly. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated. */
static Addr64 guest_PC_curr_instr;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;


/*------------------------------------------------------------*/
/*--- Debugging output                                      ---*/
/*------------------------------------------------------------*/

#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)
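
/* Typical use in the decode functions below (a sketch; 'dd', 'nn'
   and 'imm' are hypothetical decode-time variables):

      DIP("add %s, %s, #%llu\n", nameIReg64orZR(dd),
                                 nameIReg64orZR(nn), imm);

   Nothing is printed unless front-end tracing (VEX_TRACE_FE) is
   enabled. */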


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the         ---*/
/*--- arm insn stream.                                      ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   UInt w = 0;
   w = (w << 8) | p[3];
   w = (w << 8) | p[2];
   w = (w << 8) | p[1];
   w = (w << 8) | p[0];
   return w;
}

/* Sign extend an N-bit value up to 64 bits, by copying
   bit N-1 into all higher positions. */
static ULong sx_to_64 ( ULong x, UInt n )
{
   vassert(n > 1 && n < 64);
   Long r = (Long)x;
   r = (r << (64-n)) >> (64-n);
   return (ULong)r;
}
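
/* For example (a quick illustration, not code from this file):
   sx_to_64(0x1FF, 9) == 0xFFFFFFFFFFFFFFFFULL, since bit 8 is set,
   whereas sx_to_64(0x0FF, 9) == 0xFFULL, since bit 8 is clear. */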

//ZZ /* Do a little-endian load of a 16-bit word, regardless of the
//ZZ    endianness of the underlying host. */
//ZZ static inline UShort getUShortLittleEndianly ( UChar* p )
//ZZ {
//ZZ    UShort w = 0;
//ZZ    w = (w << 8) | p[1];
//ZZ    w = (w << 8) | p[0];
//ZZ    return w;
//ZZ }
//ZZ
//ZZ static UInt ROR32 ( UInt x, UInt sh ) {
//ZZ    vassert(sh >= 0 && sh < 32);
//ZZ    if (sh == 0)
//ZZ       return x;
//ZZ    else
//ZZ       return (x << (32-sh)) | (x >> sh);
//ZZ }
//ZZ
//ZZ static Int popcount32 ( UInt x )
//ZZ {
//ZZ    Int res = 0, i;
//ZZ    for (i = 0; i < 32; i++) {
//ZZ       res += (x & 1);
//ZZ       x >>= 1;
//ZZ    }
//ZZ    return res;
//ZZ }
//ZZ
//ZZ static UInt setbit32 ( UInt x, Int ix, UInt b )
//ZZ {
//ZZ    UInt mask = 1 << ix;
//ZZ    x &= ~mask;
//ZZ    x |= ((b << ix) & mask);
//ZZ    return x;
//ZZ }

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
   (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

#define BITS5(_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b10) << 10) \
    | BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   (((_b11) << 11) \
    | BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0))

#define X00 BITS2(0,0)
#define X01 BITS2(0,1)
#define X10 BITS2(1,0)
#define X11 BITS2(1,1)

// produces _uint[_bMax:_bMin]
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
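
/* Sketch of how the decode logic below uses these helpers: fetch the
   instruction word, then compare bit slices against BITSn patterns,
   e.g.

      UInt insn = getUIntLittleEndianly(guest_instr);
      if (SLICE_UInt(insn, 30, 24) == BITS7(0,0,1,0,0,0,1)) {
         // insn[30:24] matched the 7-bit pattern 0010001
      }

   ('guest_instr' here is just an assumed pointer to the current
   instruction's bytes.) */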


/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.     ---*/
/*------------------------------------------------------------*/

static IRExpr* mkV128 ( UShort w )
{
   return IRExpr_Const(IRConst_V128(w));
}

static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

static IRExpr* mkU16 ( UInt i )
{
   vassert(i < 65536);
   return IRExpr_Const(IRConst_U16(i));
}

static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irsb". */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}

//ZZ static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       storeLE(addr, data);
//ZZ    } else {
//ZZ       stmt( IRStmt_StoreG(Iend_LE, addr, data,
//ZZ                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }
//ZZ
//ZZ static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
//ZZ                             IRExpr* addr, IRExpr* alt,
//ZZ                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional */
//ZZ       IRExpr* loaded = NULL;
//ZZ       switch (cvt) {
//ZZ          case ILGop_Ident32:
//ZZ             loaded = loadLE(Ity_I32, addr); break;
//ZZ          case ILGop_8Uto32:
//ZZ             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_8Sto32:
//ZZ             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
//ZZ          case ILGop_16Uto32:
//ZZ             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
//ZZ          case ILGop_16Sto32:
//ZZ             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
//ZZ          default:
//ZZ             vassert(0);
//ZZ       }
//ZZ       vassert(loaded != NULL);
//ZZ       assign(dst, loaded);
//ZZ    } else {
//ZZ       /* Generate a guarded load into 'dst', but apply 'cvt' to the
//ZZ          loaded data before putting the data in 'dst'.  If the load
//ZZ          does not take place, 'alt' is placed directly in 'dst'. */
//ZZ       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
//ZZ                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
//ZZ    }
//ZZ }

/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}

/* This is used in many places, so the brevity is an advantage. */
static IRTemp newTempV128(void)
{
   return newTemp(Ity_V128);
}

/* Initialise V128 temporaries en masse. */
static
void newTempsV128_2(IRTemp* t1, IRTemp* t2)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
}

static
void newTempsV128_3(IRTemp* t1, IRTemp* t2, IRTemp* t3)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
}

static
void newTempsV128_4(IRTemp* t1, IRTemp* t2, IRTemp* t3, IRTemp* t4)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
}

static
void newTempsV128_7(IRTemp* t1, IRTemp* t2, IRTemp* t3,
                    IRTemp* t4, IRTemp* t5, IRTemp* t6, IRTemp* t7)
{
   vassert(t1 && *t1 == IRTemp_INVALID);
   vassert(t2 && *t2 == IRTemp_INVALID);
   vassert(t3 && *t3 == IRTemp_INVALID);
   vassert(t4 && *t4 == IRTemp_INVALID);
   vassert(t5 && *t5 == IRTemp_INVALID);
   vassert(t6 && *t6 == IRTemp_INVALID);
   vassert(t7 && *t7 == IRTemp_INVALID);
   *t1 = newTempV128();
   *t2 = newTempV128();
   *t3 = newTempV128();
   *t4 = newTempV128();
   *t5 = newTempV128();
   *t6 = newTempV128();
   *t7 = newTempV128();
}

//ZZ /* Produces a value in 0 .. 3, which is encoded as per the type
//ZZ    IRRoundingMode. */
//ZZ static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
//ZZ {
//ZZ    return mkU32(Irrm_NEAREST);
//ZZ }
//ZZ
//ZZ /* Generate an expression for SRC rotated right by ROT. */
//ZZ static IRExpr* genROR32( IRTemp src, Int rot )
//ZZ {
//ZZ    vassert(rot >= 0 && rot < 32);
//ZZ    if (rot == 0)
//ZZ       return mkexpr(src);
//ZZ    return
//ZZ       binop(Iop_Or32,
//ZZ             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
//ZZ             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
//ZZ }
//ZZ
//ZZ static IRExpr* mkU128 ( ULong i )
//ZZ {
//ZZ    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
//ZZ }
//ZZ
//ZZ /* Generate a 4-aligned version of the given expression if
//ZZ    the given condition is true.  Else return it unchanged. */
//ZZ static IRExpr* align4if ( IRExpr* e, Bool b )
//ZZ {
//ZZ    if (b)
//ZZ       return binop(Iop_And32, e, mkU32(~3));
//ZZ    else
//ZZ       return e;
//ZZ }

/* Other IR construction helpers. */
static IROp mkAND ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_And32;
      case Ity_I64: return Iop_And64;
      default: vpanic("mkAND");
   }
}

static IROp mkOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Or32;
      case Ity_I64: return Iop_Or64;
      default: vpanic("mkOR");
   }
}

static IROp mkXOR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Xor32;
      case Ity_I64: return Iop_Xor64;
      default: vpanic("mkXOR");
   }
}

static IROp mkSHL ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shl32;
      case Ity_I64: return Iop_Shl64;
      default: vpanic("mkSHL");
   }
}

static IROp mkSHR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Shr32;
      case Ity_I64: return Iop_Shr64;
      default: vpanic("mkSHR");
   }
}

static IROp mkSAR ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sar32;
      case Ity_I64: return Iop_Sar64;
      default: vpanic("mkSAR");
   }
}

static IROp mkNOT ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Not32;
      case Ity_I64: return Iop_Not64;
      default: vpanic("mkNOT");
   }
}

static IROp mkADD ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Add32;
      case Ity_I64: return Iop_Add64;
      default: vpanic("mkADD");
   }
}

static IROp mkSUB ( IRType ty ) {
   switch (ty) {
      case Ity_I32: return Iop_Sub32;
      case Ity_I64: return Iop_Sub64;
      default: vpanic("mkSUB");
   }
}

static IROp mkADDF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AddF32;
      case Ity_F64: return Iop_AddF64;
      default: vpanic("mkADDF");
   }
}

static IROp mkSUBF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SubF32;
      case Ity_F64: return Iop_SubF64;
      default: vpanic("mkSUBF");
   }
}

static IROp mkMULF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_MulF32;
      case Ity_F64: return Iop_MulF64;
      default: vpanic("mkMULF");
   }
}

static IROp mkDIVF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_DivF32;
      case Ity_F64: return Iop_DivF64;
      default: vpanic("mkDIVF");
   }
}

static IROp mkNEGF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_NegF32;
      case Ity_F64: return Iop_NegF64;
      default: vpanic("mkNEGF");
   }
}

static IROp mkABSF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_AbsF32;
      case Ity_F64: return Iop_AbsF64;
      default: vpanic("mkABSF");
   }
}

static IROp mkSQRTF ( IRType ty ) {
   switch (ty) {
      case Ity_F32: return Iop_SqrtF32;
      case Ity_F64: return Iop_SqrtF64;
      default: vpanic("mkSQRTF");
   }
}

static IROp mkVecADD ( UInt size ) {
   const IROp ops[4]
      = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2 };
   vassert(size < 4);
   return ops[size];
}
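
/* In this family of selectors, 'size' encodes the lane width
   (0: 8-bit lanes .. 3: 64-bit lanes).  So, for instance (a usage
   sketch, not code from this file),

      binop(mkVecADD(2), mkexpr(a), mkexpr(b))

   builds an Iop_Add32x4, i.e. a 4-lane 32-bit vector add of 'a' and
   'b'. */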

static IROp mkVecQADDU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTSUSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
          Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQADDEXTUSSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
          Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSUB ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSUBS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSARN ( UInt size ) {
   const IROp ops[4]
      = { Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHRN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHLN ( UInt size ) {
   const IROp ops[4]
      = { Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATEVENLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8,
          Iop_CatEvenLanes32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCATODDLANES ( UInt size ) {
   const IROp ops[4]
      = { Iop_CatOddLanes8x16, Iop_CatOddLanes16x8,
          Iop_CatOddLanes32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVELO ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
          Iop_InterleaveLO32x4, Iop_InterleaveLO64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecINTERLEAVEHI ( UInt size ) {
   const IROp ops[4]
      = { Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
          Iop_InterleaveHI32x4, Iop_InterleaveHI64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMUL ( UInt size ) {
   const IROp ops[4]
      = { Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, Iop_INVALID };
   vassert(size < 3);
   return ops[size];
}

static IROp mkVecMULLU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Ux8, Iop_Mull16Ux4, Iop_Mull32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_Mull8Sx8, Iop_Mull16Sx4, Iop_Mull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecQDMULLS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMull16Sx4, Iop_QDMull32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 3);
   return ops[sizeNarrow];
}

static IROp mkVecCMPEQ ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTU ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecCMPGTS ( UInt size ) {
   const IROp ops[4]
      = { Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecABS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecZEROHIxxOFV128 ( UInt size ) {
   const IROp ops[4]
      = { Iop_ZeroHI120ofV128, Iop_ZeroHI112ofV128,
          Iop_ZeroHI96ofV128, Iop_ZeroHI64ofV128 };
   vassert(size < 4);
   return ops[size];
}

static IRExpr* mkU ( IRType ty, ULong imm ) {
   switch (ty) {
      case Ity_I32: return mkU32((UInt)(imm & 0xFFFFFFFFULL));
      case Ity_I64: return mkU64(imm);
      default: vpanic("mkU");
   }
}

static IROp mkVecQDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQRDMULHIS ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, Iop_INVALID };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQsh8x16, Iop_QandUQsh16x8,
          Iop_QandUQsh32x4, Iop_QandUQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQsh8x16, Iop_QandSQsh16x8,
          Iop_QandSQsh32x4, Iop_QandSQsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDUQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
          Iop_QandUQRsh32x4, Iop_QandUQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQANDSQRSH ( UInt size ) {
   const IROp ops[4]
      = { Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
          Iop_QandSQRsh32x4, Iop_QandSQRsh64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHU ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecRSHS ( UInt size ) {
   const IROp ops[4]
      = { Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecNARROWUN ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4,
          Iop_NarrowUn64to32x2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4,
          Iop_QNarrowUn64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4,
          Iop_QNarrowUn64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQNARROWUNUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4,
          Iop_QNarrowUn64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQShrNnarrow16Uto8Ux8, Iop_QandQShrNnarrow32Uto16Ux4,
          Iop_QandQShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Sx8, Iop_QandQSarNnarrow32Sto16Sx4,
          Iop_QandQSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQSarNnarrow16Sto8Ux8, Iop_QandQSarNnarrow32Sto16Ux4,
          Iop_QandQSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrshrNNARROWUU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRShrNnarrow16Uto8Ux8, Iop_QandQRShrNnarrow32Uto16Ux4,
          Iop_QandQRShrNnarrow64Uto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSS ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Sx8, Iop_QandQRSarNnarrow32Sto16Sx4,
          Iop_QandQRSarNnarrow64Sto32Sx2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQANDqrsarNNARROWSU ( UInt sizeNarrow ) {
   const IROp ops[4]
      = { Iop_QandQRSarNnarrow16Sto8Ux8, Iop_QandQRSarNnarrow32Sto16Ux4,
          Iop_QandQRSarNnarrow64Sto32Ux2, Iop_INVALID };
   vassert(sizeNarrow < 4);
   return ops[sizeNarrow];
}

static IROp mkVecQSHLNSATUU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
          Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSS ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
          Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecQSHLNSATSU ( UInt size ) {
   const IROp ops[4]
      = { Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
          Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecADDF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Add32Fx4, Iop_Add64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMAXF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Max32Fx4, Iop_Max64Fx2 };
   vassert(size < 4);
   return ops[size];
}

static IROp mkVecMINF ( UInt size ) {
   const IROp ops[4]
      = { Iop_INVALID, Iop_INVALID, Iop_Min32Fx4, Iop_Min64Fx2 };
   vassert(size < 4);
   return ops[size];
}

/* Generate IR to create 'arg rotated right by imm', for sane values
   of 'ty' and 'imm'. */
static IRTemp mathROR ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   if (imm == 0) {
      return arg;
   }
   IRTemp res = newTemp(ty);
   assign(res, binop(mkOR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - imm)),
                     binop(mkSHR(ty), mkexpr(arg), mkU8(imm)) ));
   return res;
}
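
/* For instance, mathROR(Ity_I32, t, 8) generates

      (t << 24) | (t >> 8)

   which is 't' rotated right by 8 bit positions. */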

/* Generate IR to set the returned temp to either all-zeroes or
   all ones, as a copy of arg<imm>. */
static IRTemp mathREPLICATE ( IRType ty, IRTemp arg, UInt imm )
{
   UInt w = 0;
   if (ty == Ity_I64) {
      w = 64;
   } else {
      vassert(ty == Ity_I32);
      w = 32;
   }
   vassert(w != 0);
   vassert(imm < w);
   IRTemp res = newTemp(ty);
   assign(res, binop(mkSAR(ty),
                     binop(mkSHL(ty), mkexpr(arg), mkU8(w - 1 - imm)),
                     mkU8(w - 1)));
   return res;
}
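
/* For instance, mathREPLICATE(Ity_I32, t, 5) generates

      (t << 26) >>s 31    // arithmetic shift right

   which is 0xFFFFFFFF if bit 5 of 't' is set, and 0 otherwise. */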

/* U-widen 8/16/32/64 bit int expr to 64. */
static IRExpr* widenUto64 ( IRType srcTy, IRExpr* e )
{
   switch (srcTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_32Uto64, e);
      case Ity_I16: return unop(Iop_16Uto64, e);
      case Ity_I8:  return unop(Iop_8Uto64, e);
      default: vpanic("widenUto64(arm64)");
   }
}

/* Narrow 64 bit int expr to 8/16/32/64.  Clearly only some
   of these combinations make sense. */
static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
{
   switch (dstTy) {
      case Ity_I64: return e;
      case Ity_I32: return unop(Iop_64to32, e);
      case Ity_I16: return unop(Iop_64to16, e);
      case Ity_I8:  return unop(Iop_64to8, e);
      default: vpanic("narrowFrom64(arm64)");
   }
}


/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.                ---*/
/*------------------------------------------------------------*/

#define OFFB_X0   offsetof(VexGuestARM64State,guest_X0)
#define OFFB_X1   offsetof(VexGuestARM64State,guest_X1)
#define OFFB_X2   offsetof(VexGuestARM64State,guest_X2)
#define OFFB_X3   offsetof(VexGuestARM64State,guest_X3)
#define OFFB_X4   offsetof(VexGuestARM64State,guest_X4)
#define OFFB_X5   offsetof(VexGuestARM64State,guest_X5)
#define OFFB_X6   offsetof(VexGuestARM64State,guest_X6)
#define OFFB_X7   offsetof(VexGuestARM64State,guest_X7)
#define OFFB_X8   offsetof(VexGuestARM64State,guest_X8)
#define OFFB_X9   offsetof(VexGuestARM64State,guest_X9)
#define OFFB_X10  offsetof(VexGuestARM64State,guest_X10)
#define OFFB_X11  offsetof(VexGuestARM64State,guest_X11)
#define OFFB_X12  offsetof(VexGuestARM64State,guest_X12)
#define OFFB_X13  offsetof(VexGuestARM64State,guest_X13)
#define OFFB_X14  offsetof(VexGuestARM64State,guest_X14)
#define OFFB_X15  offsetof(VexGuestARM64State,guest_X15)
#define OFFB_X16  offsetof(VexGuestARM64State,guest_X16)
#define OFFB_X17  offsetof(VexGuestARM64State,guest_X17)
#define OFFB_X18  offsetof(VexGuestARM64State,guest_X18)
#define OFFB_X19  offsetof(VexGuestARM64State,guest_X19)
#define OFFB_X20  offsetof(VexGuestARM64State,guest_X20)
#define OFFB_X21  offsetof(VexGuestARM64State,guest_X21)
#define OFFB_X22  offsetof(VexGuestARM64State,guest_X22)
#define OFFB_X23  offsetof(VexGuestARM64State,guest_X23)
#define OFFB_X24  offsetof(VexGuestARM64State,guest_X24)
#define OFFB_X25  offsetof(VexGuestARM64State,guest_X25)
#define OFFB_X26  offsetof(VexGuestARM64State,guest_X26)
#define OFFB_X27  offsetof(VexGuestARM64State,guest_X27)
#define OFFB_X28  offsetof(VexGuestARM64State,guest_X28)
#define OFFB_X29  offsetof(VexGuestARM64State,guest_X29)
#define OFFB_X30  offsetof(VexGuestARM64State,guest_X30)

#define OFFB_XSP  offsetof(VexGuestARM64State,guest_XSP)
#define OFFB_PC   offsetof(VexGuestARM64State,guest_PC)

#define OFFB_CC_OP    offsetof(VexGuestARM64State,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARM64State,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARM64State,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARM64State,guest_CC_NDEP)

#define OFFB_TPIDR_EL0 offsetof(VexGuestARM64State,guest_TPIDR_EL0)
#define OFFB_NRADDR    offsetof(VexGuestARM64State,guest_NRADDR)

#define OFFB_Q0   offsetof(VexGuestARM64State,guest_Q0)
#define OFFB_Q1   offsetof(VexGuestARM64State,guest_Q1)
#define OFFB_Q2   offsetof(VexGuestARM64State,guest_Q2)
#define OFFB_Q3   offsetof(VexGuestARM64State,guest_Q3)
#define OFFB_Q4   offsetof(VexGuestARM64State,guest_Q4)
#define OFFB_Q5   offsetof(VexGuestARM64State,guest_Q5)
#define OFFB_Q6   offsetof(VexGuestARM64State,guest_Q6)
#define OFFB_Q7   offsetof(VexGuestARM64State,guest_Q7)
#define OFFB_Q8   offsetof(VexGuestARM64State,guest_Q8)
#define OFFB_Q9   offsetof(VexGuestARM64State,guest_Q9)
#define OFFB_Q10  offsetof(VexGuestARM64State,guest_Q10)
#define OFFB_Q11  offsetof(VexGuestARM64State,guest_Q11)
#define OFFB_Q12  offsetof(VexGuestARM64State,guest_Q12)
#define OFFB_Q13  offsetof(VexGuestARM64State,guest_Q13)
#define OFFB_Q14  offsetof(VexGuestARM64State,guest_Q14)
#define OFFB_Q15  offsetof(VexGuestARM64State,guest_Q15)
#define OFFB_Q16  offsetof(VexGuestARM64State,guest_Q16)
#define OFFB_Q17  offsetof(VexGuestARM64State,guest_Q17)
#define OFFB_Q18  offsetof(VexGuestARM64State,guest_Q18)
#define OFFB_Q19  offsetof(VexGuestARM64State,guest_Q19)
#define OFFB_Q20  offsetof(VexGuestARM64State,guest_Q20)
#define OFFB_Q21  offsetof(VexGuestARM64State,guest_Q21)
#define OFFB_Q22  offsetof(VexGuestARM64State,guest_Q22)
#define OFFB_Q23  offsetof(VexGuestARM64State,guest_Q23)
#define OFFB_Q24  offsetof(VexGuestARM64State,guest_Q24)
#define OFFB_Q25  offsetof(VexGuestARM64State,guest_Q25)
#define OFFB_Q26  offsetof(VexGuestARM64State,guest_Q26)
#define OFFB_Q27  offsetof(VexGuestARM64State,guest_Q27)
#define OFFB_Q28  offsetof(VexGuestARM64State,guest_Q28)
#define OFFB_Q29  offsetof(VexGuestARM64State,guest_Q29)
#define OFFB_Q30  offsetof(VexGuestARM64State,guest_Q30)
#define OFFB_Q31  offsetof(VexGuestARM64State,guest_Q31)

#define OFFB_FPCR   offsetof(VexGuestARM64State,guest_FPCR)
#define OFFB_QCFLAG offsetof(VexGuestARM64State,guest_QCFLAG)

#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
#define OFFB_CMLEN   offsetof(VexGuestARM64State,guest_CMLEN)


/* ---------------- Integer registers ---------------- */

static Int offsetIReg64 ( UInt iregNo )
{
   /* Do we care about endianness here?  We do if sub-parts of integer
      registers are accessed. */
   switch (iregNo) {
      case 0:  return OFFB_X0;
      case 1:  return OFFB_X1;
      case 2:  return OFFB_X2;
      case 3:  return OFFB_X3;
      case 4:  return OFFB_X4;
      case 5:  return OFFB_X5;
      case 6:  return OFFB_X6;
      case 7:  return OFFB_X7;
      case 8:  return OFFB_X8;
      case 9:  return OFFB_X9;
      case 10: return OFFB_X10;
      case 11: return OFFB_X11;
      case 12: return OFFB_X12;
      case 13: return OFFB_X13;
      case 14: return OFFB_X14;
      case 15: return OFFB_X15;
      case 16: return OFFB_X16;
      case 17: return OFFB_X17;
      case 18: return OFFB_X18;
      case 19: return OFFB_X19;
      case 20: return OFFB_X20;
      case 21: return OFFB_X21;
      case 22: return OFFB_X22;
      case 23: return OFFB_X23;
      case 24: return OFFB_X24;
      case 25: return OFFB_X25;
      case 26: return OFFB_X26;
      case 27: return OFFB_X27;
      case 28: return OFFB_X28;
      case 29: return OFFB_X29;
      case 30: return OFFB_X30;
      /* but not 31 */
      default: vassert(0);
   }
}

static Int offsetIReg64orSP ( UInt iregNo )
{
   return iregNo == 31  ? OFFB_XSP  : offsetIReg64(iregNo);
}

static const HChar* nameIReg64orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",
          "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
          "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
          "x24", "x25", "x26", "x27", "x28", "x29", "x30", "xzr" };
   return names[iregNo];
}

static const HChar* nameIReg64orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "sp";
   }
   vassert(iregNo < 31);
   return nameIReg64orZR(iregNo);
}

static IRExpr* getIReg64orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static IRExpr* getIReg64orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU64(0);
   }
   vassert(iregNo < 31);
   return IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 );
}

static void putIReg64orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static void putIReg64orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), e) );
}

static const HChar* nameIReg32orZR ( UInt iregNo )
{
   vassert(iregNo < 32);
   static const HChar* names[32]
      = { "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
          "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
          "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
          "w24", "w25", "w26", "w27", "w28", "w29", "w30", "wzr" };
   return names[iregNo];
}

static const HChar* nameIReg32orSP ( UInt iregNo )
{
   if (iregNo == 31) {
      return "wsp";
   }
   vassert(iregNo < 31);
   return nameIReg32orZR(iregNo);
}

static IRExpr* getIReg32orSP ( UInt iregNo )
{
   vassert(iregNo < 32);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static IRExpr* getIReg32orZR ( UInt iregNo )
{
   if (iregNo == 31) {
      return mkU32(0);
   }
   vassert(iregNo < 31);
   return unop(Iop_64to32,
               IRExpr_Get( offsetIReg64orSP(iregNo), Ity_I64 ));
}

static void putIReg32orSP ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static void putIReg32orZR ( UInt iregNo, IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   if (iregNo == 31) {
      return;
   }
   vassert(iregNo < 31);
   stmt( IRStmt_Put(offsetIReg64orSP(iregNo), unop(Iop_32Uto64, e)) );
}

static const HChar* nameIRegOrSP ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orSP(iregNo) : nameIReg32orSP(iregNo);
}

static const HChar* nameIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? nameIReg64orZR(iregNo) : nameIReg32orZR(iregNo);
}

static IRExpr* getIRegOrZR ( Bool is64, UInt iregNo )
{
   vassert(is64 == True || is64 == False);
   return is64 ? getIReg64orZR(iregNo) : getIReg32orZR(iregNo);
}

static void putIRegOrZR ( Bool is64, UInt iregNo, IRExpr* e )
{
   vassert(is64 == True || is64 == False);
   if (is64) putIReg64orZR(iregNo, e); else putIReg32orZR(iregNo, e);
}

static void putPC ( IRExpr* e )
{
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(OFFB_PC, e) );
}


/* ---------------- Vector (Q) registers ---------------- */

static Int offsetQReg128 ( UInt qregNo )
{
   /* We don't care about endianness at this point.  It only becomes
      relevant when dealing with sections of these registers. */
   switch (qregNo) {
      case 0:  return OFFB_Q0;
      case 1:  return OFFB_Q1;
      case 2:  return OFFB_Q2;
      case 3:  return OFFB_Q3;
      case 4:  return OFFB_Q4;
      case 5:  return OFFB_Q5;
      case 6:  return OFFB_Q6;
      case 7:  return OFFB_Q7;
      case 8:  return OFFB_Q8;
      case 9:  return OFFB_Q9;
      case 10: return OFFB_Q10;
      case 11: return OFFB_Q11;
      case 12: return OFFB_Q12;
      case 13: return OFFB_Q13;
      case 14: return OFFB_Q14;
      case 15: return OFFB_Q15;
      case 16: return OFFB_Q16;
      case 17: return OFFB_Q17;
      case 18: return OFFB_Q18;
      case 19: return OFFB_Q19;
      case 20: return OFFB_Q20;
      case 21: return OFFB_Q21;
      case 22: return OFFB_Q22;
      case 23: return OFFB_Q23;
      case 24: return OFFB_Q24;
      case 25: return OFFB_Q25;
      case 26: return OFFB_Q26;
      case 27: return OFFB_Q27;
      case 28: return OFFB_Q28;
      case 29: return OFFB_Q29;
      case 30: return OFFB_Q30;
      case 31: return OFFB_Q31;
      default: vassert(0);
   }
}

/* Write to a complete Qreg. */
static void putQReg128 ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(offsetQReg128(qregNo), e) );
}

/* Read a complete Qreg. */
static IRExpr* getQReg128 ( UInt qregNo )
{
   vassert(qregNo < 32);
   return IRExpr_Get(offsetQReg128(qregNo), Ity_V128);
}

/* Produce the IR type for some sub-part of a vector.  For 32- and 64-
   bit sub-parts we can choose either integer or float types, and
   choose float on the basis that that is the common use case and so
   will give least interference with Put-to-Get forwarding later
   on. */
static IRType preferredVectorSubTypeFromSize ( UInt szB )
{
   switch (szB) {
      case 1:  return Ity_I8;
      case 2:  return Ity_I16;
      case 4:  return Ity_I32; //Ity_F32;
      case 8:  return Ity_F64;
      case 16: return Ity_V128;
      default: vassert(0);
   }
}

/* Find the offset of the laneNo'th lane of type laneTy in the given
   Qreg.  Since the host is little-endian, the least significant lane
   has the lowest offset. */
static Int offsetQRegLane ( UInt qregNo, IRType laneTy, UInt laneNo )
{
   vassert(host_endness == VexEndnessLE);
   Int base = offsetQReg128(qregNo);
   /* Since the host is little-endian, the least significant lane
      will be at the lowest address. */
   /* Restrict this to known types, so as to avoid silently accepting
      stupid types. */
   UInt laneSzB = 0;
   switch (laneTy) {
      case Ity_I8:                 laneSzB = 1;  break;
      case Ity_F16: case Ity_I16:  laneSzB = 2;  break;
      case Ity_F32: case Ity_I32:  laneSzB = 4;  break;
      case Ity_F64: case Ity_I64:  laneSzB = 8;  break;
      case Ity_V128:               laneSzB = 16; break;
      default: break;
   }
   vassert(laneSzB > 0);
   UInt minOff = laneNo * laneSzB;
   UInt maxOff = minOff + laneSzB - 1;
   vassert(maxOff < 16);
   return base + minOff;
}
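
/* For example, offsetQRegLane(3, Ity_I32, 2) is the state offset of
   lane 2 of q3 viewed as 32-bit lanes, i.e. offsetQReg128(3) + 8. */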

/* Put to the least significant lane of a Qreg. */
static void putQRegLO ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:  case Ity_I16: case Ity_I32: case Ity_I64:
      case Ity_F16: case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are probably invalid
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from the least significant lane of a Qreg. */
static IRExpr* getQRegLO ( UInt qregNo, IRType ty )
{
   Int off = offsetQRegLane(qregNo, ty, 0);
   switch (ty) {
      case Ity_I8:
      case Ity_F16: case Ity_I16:
      case Ity_I32: case Ity_I64:
      case Ity_F32: case Ity_F64: case Ity_V128:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, ty);
}

static const HChar* nameQRegLO ( UInt qregNo, IRType laneTy )
{
   static const HChar* namesQ[32]
      = { "q0",  "q1",  "q2",  "q3",  "q4",  "q5",  "q6",  "q7",
          "q8",  "q9",  "q10", "q11", "q12", "q13", "q14", "q15",
          "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23",
          "q24", "q25", "q26", "q27", "q28", "q29", "q30", "q31" };
   static const HChar* namesD[32]
      = { "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
          "d8",  "d9",  "d10", "d11", "d12", "d13", "d14", "d15",
          "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
          "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" };
   static const HChar* namesS[32]
      = { "s0",  "s1",  "s2",  "s3",  "s4",  "s5",  "s6",  "s7",
          "s8",  "s9",  "s10", "s11", "s12", "s13", "s14", "s15",
          "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23",
          "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" };
   static const HChar* namesH[32]
      = { "h0",  "h1",  "h2",  "h3",  "h4",  "h5",  "h6",  "h7",
          "h8",  "h9",  "h10", "h11", "h12", "h13", "h14", "h15",
          "h16", "h17", "h18", "h19", "h20", "h21", "h22", "h23",
          "h24", "h25", "h26", "h27", "h28", "h29", "h30", "h31" };
   static const HChar* namesB[32]
      = { "b0",  "b1",  "b2",  "b3",  "b4",  "b5",  "b6",  "b7",
          "b8",  "b9",  "b10", "b11", "b12", "b13", "b14", "b15",
          "b16", "b17", "b18", "b19", "b20", "b21", "b22", "b23",
          "b24", "b25", "b26", "b27", "b28", "b29", "b30", "b31" };
   vassert(qregNo < 32);
   switch (sizeofIRType(laneTy)) {
      case 1:  return namesB[qregNo];
      case 2:  return namesH[qregNo];
      case 4:  return namesS[qregNo];
      case 8:  return namesD[qregNo];
      case 16: return namesQ[qregNo];
      default: vassert(0);
   }
   /*NOTREACHED*/
}

static const HChar* nameQReg128 ( UInt qregNo )
{
   return nameQRegLO(qregNo, Ity_V128);
}

/* Find the offset of the most significant half (8 bytes) of the given
   Qreg.  This requires knowing the endianness of the host. */
static Int offsetQRegHI64 ( UInt qregNo )
{
   return offsetQRegLane(qregNo, Ity_I64, 1);
}

static IRExpr* getQRegHI64 ( UInt qregNo )
{
   return IRExpr_Get(offsetQRegHI64(qregNo), Ity_I64);
}

static void putQRegHI64 ( UInt qregNo, IRExpr* e )
{
   IRType ty  = typeOfIRExpr(irsb->tyenv, e);
   Int    off = offsetQRegHI64(qregNo);
   switch (ty) {
      case Ity_I64: case Ity_F64:
         break;
      default:
         vassert(0); // Other cases are plain wrong
   }
   stmt(IRStmt_Put(off, e));
}

/* Put to a specified lane of a Qreg. */
static void putQRegLane ( UInt qregNo, UInt laneNo, IRExpr* e )
{
   IRType laneTy = typeOfIRExpr(irsb->tyenv, e);
   Int    off    = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_F64: case Ity_I64:
      case Ity_I32: case Ity_F32:
      case Ity_I16: case Ity_F16:
      case Ity_I8:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   stmt(IRStmt_Put(off, e));
}

/* Get from a specified lane of a Qreg. */
static IRExpr* getQRegLane ( UInt qregNo, UInt laneNo, IRType laneTy )
{
   Int off = offsetQRegLane(qregNo, laneTy, laneNo);
   switch (laneTy) {
      case Ity_I64: case Ity_I32: case Ity_I16: case Ity_I8:
      case Ity_F64: case Ity_F32: case Ity_F16:
         break;
      default:
         vassert(0); // Other cases are ATC
   }
   return IRExpr_Get(off, laneTy);
}


//ZZ /* ---------------- Misc registers ---------------- */
//ZZ
//ZZ static void putMiscReg32 ( UInt    gsoffset,
//ZZ                            IRExpr* e, /* :: Ity_I32 */
//ZZ                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
//ZZ {
//ZZ    switch (gsoffset) {
//ZZ       case OFFB_FPSCR:   break;
//ZZ       case OFFB_QFLAG32: break;
//ZZ       case OFFB_GEFLAG0: break;
//ZZ       case OFFB_GEFLAG1: break;
//ZZ       case OFFB_GEFLAG2: break;
//ZZ       case OFFB_GEFLAG3: break;
//ZZ       default: vassert(0); /* awaiting more cases */
//ZZ    }
//ZZ    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
//ZZ
//ZZ    if (guardT == IRTemp_INVALID) {
//ZZ       /* unconditional write */
//ZZ       stmt(IRStmt_Put(gsoffset, e));
//ZZ    } else {
//ZZ       stmt(IRStmt_Put(
//ZZ          gsoffset,
//ZZ          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
//ZZ                      e, IRExpr_Get(gsoffset, Ity_I32) )
//ZZ       ));
//ZZ    }
//ZZ }
//ZZ
//ZZ static IRTemp get_ITSTATE ( void )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_ITSTATE ( IRTemp t )
//ZZ {
//ZZ    ASSERT_IS_THUMB;
//ZZ    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
//ZZ }
//ZZ
//ZZ static IRTemp get_QFLAG32 ( void )
//ZZ {
//ZZ    IRTemp t = newTemp(Ity_I32);
//ZZ    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
//ZZ    return t;
//ZZ }
//ZZ
//ZZ static void put_QFLAG32 ( IRTemp t, IRTemp condT )
//ZZ {
//ZZ    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
//ZZ }
//ZZ
//ZZ /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
//ZZ    Status Register) to indicate that overflow or saturation occurred.
//ZZ    Nb: t must be zero to denote no saturation, and any nonzero
//ZZ    value to indicate saturation. */
//ZZ static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
//ZZ {
//ZZ    IRTemp old = get_QFLAG32();
//ZZ    IRTemp nyu = newTemp(Ity_I32);
//ZZ    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
//ZZ    put_QFLAG32(nyu, condT);
//ZZ }


/* ---------------- FPCR stuff ---------------- */

/* Generate IR to get hold of the rounding mode bits in FPCR, and
   convert them to IR format.  Bind the final result to the
   returned temp. */
static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
{
   /* The ARMvfp encoding for rounding mode bits is:
         00  to nearest
         01  to +infinity
         10  to -infinity
         11  to zero
      We need to convert that to the IR encoding:
         00  to nearest (the default)
         10  to +infinity
         01  to -infinity
         11  to zero
      Which can be done by swapping bits 0 and 1.
      The rmode bits are at 23:22 in FPCR.
   */
   IRTemp armEncd = newTemp(Ity_I32);
   IRTemp swapped = newTemp(Ity_I32);
   /* Fish FPCR[23:22] out, and slide to bottom.  Doesn't matter that
      we don't zero out bits 24 and above, since the assignment to
      'swapped' will mask them out anyway. */
   assign(armEncd,
          binop(Iop_Shr32, IRExpr_Get(OFFB_FPCR, Ity_I32), mkU8(22)));
   /* Now swap them. */
   assign(swapped,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
                      mkU32(2)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
                      mkU32(1))
         ));
   return swapped;
}
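
/* In plain C the conversion above amounts to (a sketch):

      UInt armEncd = fpcr >> 22;
      UInt irrm    = ((armEncd << 1) & 2) | ((armEncd >> 1) & 1);

   e.g. the ARM encoding 01 (round to +infinity) becomes the IR
   encoding 10 (Irrm_PosINF). */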


/*------------------------------------------------------------*/
/*--- Helpers for flag handling and conditional insns       ---*/
/*------------------------------------------------------------*/

static const HChar* nameARM64Condcode ( ARM64Condcode cond )
{
   switch (cond) {
      case ARM64CondEQ:  return "eq";
      case ARM64CondNE:  return "ne";
      case ARM64CondCS:  return "cs";  // or 'hs'
      case ARM64CondCC:  return "cc";  // or 'lo'
      case ARM64CondMI:  return "mi";
      case ARM64CondPL:  return "pl";
      case ARM64CondVS:  return "vs";
      case ARM64CondVC:  return "vc";
      case ARM64CondHI:  return "hi";
      case ARM64CondLS:  return "ls";
      case ARM64CondGE:  return "ge";
      case ARM64CondLT:  return "lt";
      case ARM64CondGT:  return "gt";
      case ARM64CondLE:  return "le";
      case ARM64CondAL:  return "al";
      case ARM64CondNV:  return "nv";
      default: vpanic("name_ARM64Condcode");
   }
}

/* and a handy shorthand for it */
static const HChar* nameCC ( ARM64Condcode cond ) {
   return nameARM64Condcode(cond);
}
1704

/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.  'cond' must be
   :: Ity_I64 and must denote the condition to compute in
   bits 7:4, and be zero everywhere else.
*/
static IRExpr* mk_arm64g_calculate_condition_dyn ( IRExpr* cond )
{
   vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I64);
   /* And 'cond' had better produce a value in which only bits 7:4 are
      nonzero.  However, obviously we can't assert for that. */

   /* So what we're constructing for the first argument is
      "(cond << 4) | stored-operation".
      However, as per comments above, 'cond' must be supplied
      pre-shifted to this function.

      This pairing scheme requires that the ARM64_CC_OP_ values all fit
      in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
      8 bits of the first argument. */
   IRExpr** args
      = mkIRExprVec_4(
           binop(Iop_Or64, IRExpr_Get(OFFB_CC_OP, Ity_I64), cond),
           IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
           IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
           IRExpr_Get(OFFB_CC_NDEP, Ity_I64)
        );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_condition", &arm64g_calculate_condition,
           args
        );

   /* Exclude the requested condition, OP and NDEP from definedness
      checking.  We're only interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


/* Build IR to calculate some particular condition from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
   Ity_I64, suitable for narrowing.  Although the return type is
   Ity_I64, the returned value is either 0 or 1.
*/
static IRExpr* mk_arm64g_calculate_condition ( ARM64Condcode cond )
{
   /* First arg is "(cond << 4) | condition".  This requires that the
      ARM64_CC_OP_ values all fit in 4 bits.  Hence we are passing a
      (COND, OP) pair in the lowest 8 bits of the first argument. */
   vassert(cond >= 0 && cond <= 15);
   return mk_arm64g_calculate_condition_dyn( mkU64(cond << 4) );
}


/* Build IR to calculate just the carry flag from stored
   CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
   Ity_I64. */
static IRExpr* mk_arm64g_calculate_flag_c ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flag_c", &arm64g_calculate_flag_c,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}


//ZZ /* Build IR to calculate just the overflow flag from stored
//ZZ    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
//ZZ    Ity_I32. */
//ZZ static IRExpr* mk_armg_calculate_flag_v ( void )
//ZZ {
//ZZ    IRExpr** args
//ZZ       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
//ZZ                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
//ZZ    IRExpr* call
//ZZ       = mkIRExprCCall(
//ZZ            Ity_I32,
//ZZ            0/*regparm*/,
//ZZ            "armg_calculate_flag_v", &armg_calculate_flag_v,
//ZZ            args
//ZZ         );
//ZZ    /* Exclude OP and NDEP from definedness checking.  We're only
//ZZ       interested in DEP1 and DEP2. */
//ZZ    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
//ZZ    return call;
//ZZ }


/* Build IR to calculate N Z C V in bits 31:28 of the
   returned word. */
static IRExpr* mk_arm64g_calculate_flags_nzcv ( void )
{
   IRExpr** args
      = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP1, Ity_I64),
                       IRExpr_Get(OFFB_CC_DEP2, Ity_I64),
                       IRExpr_Get(OFFB_CC_NDEP, Ity_I64) );
   IRExpr* call
      = mkIRExprCCall(
           Ity_I64,
           0/*regparm*/,
           "arm64g_calculate_flags_nzcv", &arm64g_calculate_flags_nzcv,
           args
        );
   /* Exclude OP and NDEP from definedness checking.  We're only
      interested in DEP1 and DEP2. */
   call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
   return call;
}

/* Build IR to set the flags thunk, in the most general case. */
static
void setFlags_D1_D2_ND ( UInt cc_op,
                         IRTemp t_dep1, IRTemp t_dep2, IRTemp t_ndep )
{
   vassert(typeOfIRTemp(irsb->tyenv, t_dep1) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_dep2) == Ity_I64);
   vassert(typeOfIRTemp(irsb->tyenv, t_ndep) == Ity_I64);
   vassert(cc_op >= ARM64G_CC_OP_COPY && cc_op < ARM64G_CC_OP_NUMBER);
   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(cc_op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
}

/* Build IR to set the flags thunk after ADD or SUB. */
static
void setFlags_ADD_SUB ( Bool is64, Bool isSUB, IRTemp argL, IRTemp argR )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp z64    = newTemp(Ity_I64);
   if (is64) {
      argL64 = argL;
      argR64 = argR;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
   }
   assign(z64, mkU64(0));
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { cc_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { cc_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { cc_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { cc_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, z64);
}

/* Build IR to set the flags thunk after ADC or SBC. */
static
void setFlags_ADC_SBC ( Bool is64, Bool isSBC,
                        IRTemp argL, IRTemp argR, IRTemp oldC )
{
   IRTemp argL64 = IRTemp_INVALID;
   IRTemp argR64 = IRTemp_INVALID;
   IRTemp oldC64 = IRTemp_INVALID;
   if (is64) {
      argL64 = argL;
      argR64 = argR;
      oldC64 = oldC;
   } else {
      argL64 = newTemp(Ity_I64);
      argR64 = newTemp(Ity_I64);
      oldC64 = newTemp(Ity_I64);
      assign(argL64, unop(Iop_32Uto64, mkexpr(argL)));
      assign(argR64, unop(Iop_32Uto64, mkexpr(argR)));
      assign(oldC64, unop(Iop_32Uto64, mkexpr(oldC)));
   }
   UInt cc_op = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSBC &&  is64) { cc_op = ARM64G_CC_OP_SBC64; }
   else if ( isSBC && !is64) { cc_op = ARM64G_CC_OP_SBC32; }
   else if (!isSBC &&  is64) { cc_op = ARM64G_CC_OP_ADC64; }
   else if (!isSBC && !is64) { cc_op = ARM64G_CC_OP_ADC32; }
   else { vassert(0); }
   setFlags_D1_D2_ND(cc_op, argL64, argR64, oldC64);
}
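
/* For reference, the net arithmetic these thunk operands describe, as
   a tiny host-side sketch (illustrative only; not used by the decoder,
   and the name ref_ADC_SBC is made up).  On ARM64, SBC computes
   argL - argR - (1 - carry), i.e. borrow is the inverse of carry. */
#if 0
static ULong ref_ADC_SBC ( Bool isSBC, ULong argL, ULong argR, ULong oldC )
{
   return isSBC ? argL - argR - (1ULL - (oldC & 1))
                : argL + argR + (oldC & 1);
}
#endif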

/* Build IR to set the flags thunk after ADD or SUB, if the given
   condition evaluates to True at run time.  If not, the flags are set
   to the specified NZCV value. */
static
void setFlags_ADD_SUB_conditionally (
        Bool is64, Bool isSUB,
        IRTemp cond, IRTemp argL, IRTemp argR, UInt nzcv
     )
{
   /* Generate IR as follows:
        CC_OP   = ITE(cond, OP_{ADD,SUB}{32,64}, OP_COPY)
        CC_DEP1 = ITE(cond, argL64, nzcv << 28)
        CC_DEP2 = ITE(cond, argR64, 0)
        CC_NDEP = 0
   */

   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));

   /* Establish the operation and operands for the True case. */
   IRTemp t_dep1 = IRTemp_INVALID;
   IRTemp t_dep2 = IRTemp_INVALID;
   UInt   t_op   = ARM64G_CC_OP_NUMBER;
   /**/ if ( isSUB &&  is64) { t_op = ARM64G_CC_OP_SUB64; }
   else if ( isSUB && !is64) { t_op = ARM64G_CC_OP_SUB32; }
   else if (!isSUB &&  is64) { t_op = ARM64G_CC_OP_ADD64; }
   else if (!isSUB && !is64) { t_op = ARM64G_CC_OP_ADD32; }
   else { vassert(0); }
   /* */
   if (is64) {
      t_dep1 = argL;
      t_dep2 = argR;
   } else {
      t_dep1 = newTemp(Ity_I64);
      t_dep2 = newTemp(Ity_I64);
      assign(t_dep1, unop(Iop_32Uto64, mkexpr(argL)));
      assign(t_dep2, unop(Iop_32Uto64, mkexpr(argR)));
   }

   /* Establish the operation and operands for the False case. */
   IRTemp f_dep1 = newTemp(Ity_I64);
   IRTemp f_dep2 = z64;
   UInt   f_op   = ARM64G_CC_OP_COPY;
   assign(f_dep1, mkU64(nzcv << 28));

   /* Final thunk values */
   IRTemp dep1 = newTemp(Ity_I64);
   IRTemp dep2 = newTemp(Ity_I64);
   IRTemp op   = newTemp(Ity_I64);

   assign(op,   IRExpr_ITE(mkexpr(cond), mkU64(t_op), mkU64(f_op)));
   assign(dep1, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep1), mkexpr(f_dep1)));
   assign(dep2, IRExpr_ITE(mkexpr(cond), mkexpr(t_dep2), mkexpr(f_dep2)));

   /* finally .. */
   stmt( IRStmt_Put( OFFB_CC_OP,   mkexpr(op) ));
   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(dep1) ));
   stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(dep2) ));
   stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(z64) ));
}

/* Build IR to set the flags thunk after AND/OR/XOR or variants thereof. */
static
void setFlags_LOGIC ( Bool is64, IRTemp res )
{
   IRTemp res64 = IRTemp_INVALID;
   IRTemp z64   = newTemp(Ity_I64);
   UInt   cc_op = ARM64G_CC_OP_NUMBER;
   if (is64) {
      res64 = res;
      cc_op = ARM64G_CC_OP_LOGIC64;
   } else {
      res64 = newTemp(Ity_I64);
      assign(res64, unop(Iop_32Uto64, mkexpr(res)));
      cc_op = ARM64G_CC_OP_LOGIC32;
   }
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(cc_op, res64, z64, z64);
}

/* Build IR to set the flags thunk to a given NZCV value.  NZCV is
   located in bits 31:28 of the supplied value. */
static
void setFlags_COPY ( IRTemp nzcv_28x0 )
{
   IRTemp z64 = newTemp(Ity_I64);
   assign(z64, mkU64(0));
   setFlags_D1_D2_ND(ARM64G_CC_OP_COPY, nzcv_28x0, z64, z64);
}


//ZZ /* Minor variant of the above that sets NDEP to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_dep2,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 to zero (if it
//ZZ    sets it at all) */
//ZZ static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
//ZZ                              IRTemp t_ndep,
//ZZ                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
//ZZ }
//ZZ
//ZZ
//ZZ /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
//ZZ    sets them at all) */
//ZZ static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
//ZZ                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
//ZZ {
//ZZ    IRTemp z32 = newTemp(Ity_I32);
//ZZ    assign( z32, mkU32(0) );
//ZZ    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
//ZZ }


/*------------------------------------------------------------*/
/*--- Misc math helpers                                    ---*/
/*------------------------------------------------------------*/

/* Generate IR for ((x & mask) >>u sh) | ((x << sh) & mask) */
static IRTemp math_SWAPHELPER ( IRTemp x, ULong mask, Int sh )
{
   IRTemp maskT = newTemp(Ity_I64);
   IRTemp res   = newTemp(Ity_I64);
   vassert(sh >= 1 && sh <= 63);
   assign(maskT, mkU64(mask));
   assign( res,
           binop(Iop_Or64,
                 binop(Iop_Shr64,
                       binop(Iop_And64, mkexpr(x), mkexpr(maskT)),
                       mkU8(sh)),
                 binop(Iop_And64,
                       binop(Iop_Shl64, mkexpr(x), mkU8(sh)),
                       mkexpr(maskT))
           )
         );
   return res;
}

/* Generates byte swaps within 32-bit lanes. */
static IRTemp math_UINTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   return res;
}

/* Generates byte swaps within 16-bit lanes. */
static IRTemp math_USHORTSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   return res;
}

/* Generates a 64-bit byte swap. */
static IRTemp math_BYTESWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xFF00FF00FF00FF00ULL, 8);
   res = math_SWAPHELPER(res, 0xFFFF0000FFFF0000ULL, 16);
   res = math_SWAPHELPER(res, 0xFFFFFFFF00000000ULL, 32);
   return res;
}

/* Generates a 64-bit bit swap. */
static IRTemp math_BITSWAP64 ( IRTemp src )
{
   IRTemp res;
   res = math_SWAPHELPER(src, 0xAAAAAAAAAAAAAAAAULL, 1);
   res = math_SWAPHELPER(res, 0xCCCCCCCCCCCCCCCCULL, 2);
   res = math_SWAPHELPER(res, 0xF0F0F0F0F0F0F0F0ULL, 4);
   return math_BYTESWAP64(res);
}
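
/* The same mask-and-shift scheme, restated as plain C so the constants
   above can be sanity-checked standalone (illustrative sketch only;
   ref_SWAPHELPER is a made-up name). */
#if 0
static ULong ref_SWAPHELPER ( ULong x, ULong mask, Int sh )
{
   return ((x & mask) >> sh) | ((x << sh) & mask);
}
/* One round with mask 0xFF00FF00FF00FF00 and sh 8, applied to
   0x0123456789ABCDEF, swaps bytes within each 16-bit lane, giving
   0x23016745AB89EFCD.  The 16- and 32-bit rounds then complete the
   full byte swap, 0xEFCDAB8967452301. */
#endif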

/* Duplicates the bits at the bottom of the given word to fill the
   whole word.  src :: Ity_I64 is assumed to have zeroes everywhere
   except for the bottom bits. */
static IRTemp math_DUP_TO_64 ( IRTemp src, IRType srcTy )
{
   if (srcTy == Ity_I8) {
      IRTemp t16 = newTemp(Ity_I64);
      assign(t16, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(8))));
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(t16),
                                  binop(Iop_Shl64, mkexpr(t16), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I16) {
      IRTemp t32 = newTemp(Ity_I64);
      assign(t32, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(16))));
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(t32),
                                  binop(Iop_Shl64, mkexpr(t32), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I32) {
      IRTemp t64 = newTemp(Ity_I64);
      assign(t64, binop(Iop_Or64, mkexpr(src),
                                  binop(Iop_Shl64, mkexpr(src), mkU8(32))));
      return t64;
   }
   if (srcTy == Ity_I64) {
      return src;
   }
   vassert(0);
}
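
/* Host-side restatement of the doubling trick (illustrative sketch
   only; ref_DUP8_TO_64 is a made-up name).  Each OR with a shifted
   copy doubles the populated width, so an I8 source takes three
   rounds (8->16->32->64); e.g. 0xAB becomes 0xABABABABABABABAB. */
#if 0
static ULong ref_DUP8_TO_64 ( ULong x /* zero except in bits 7:0 */ )
{
   x |= x << 8;   /* bits 15:0 populated */
   x |= x << 16;  /* bits 31:0 populated */
   x |= x << 32;  /* all 64 bits populated */
   return x;
}
#endif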


/* Duplicates the src element exactly so as to fill a V128 value. */
static IRTemp math_DUP_TO_V128 ( IRTemp src, IRType srcTy )
{
   IRTemp res = newTempV128();
   if (srcTy == Ity_F64) {
      IRTemp i64 = newTemp(Ity_I64);
      assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(src)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64), mkexpr(i64)));
      return res;
   }
   if (srcTy == Ity_F32) {
      IRTemp i64a = newTemp(Ity_I64);
      assign(i64a, unop(Iop_32Uto64, unop(Iop_ReinterpF32asI32, mkexpr(src))));
      IRTemp i64b = newTemp(Ity_I64);
      assign(i64b, binop(Iop_Or64, binop(Iop_Shl64, mkexpr(i64a), mkU8(32)),
                                   mkexpr(i64a)));
      assign(res, binop(Iop_64HLtoV128, mkexpr(i64b), mkexpr(i64b)));
      return res;
   }
   if (srcTy == Ity_I64) {
      assign(res, binop(Iop_64HLtoV128, mkexpr(src), mkexpr(src)));
      return res;
   }
   if (srcTy == Ity_I32 || srcTy == Ity_I16 || srcTy == Ity_I8) {
      IRTemp t1 = newTemp(Ity_I64);
      assign(t1, widenUto64(srcTy, mkexpr(src)));
      IRTemp t2 = math_DUP_TO_64(t1, srcTy);
      assign(res, binop(Iop_64HLtoV128, mkexpr(t2), mkexpr(t2)));
      return res;
   }
   vassert(0);
}


/* |fullWidth| is a full V128 width result.  Depending on bitQ,
   zero out the upper half. */
static IRExpr* math_MAYBE_ZERO_HI64 ( UInt bitQ, IRTemp fullWidth )
{
   if (bitQ == 1) return mkexpr(fullWidth);
   if (bitQ == 0) return unop(Iop_ZeroHI64ofV128, mkexpr(fullWidth));
   vassert(0);
}

/* The same, but from an expression instead. */
static IRExpr* math_MAYBE_ZERO_HI64_fromE ( UInt bitQ, IRExpr* fullWidth )
{
   IRTemp fullWidthT = newTempV128();
   assign(fullWidthT, fullWidth);
   return math_MAYBE_ZERO_HI64(bitQ, fullWidthT);
}

/*------------------------------------------------------------*/
/*--- FP comparison helpers                                ---*/
/*------------------------------------------------------------*/

/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM64-encoded (N,Z,C,V) group in the lowest 4 bits of an I64 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes32 )
{
   IRTemp ix    = newTemp(Ity_I64);
   IRTemp termL = newTemp(Ity_I64);
   IRTemp termR = newTemp(Ity_I64);
   IRTemp nzcv  = newTemp(Ity_I64);
   IRTemp irRes = newTemp(Ity_I64);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
      into an almost correct NZCV value (incredibly), except for the
      case of UN, where it produces 0100 instead of the required 0011.

      termR is therefore a correction term, also computed from ix.  It
      is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
      the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of guest_arm_toIR.c, to try this out with.
   */
   assign(irRes, unop(Iop_32Uto64, mkexpr(irRes32)));

   assign(
      ix,
      binop(Iop_Or64,
            binop(Iop_And64,
                  binop(Iop_Shr64, mkexpr(irRes), mkU8(5)),
                  mkU64(3)),
            binop(Iop_And64, mkexpr(irRes), mkU64(1))));

   assign(
      termL,
      binop(Iop_Add64,
            binop(Iop_Shr64,
                  binop(Iop_Sub64,
                        binop(Iop_Shl64,
                              binop(Iop_Xor64, mkexpr(ix), mkU64(1)),
                              mkU8(62)),
                        mkU64(1)),
                  mkU8(61)),
            mkU64(1)));

   assign(
      termR,
      binop(Iop_And64,
            binop(Iop_And64,
                  mkexpr(ix),
                  binop(Iop_Shr64, mkexpr(ix), mkU8(1))),
            mkU64(1)));

   assign(nzcv, binop(Iop_Sub64, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
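
/* That claim is easy to check outside the IR.  A standalone sketch of
   the superopt formula (illustrative only; it mirrors the test program
   mentioned above and is not part of the decoder). */
#if 0
static void check_IRCmpF64Result_to_NZCV ( void )
{
   const UInt irResVals[4] = { 0x00, 0x01, 0x40, 0x45 }; /* GT LT EQ UN */
   const UInt expected[4]  = { 0x2,  0x8,  0x6,  0x3  };
   UInt i;
   for (i = 0; i < 4; i++) {
      ULong irRes = irResVals[i];
      /* bits 6 and 0 of irRes, side by side */
      ULong ix    = ((irRes >> 5) & 3) | (irRes & 1);
      /* the superopt sequence, and its UN correction term */
      ULong termL = ((((ix ^ 1) << 62) - 1) >> 61) + 1;
      ULong termR = (ix & (ix >> 1)) & 1;
      vassert((termL - termR) == expected[i]);
   }
}
#endif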


/*------------------------------------------------------------*/
/*--- Data processing (immediate)                          ---*/
/*------------------------------------------------------------*/

/* Helper functions for supporting "DecodeBitMasks" */

static ULong dbm_ROR ( Int width, ULong x, Int rot )
{
   vassert(width > 0 && width <= 64);
   vassert(rot >= 0 && rot < width);
   if (rot == 0) return x;
   ULong res = x >> rot;
   res |= (x << (width - rot));
   if (width < 64)
      res &= ((1ULL << width) - 1);
   return res;
}

static ULong dbm_RepTo64( Int esize, ULong x )
{
   switch (esize) {
      case 64:
         return x;
      case 32:
         x &= 0xFFFFFFFF; x |= (x << 32);
         return x;
      case 16:
         x &= 0xFFFF; x |= (x << 16); x |= (x << 32);
         return x;
      case 8:
         x &= 0xFF; x |= (x << 8); x |= (x << 16); x |= (x << 32);
         return x;
      case 4:
         x &= 0xF; x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      case 2:
         x &= 0x3; x |= (x << 2); x |= (x << 4); x |= (x << 8);
         x |= (x << 16); x |= (x << 32);
         return x;
      default:
         break;
   }
   vpanic("dbm_RepTo64");
   /*NOTREACHED*/
   return 0;
}

static Int dbm_highestSetBit ( ULong x )
{
   Int i;
   for (i = 63; i >= 0; i--) {
      if (x & (1ULL << i))
         return i;
   }
   vassert(x == 0);
   return -1;
}

static
Bool dbm_DecodeBitMasks ( /*OUT*/ULong* wmask, /*OUT*/ULong* tmask,
                          ULong immN, ULong imms, ULong immr, Bool immediate,
                          UInt M /*32 or 64*/)
{
   vassert(immN < (1ULL << 1));
   vassert(imms < (1ULL << 6));
   vassert(immr < (1ULL << 6));
   vassert(immediate == False || immediate == True);
   vassert(M == 32 || M == 64);

   Int len = dbm_highestSetBit( ((immN << 6) & 64) | ((~imms) & 63) );
   if (len < 1) { /* printf("fail1\n"); */ return False; }
   vassert(len <= 6);
   vassert(M >= (1 << len));

   vassert(len >= 1 && len <= 6);
   ULong levels = // (zeroes(6 - len) << (6-len)) | ones(len);
                  (1 << len) - 1;
   vassert(levels >= 1 && levels <= 63);

   if (immediate && ((imms & levels) == levels)) {
      /* printf("fail2 imms %llu levels %llu len %d\n", imms, levels, len); */
      return False;
   }

   ULong S = imms & levels;
   ULong R = immr & levels;
   Int   diff = S - R;
   diff &= 63;
   Int esize = 1 << len;
   vassert(2 <= esize && esize <= 64);

   /* Be careful of these (1ULL << (S+1)) - 1 expressions, and the
      same below with d.  S can be 63 in which case we have an out of
      range and hence undefined shift. */
   vassert(S >= 0 && S <= 63);
   vassert(esize >= (S+1));
   ULong elem_s = // Zeroes(esize-(S+1)):Ones(S+1)
                  //(1ULL << (S+1)) - 1;
                  ((1ULL << S) - 1) + (1ULL << S);

   Int d = // diff<len-1:0>
           diff & ((1 << len)-1);
   vassert(esize >= (d+1));
   vassert(d >= 0 && d <= 63);

   ULong elem_d = // Zeroes(esize-(d+1)):Ones(d+1)
                  //(1ULL << (d+1)) - 1;
                  ((1ULL << d) - 1) + (1ULL << d);

   if (esize != 64) vassert(elem_s < (1ULL << esize));
   if (esize != 64) vassert(elem_d < (1ULL << esize));

   if (wmask) *wmask = dbm_RepTo64(esize, dbm_ROR(esize, elem_s, R));
   if (tmask) *tmask = dbm_RepTo64(esize, elem_d);

   return True;
}
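
/* Two worked examples of the wmask computation, checkable standalone
   (illustrative sketch only; check_dbm_DecodeBitMasks is a made-up
   name).  (N,immr,imms) = (1,0,0b000000) selects esize 64 with a single
   1 bit, so wmask is 1.  (0,0,0b111100) selects esize 2 with one 1 bit
   per element, which replicates to 0x5555555555555555. */
#if 0
static void check_dbm_DecodeBitMasks ( void )
{
   ULong w = 0;
   vassert(dbm_DecodeBitMasks(&w, NULL, 1, 0x00, 0, True, 64)
           && w == 1ULL);
   vassert(dbm_DecodeBitMasks(&w, NULL, 0, 0x3C, 0, True, 64)
           && w == 0x5555555555555555ULL);
}
#endif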


static
Bool dis_ARM64_data_processing_immediate(/*MB_OUT*/DisResult* dres,
                                         UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* insn[28:23]
      10000x PC-rel addressing
      10001x Add/subtract (immediate)
      100100 Logical (immediate)
      100101 Move Wide (immediate)
      100110 Bitfield
      100111 Extract
   */

   /* ------------------ ADD/SUB{,S} imm12 ------------------ */
   if (INSN(28,24) == BITS5(1,0,0,0,1)) {
      Bool is64   = INSN(31,31) == 1;
      Bool isSub  = INSN(30,30) == 1;
      Bool setCC  = INSN(29,29) == 1;
      UInt sh     = INSN(23,22);
      UInt uimm12 = INSN(21,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      const HChar* nm = isSub ? "sub" : "add";
      if (sh >= 2) {
         /* Invalid; fall through */
      } else {
         vassert(sh <= 1);
         uimm12 <<= (12 * sh);
         if (is64) {
            IRTemp argL = newTemp(Ity_I64);
            IRTemp argR = newTemp(Ity_I64);
            IRTemp res  = newTemp(Ity_I64);
            assign(argL, getIReg64orSP(nn));
            assign(argR, mkU64(uimm12));
            assign(res,  binop(isSub ? Iop_Sub64 : Iop_Add64,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg64orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg64orZR(dd), nameIReg64orSP(nn), uimm12);
            } else {
               putIReg64orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg64orSP(dd), nameIReg64orSP(nn), uimm12);
            }
         } else {
            IRTemp argL = newTemp(Ity_I32);
            IRTemp argR = newTemp(Ity_I32);
            IRTemp res  = newTemp(Ity_I32);
            assign(argL, getIReg32orSP(nn));
            assign(argR, mkU32(uimm12));
            assign(res,  binop(isSub ? Iop_Sub32 : Iop_Add32,
                               mkexpr(argL), mkexpr(argR)));
            if (setCC) {
               putIReg32orZR(dd, mkexpr(res));
               setFlags_ADD_SUB(False/*!is64*/, isSub, argL, argR);
               DIP("%ss %s, %s, 0x%x\n",
                   nm, nameIReg32orZR(dd), nameIReg32orSP(nn), uimm12);
            } else {
               putIReg32orSP(dd, mkexpr(res));
               DIP("%s %s, %s, 0x%x\n",
                   nm, nameIReg32orSP(dd), nameIReg32orSP(nn), uimm12);
            }
         }
         return True;
      }
   }

   /* -------------------- ADR/ADRP -------------------- */
   if (INSN(28,24) == BITS5(1,0,0,0,0)) {
      UInt  bP    = INSN(31,31);
      UInt  immLo = INSN(30,29);
      UInt  immHi = INSN(23,5);
      UInt  rD    = INSN(4,0);
      ULong uimm  = (immHi << 2) | immLo;
      ULong simm  = sx_to_64(uimm, 21);
      ULong val;
      if (bP) {
         val = (guest_PC_curr_instr & 0xFFFFFFFFFFFFF000ULL) + (simm << 12);
      } else {
         val = guest_PC_curr_instr + simm;
      }
      putIReg64orZR(rD, mkU64(val));
      DIP("adr%s %s, 0x%llx\n", bP ? "p" : "", nameIReg64orZR(rD), val);
      return True;
   }

   /* -------------------- LOGIC(imm) -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,0)) {
      /* 31 30 28     22 21   15   9  4
         sf op 100100 N  immr imms Rn Rd
           op=00: AND  Rd|SP, Rn, #imm
           op=01: ORR  Rd|SP, Rn, #imm
           op=10: EOR  Rd|SP, Rn, #imm
           op=11: ANDS Rd|ZR, Rn, #imm
      */
      Bool  is64 = INSN(31,31) == 1;
      UInt  op   = INSN(30,29);
      UInt  N    = INSN(22,22);
      UInt  immR = INSN(21,16);
      UInt  immS = INSN(15,10);
      UInt  nn   = INSN(9,5);
      UInt  dd   = INSN(4,0);
      ULong imm  = 0;
      Bool  ok;
      if (N == 1 && !is64)
         goto after_logic_imm; /* not allowed; fall through */
      ok = dbm_DecodeBitMasks(&imm, NULL,
                              N, immS, immR, True, is64 ? 64 : 32);
      if (!ok)
         goto after_logic_imm;

      const HChar* names[4] = { "and", "orr", "eor", "ands" };
      const IROp   ops64[4] = { Iop_And64, Iop_Or64, Iop_Xor64, Iop_And64 };
      const IROp   ops32[4] = { Iop_And32, Iop_Or32, Iop_Xor32, Iop_And32 };

      vassert(op < 4);
      if (is64) {
         IRExpr* argL = getIReg64orZR(nn);
         IRExpr* argR = mkU64(imm);
         IRTemp  res  = newTemp(Ity_I64);
         assign(res, binop(ops64[op], argL, argR));
         if (op < 3) {
            putIReg64orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orSP(dd), nameIReg64orZR(nn), imm);
         } else {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_LOGIC(True/*is64*/, res);
            DIP("%s %s, %s, 0x%llx\n", names[op],
                nameIReg64orZR(dd), nameIReg64orZR(nn), imm);
         }
      } else {
         IRExpr* argL = getIReg32orZR(nn);
         IRExpr* argR = mkU32((UInt)imm);
         IRTemp  res  = newTemp(Ity_I32);
         assign(res, binop(ops32[op], argL, argR));
         if (op < 3) {
            putIReg32orSP(dd, mkexpr(res));
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orSP(dd), nameIReg32orZR(nn), (UInt)imm);
         } else {
            putIReg32orZR(dd, mkexpr(res));
            setFlags_LOGIC(False/*!is64*/, res);
            DIP("%s %s, %s, 0x%x\n", names[op],
                nameIReg32orZR(dd), nameIReg32orZR(nn), (UInt)imm);
         }
      }
      return True;
   }
   after_logic_imm:

   /* -------------------- MOV{Z,N,K} -------------------- */
   if (INSN(28,23) == BITS6(1,0,0,1,0,1)) {
      /* 31 30 28      22 20    4
         |  |  |       |  |     |
         sf 10 100 101 hw imm16 Rd   MOV(Z) Rd, (imm16 << (16*hw))
         sf 00 100 101 hw imm16 Rd   MOV(N) Rd, ~(imm16 << (16*hw))
         sf 11 100 101 hw imm16 Rd   MOV(K) Rd, (imm16 << (16*hw))
      */
      Bool is64   = INSN(31,31) == 1;
      UInt subopc = INSN(30,29);
      UInt hw     = INSN(22,21);
      UInt imm16  = INSN(20,5);
      UInt dd     = INSN(4,0);
      if (subopc == BITS2(0,1) || (!is64 && hw >= 2)) {
         /* invalid; fall through */
      } else {
         ULong imm64 = ((ULong)imm16) << (16 * hw);
         if (!is64)
            vassert(imm64 < 0x100000000ULL);
         switch (subopc) {
            case BITS2(1,0): // MOVZ
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movz %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(0,0): // MOVN
               imm64 = ~imm64;
               if (!is64)
                  imm64 &= 0xFFFFFFFFULL;
               putIRegOrZR(is64, dd, is64 ? mkU64(imm64) : mkU32((UInt)imm64));
               DIP("movn %s, 0x%llx\n", nameIRegOrZR(is64, dd), imm64);
               break;
            case BITS2(1,1): // MOVK
               /* This is more complex.  We are inserting a slice into
                  the destination register, so we need to have the old
                  value of it. */
               if (is64) {
                  IRTemp old = newTemp(Ity_I64);
                  assign(old, getIReg64orZR(dd));
                  ULong mask = 0xFFFFULL << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or64,
                             binop(Iop_And64, mkexpr(old), mkU64(~mask)),
                             mkU64(imm64));
                  putIReg64orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg64orZR(dd), imm16, 16*hw);
               } else {
                  IRTemp old = newTemp(Ity_I32);
                  assign(old, getIReg32orZR(dd));
                  vassert(hw <= 1);
                  UInt mask = 0xFFFF << (16 * hw);
                  IRExpr* res
                     = binop(Iop_Or32,
                             binop(Iop_And32, mkexpr(old), mkU32(~mask)),
                             mkU32((UInt)imm64));
                  putIReg32orZR(dd, res);
                  DIP("movk %s, 0x%x, lsl %u\n",
                      nameIReg32orZR(dd), imm16, 16*hw);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }
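
   /* Worked example of the MOVK slice-insert above (illustrative):
      with Xd = 0x1111222233334444, "movk xd, #0xBEEF, lsl 16" keeps
      everything except bits 31:16 and ORs in the shifted immediate,
      giving 0x11112222BEEF4444. */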

   /* -------------------- {U,S,}BFM -------------------- */
   /*    30 28     22 21   15   9  4

      sf 10 100110 N  immr imms nn dd
         UBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         UBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 00 100110 N  immr imms nn dd
         SBFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         SBFM Xd, Xn, #immr, #imms   when sf=1, N=1

      sf 01 100110 N  immr imms nn dd
         BFM Wd, Wn, #immr, #imms   when sf=0, N=0, immr[5]=0, imms[5]=0
         BFM Xd, Xn, #immr, #imms   when sf=1, N=1
   */
   if (INSN(28,23) == BITS6(1,0,0,1,1,0)) {
      UInt sf     = INSN(31,31);
      UInt opc    = INSN(30,29);
      UInt N      = INSN(22,22);
      UInt immR   = INSN(21,16);
      UInt immS   = INSN(15,10);
      UInt nn     = INSN(9,5);
      UInt dd     = INSN(4,0);
      Bool inZero = False;
      Bool extend = False;
      const HChar* nm = "???";
      /* skip invalid combinations */
      switch (opc) {
         case BITS2(0,0):
            inZero = True; extend = True; nm = "sbfm"; break;
         case BITS2(0,1):
            inZero = False; extend = False; nm = "bfm"; break;
         case BITS2(1,0):
            inZero = True; extend = False; nm = "ubfm"; break;
         case BITS2(1,1):
            goto after_bfm; /* invalid */
         default:
            vassert(0);
      }
      if (sf == 1 && N != 1) goto after_bfm;
      if (sf == 0 && (N != 0 || ((immR >> 5) & 1) != 0
                              || ((immS >> 5) & 1) != 0)) goto after_bfm;
      ULong wmask = 0, tmask = 0;
      Bool ok = dbm_DecodeBitMasks(&wmask, &tmask,
                                   N, immS, immR, False, sf == 1 ? 64 : 32);
      if (!ok) goto after_bfm; /* hmmm */

      Bool   is64 = sf == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;

      IRTemp dst = newTemp(ty);
      IRTemp src = newTemp(ty);
      IRTemp bot = newTemp(ty);
      IRTemp top = newTemp(ty);
      IRTemp res = newTemp(ty);
      assign(dst, inZero ? mkU(ty,0) : getIRegOrZR(is64, dd));
      assign(src, getIRegOrZR(is64, nn));
      /* perform bitfield move on low bits */
      assign(bot, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(dst), mkU(ty, ~wmask)),
                        binop(mkAND(ty), mkexpr(mathROR(ty, src, immR)),
                                         mkU(ty, wmask))));
      /* determine extension bits (sign, zero or dest register) */
      assign(top, mkexpr(extend ? mathREPLICATE(ty, src, immS) : dst));
      /* combine extension bits and result bits */
      assign(res, binop(mkOR(ty),
                        binop(mkAND(ty), mkexpr(top), mkU(ty, ~tmask)),
                        binop(mkAND(ty), mkexpr(bot), mkU(ty, tmask))));
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("%s %s, %s, immR=%u, immS=%u\n",
          nm, nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn), immR, immS);
      return True;
   }
   after_bfm:

   /* ---------------------- EXTR ---------------------- */
   /*   30 28     22 20 15   9 4
      1 00 100111 10 m  imm6 n d  EXTR Xd, Xn, Xm, #imm6
      0 00 100111 00 m  imm6 n d  EXTR Wd, Wn, Wm, #imm6 when #imm6 < 32
   */
   if (INSN(30,23) == BITS8(0,0,1,0,0,1,1,1) && INSN(21,21) == 0) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      UInt imm6  = INSN(15,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      Bool valid = True;
      if (INSN(31,31) != INSN(22,22))
         valid = False;
      if (!is64 && imm6 >= 32)
         valid = False;
      if (!valid) goto after_extr;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      IRTemp srcHi = newTemp(ty);
      IRTemp srcLo = newTemp(ty);
      IRTemp res   = newTemp(ty);
      assign(srcHi, getIRegOrZR(is64, nn));
      assign(srcLo, getIRegOrZR(is64, mm));
      if (imm6 == 0) {
         assign(res, mkexpr(srcLo));
      } else {
         UInt szBits = 8 * sizeofIRType(ty);
         vassert(imm6 > 0 && imm6 < szBits);
         assign(res, binop(mkOR(ty),
                           binop(mkSHL(ty), mkexpr(srcHi), mkU8(szBits-imm6)),
                           binop(mkSHR(ty), mkexpr(srcLo), mkU8(imm6))));
      }
      putIRegOrZR(is64, dd, mkexpr(res));
      DIP("extr %s, %s, %s, #%u\n",
          nameIRegOrZR(is64,dd),
          nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm), imm6);
      return True;
   }
   after_extr:

   vex_printf("ARM64 front end: data_processing_immediate\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Data processing (register) instructions              ---*/
/*------------------------------------------------------------*/

static const HChar* nameSH ( UInt sh ) {
   switch (sh) {
      case 0: return "lsl";
      case 1: return "lsr";
      case 2: return "asr";
      case 3: return "ror";
      default: vassert(0);
   }
}

/* Generate IR to get a register value, possibly shifted by an
   immediate.  Returns either a 32- or 64-bit temporary holding the
   result.  After the shift, the value can optionally be NOT-ed
   too.

   sh_how coding: 00=SHL, 01=SHR, 10=SAR, 11=ROR.  sh_amt may only be
   in the range 0 to (is64 ? 64 : 32)-1.  For some instructions, ROR
   isn't allowed, but it's the job of the caller to check that.
*/
static IRTemp getShiftedIRegOrZR ( Bool is64,
                                   UInt sh_how, UInt sh_amt, UInt regNo,
                                   Bool invert )
{
   vassert(sh_how < 4);
   vassert(sh_amt < (is64 ? 64 : 32));
   IRType ty = is64 ? Ity_I64 : Ity_I32;
   IRTemp t0 = newTemp(ty);
   assign(t0, getIRegOrZR(is64, regNo));
   IRTemp t1 = newTemp(ty);
   switch (sh_how) {
      case BITS2(0,0):
         assign(t1, binop(mkSHL(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(0,1):
         assign(t1, binop(mkSHR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,0):
         assign(t1, binop(mkSAR(ty), mkexpr(t0), mkU8(sh_amt)));
         break;
      case BITS2(1,1):
         assign(t1, mkexpr(mathROR(ty, t0, sh_amt)));
         break;
      default:
         vassert(0);
   }
   if (invert) {
      IRTemp t2 = newTemp(ty);
      assign(t2, unop(mkNOT(ty), mkexpr(t1)));
      return t2;
   } else {
      return t1;
   }
}
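
/* Host-side restatement of the 64-bit operand computation (illustrative
   sketch only; ref_shifted64 is a made-up name). */
#if 0
static ULong ref_shifted64 ( UInt sh_how, UInt sh_amt, ULong v, Bool invert )
{
   ULong r = 0;
   switch (sh_how) {
      case 0: r = v << sh_amt; break;                        /* LSL */
      case 1: r = v >> sh_amt; break;                        /* LSR */
      case 2: r = (ULong)(((Long)v) >> sh_amt); break;       /* ASR */
      case 3: r = sh_amt == 0
                     ? v
                     : (v >> sh_amt) | (v << (64 - sh_amt)); /* ROR */
              break;
   }
   return invert ? ~r : r;
}
#endif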


static
Bool dis_ARM64_data_processing_register(/*MB_OUT*/DisResult* dres,
                                        UInt insn)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ------------------- ADD/SUB(reg) ------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR(NOT ALLOWED)

      31 30 29 28    23 21 20 15   9  4
      |  |  |  |     |  |  |  |    |  |
      x  0  0  01011 sh 0  Rm imm6 Rn Rd   ADD  Rd,Rn, sh(Rm,imm6)
      x  0  1  01011 sh 0  Rm imm6 Rn Rd   ADDS Rd,Rn, sh(Rm,imm6)
      x  1  0  01011 sh 0  Rm imm6 Rn Rd   SUB  Rd,Rn, sh(Rm,imm6)
      x  1  1  01011 sh 0  Rm imm6 Rn Rd   SUBS Rd,Rn, sh(Rm,imm6)
   */
   if (INSN(28,24) == BITS5(0,1,0,1,1) && INSN(21,21) == 0) {
      UInt   bX    = INSN(31,31);
      UInt   bOP   = INSN(30,30); /* 0: ADD, 1: SUB */
      UInt   bS    = INSN(29,29); /* set flags? */
      UInt   sh    = INSN(23,22);
      UInt   rM    = INSN(20,16);
      UInt   imm6  = INSN(15,10);
      UInt   rN    = INSN(9,5);
      UInt   rD    = INSN(4,0);
      Bool   isSUB = bOP == 1;
      Bool   is64  = bX == 1;
      IRType ty    = is64 ? Ity_I64 : Ity_I32;
      if ((!is64 && imm6 > 31) || sh == BITS2(1,1)) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, False);
         IROp   op   = isSUB ? mkSUB(ty) : mkADD(ty);
         IRTemp res  = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
         if (bS) {
            setFlags_ADD_SUB(is64, isSUB, argL, argR);
         }
         DIP("%s%s %s, %s, %s, %s #%u\n",
             bOP ? "sub" : "add", bS ? "s" : "",
             nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
             nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         return True;
      }
   }
2836
sewardjdee30502014-06-04 13:09:44 +00002837 /* ------------------- ADC/SBC(reg) ------------------- */
2838 /* x==0 => 32 bit op x==1 => 64 bit op
2839
2840 31 30 29 28 23 21 20 15 9 4
2841 | | | | | | | | | |
2842 x 0 0 11010 00 0 Rm 000000 Rn Rd ADC Rd,Rn,Rm
2843 x 0 1 11010 00 0 Rm 000000 Rn Rd ADCS Rd,Rn,Rm
2844 x 1 0 11010 00 0 Rm 000000 Rn Rd SBC Rd,Rn,Rm
2845 x 1 1 11010 00 0 Rm 000000 Rn Rd SBCS Rd,Rn,Rm
2846 */
2847
2848 if (INSN(28,21) == BITS8(1,1,0,1,0,0,0,0) && INSN(15,10) == 0 ) {
2849 UInt bX = INSN(31,31);
2850 UInt bOP = INSN(30,30); /* 0: ADC, 1: SBC */
2851 UInt bS = INSN(29,29); /* set flags */
2852 UInt rM = INSN(20,16);
2853 UInt rN = INSN(9,5);
2854 UInt rD = INSN(4,0);
2855
2856 Bool isSUB = bOP == 1;
2857 Bool is64 = bX == 1;
2858 IRType ty = is64 ? Ity_I64 : Ity_I32;
2859
2860 IRTemp oldC = newTemp(ty);
2861 assign(oldC,
2862 is64 ? mk_arm64g_calculate_flag_c()
2863 : unop(Iop_64to32, mk_arm64g_calculate_flag_c()) );
2864
2865 IRTemp argL = newTemp(ty);
2866 assign(argL, getIRegOrZR(is64, rN));
2867 IRTemp argR = newTemp(ty);
2868 assign(argR, getIRegOrZR(is64, rM));
2869
2870 IROp op = isSUB ? mkSUB(ty) : mkADD(ty);
2871 IRTemp res = newTemp(ty);
2872 if (isSUB) {
2873 IRExpr* one = is64 ? mkU64(1) : mkU32(1);
2874 IROp xorOp = is64 ? Iop_Xor64 : Iop_Xor32;
2875 assign(res,
2876 binop(op,
2877 binop(op, mkexpr(argL), mkexpr(argR)),
2878 binop(xorOp, mkexpr(oldC), one)));
2879 } else {
2880 assign(res,
2881 binop(op,
2882 binop(op, mkexpr(argL), mkexpr(argR)),
2883 mkexpr(oldC)));
2884 }
2885
2886 if (rD != 31) putIRegOrZR(is64, rD, mkexpr(res));
2887
2888 if (bS) {
2889 setFlags_ADC_SBC(is64, isSUB, argL, argR, oldC);
2890 }
2891
2892 DIP("%s%s %s, %s, %s\n",
2893 bOP ? "sbc" : "adc", bS ? "s" : "",
2894 nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
2895 nameIRegOrZR(is64, rM));
2896 return True;
2897 }
2898

   /* -------------------- LOGIC(reg) -------------------- */
   /* x==0 => 32 bit op      x==1 => 64 bit op
      N==0 => inv? is no-op (no inversion)
      N==1 => inv? is NOT
      sh: 00=LSL, 01=LSR, 10=ASR, 11=ROR

      31 30 28    23 21 20 15   9  4
      |  |  |     |  |  |  |    |  |
      x  00 01010 sh N  Rm imm6 Rn Rd   AND  Rd,Rn, inv?(sh(Rm,imm6))
      x  01 01010 sh N  Rm imm6 Rn Rd   ORR  Rd,Rn, inv?(sh(Rm,imm6))
      x  10 01010 sh N  Rm imm6 Rn Rd   EOR  Rd,Rn, inv?(sh(Rm,imm6))
      x  11 01010 sh N  Rm imm6 Rn Rd   ANDS Rd,Rn, inv?(sh(Rm,imm6))
      With N=1, the names are: BIC ORN EON BICS
   */
   if (INSN(28,24) == BITS5(0,1,0,1,0)) {
      UInt   bX   = INSN(31,31);
      UInt   sh   = INSN(23,22);
      UInt   bN   = INSN(21,21);
      UInt   rM   = INSN(20,16);
      UInt   imm6 = INSN(15,10);
      UInt   rN   = INSN(9,5);
      UInt   rD   = INSN(4,0);
      Bool   is64 = bX == 1;
      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      if (!is64 && imm6 > 31) {
         /* invalid; fall through */
      } else {
         IRTemp argL = newTemp(ty);
         assign(argL, getIRegOrZR(is64, rN));
         IRTemp argR = getShiftedIRegOrZR(is64, sh, imm6, rM, bN == 1);
         IROp   op   = Iop_INVALID;
         switch (INSN(30,29)) {
            case BITS2(0,0): case BITS2(1,1): op = mkAND(ty); break;
            case BITS2(0,1):                  op = mkOR(ty);  break;
            case BITS2(1,0):                  op = mkXOR(ty); break;
            default: vassert(0);
         }
         IRTemp res = newTemp(ty);
         assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
         if (INSN(30,29) == BITS2(1,1)) {
            setFlags_LOGIC(is64, res);
         }
         putIRegOrZR(is64, rD, mkexpr(res));

         static const HChar* names_op[8]
            = { "and", "orr", "eor", "ands", "bic", "orn", "eon", "bics" };
         vassert(((bN << 2) | INSN(30,29)) < 8);
         const HChar* nm_op = names_op[(bN << 2) | INSN(30,29)];
         /* Special-case the printing of "MOV" */
         if (rN == 31/*zr*/ && sh == 0/*LSL*/ && imm6 == 0 && bN == 0) {
            DIP("mov %s, %s\n", nameIRegOrZR(is64, rD),
                                nameIRegOrZR(is64, rM));
         } else {
            DIP("%s %s, %s, %s, %s #%u\n", nm_op,
                nameIRegOrZR(is64, rD), nameIRegOrZR(is64, rN),
                nameIRegOrZR(is64, rM), nameSH(sh), imm6);
         }
         return True;
      }
   }

   /* -------------------- {U,S}MULH -------------------- */
   /*    31       23 22 20 15     9  4
         10011011 1  10 Rm 011111 Rn Rd   UMULH Xd,Xn,Xm
         10011011 0  10 Rm 011111 Rn Rd   SMULH Xd,Xn,Xm
   */
   if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1)
       && INSN(22,21) == BITS2(1,0) && INSN(15,10) == BITS6(0,1,1,1,1,1)) {
      Bool isU = INSN(23,23) == 1;
      UInt mm  = INSN(20,16);
      UInt nn  = INSN(9,5);
      UInt dd  = INSN(4,0);
      putIReg64orZR(dd, unop(Iop_128HIto64,
                             binop(isU ? Iop_MullU64 : Iop_MullS64,
                                   getIReg64orZR(nn), getIReg64orZR(mm))));
      DIP("%cmulh %s, %s, %s\n",
          isU ? 'u' : 's',
          nameIReg64orZR(dd), nameIReg64orZR(nn), nameIReg64orZR(mm));
      return True;
   }

   /* -------------------- M{ADD,SUB} -------------------- */
   /*    31 30           20 15 14 9 4
         sf 00 11011 000 m  0  a  n r   MADD Rd,Rn,Rm,Ra   d = a+m*n
         sf 00 11011 000 m  1  a  n r   MSUB Rd,Rn,Rm,Ra   d = a-m*n
   */
   if (INSN(30,21) == BITS10(0,0,1,1,0,1,1,0,0,0)) {
      Bool is64  = INSN(31,31) == 1;
      UInt mm    = INSN(20,16);
      Bool isAdd = INSN(15,15) == 0;
      UInt aa    = INSN(14,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      if (is64) {
         putIReg64orZR(
            dd,
            binop(isAdd ? Iop_Add64 : Iop_Sub64,
                  getIReg64orZR(aa),
                  binop(Iop_Mul64, getIReg64orZR(mm), getIReg64orZR(nn))));
      } else {
         putIReg32orZR(
            dd,
            binop(isAdd ? Iop_Add32 : Iop_Sub32,
                  getIReg32orZR(aa),
                  binop(Iop_Mul32, getIReg32orZR(mm), getIReg32orZR(nn))));
      }
      DIP("%s %s, %s, %s, %s\n",
          isAdd ? "madd" : "msub",
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameIRegOrZR(is64, aa));
      return True;
   }

   /* ---------------- CS{EL,INC,INV,NEG} ---------------- */
   /*    31 30 28        20 15   11 9  4
         sf 00 1101 0100 mm cond 00 nn dd   CSEL  Rd,Rn,Rm
         sf 00 1101 0100 mm cond 01 nn dd   CSINC Rd,Rn,Rm
         sf 10 1101 0100 mm cond 00 nn dd   CSINV Rd,Rn,Rm
         sf 10 1101 0100 mm cond 01 nn dd   CSNEG Rd,Rn,Rm
      In all cases, the operation is: Rd = if cond then Rn else OP(Rm)
   */
   if (INSN(29,21) == BITS9(0, 1,1,0,1, 0,1,0,0) && INSN(11,11) == 0) {
      Bool    is64 = INSN(31,31) == 1;
      UInt    b30  = INSN(30,30);
      UInt    mm   = INSN(20,16);
      UInt    cond = INSN(15,12);
      UInt    b10  = INSN(10,10);
      UInt    nn   = INSN(9,5);
      UInt    dd   = INSN(4,0);
      UInt    op   = (b30 << 1) | b10; /* 00=id 01=inc 10=inv 11=neg */
      IRType  ty   = is64 ? Ity_I64 : Ity_I32;
      IRExpr* argL = getIRegOrZR(is64, nn);
      IRExpr* argR = getIRegOrZR(is64, mm);
      switch (op) {
         case BITS2(0,0):
            break;
         case BITS2(0,1):
            argR = binop(mkADD(ty), argR, mkU(ty,1));
            break;
         case BITS2(1,0):
            argR = unop(mkNOT(ty), argR);
            break;
         case BITS2(1,1):
            argR = binop(mkSUB(ty), mkU(ty,0), argR);
            break;
         default:
            vassert(0);
      }
      putIRegOrZR(
         is64, dd,
         IRExpr_ITE(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                    argL, argR)
      );
      const HChar* op_nm[4] = { "csel", "csinc", "csinv", "csneg" };
      DIP("%s %s, %s, %s, %s\n", op_nm[op],
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nameCC(cond));
      return True;
   }
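
   /* Illustrative example: "csinc w0, w1, w2, eq" gives
      w0 = eq ? w1 : w2+1.  The CSET alias is CSINC with both sources
      WZR and the condition inverted. */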

   /* -------------- ADD/SUB(extended reg) -------------- */
   /*     28         20 15  12   9 4
      000 01011 00 1 m  opt imm3 n d   ADD  Wd|SP, Wn|SP, Wm ext&lsld
      100 01011 00 1 m  opt imm3 n d   ADD  Xd|SP, Xn|SP, Rm ext&lsld

      001 01011 00 1 m  opt imm3 n d   ADDS Wd,    Wn|SP, Wm ext&lsld
      101 01011 00 1 m  opt imm3 n d   ADDS Xd,    Xn|SP, Rm ext&lsld

      010 01011 00 1 m  opt imm3 n d   SUB  Wd|SP, Wn|SP, Wm ext&lsld
      110 01011 00 1 m  opt imm3 n d   SUB  Xd|SP, Xn|SP, Rm ext&lsld

      011 01011 00 1 m  opt imm3 n d   SUBS Wd,    Wn|SP, Wm ext&lsld
      111 01011 00 1 m  opt imm3 n d   SUBS Xd,    Xn|SP, Rm ext&lsld

      The 'm' operand is extended per opt, thusly:

        000   Xm & 0xFF           UXTB
        001   Xm & 0xFFFF         UXTH
        010   Xm & (2^32)-1       UXTW
        011   Xm                  UXTX

        100   Xm sx from bit 7    SXTB
        101   Xm sx from bit 15   SXTH
        110   Xm sx from bit 31   SXTW
        111   Xm                  SXTX

      In the 64 bit case (bit31 == 1), UXTX and SXTX are the identity
      operation on Xm.  In the 32 bit case, UXTW, UXTX, SXTW and SXTX
      are the identity operation on Wm.

      After extension, the value is shifted left by imm3 bits, which
      may only be in the range 0 .. 4 inclusive.
   */
   if (INSN(28,21) == BITS8(0,1,0,1,1,0,0,1) && INSN(12,10) <= 4) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSub = INSN(30,30) == 1;
      Bool setCC = INSN(29,29) == 1;
      UInt mm    = INSN(20,16);
      UInt opt   = INSN(15,13);
      UInt imm3  = INSN(12,10);
      UInt nn    = INSN(9,5);
      UInt dd    = INSN(4,0);
      const HChar* nameExt[8] = { "uxtb", "uxth", "uxtw", "uxtx",
                                  "sxtb", "sxth", "sxtw", "sxtx" };
      /* Do almost the same thing in the 32- and 64-bit cases. */
      IRTemp xN = newTemp(Ity_I64);
      IRTemp xM = newTemp(Ity_I64);
      assign(xN, getIReg64orSP(nn));
      assign(xM, getIReg64orZR(mm));
      IRExpr* xMw  = mkexpr(xM); /* "xM widened" */
      Int     shSX = 0;
      /* widen Xm .. */
      switch (opt) {
         case BITS3(0,0,0): // UXTB
            xMw = binop(Iop_And64, xMw, mkU64(0xFF)); break;
         case BITS3(0,0,1): // UXTH
            xMw = binop(Iop_And64, xMw, mkU64(0xFFFF)); break;
         case BITS3(0,1,0): // UXTW -- noop for the 32bit case
            if (is64) {
               xMw = unop(Iop_32Uto64, unop(Iop_64to32, xMw));
            }
            break;
         case BITS3(0,1,1): // UXTX -- always a noop
            break;
         case BITS3(1,0,0): // SXTB
            shSX = 56; goto sxTo64;
         case BITS3(1,0,1): // SXTH
            shSX = 48; goto sxTo64;
         case BITS3(1,1,0): // SXTW -- noop for the 32bit case
            if (is64) {
               shSX = 32; goto sxTo64;
            }
            break;
         case BITS3(1,1,1): // SXTX -- always a noop
            break;
         sxTo64:
            vassert(shSX >= 32);
            xMw = binop(Iop_Sar64, binop(Iop_Shl64, xMw, mkU8(shSX)),
                        mkU8(shSX));
            break;
         default:
            vassert(0);
      }
      /* and now shift */
      IRTemp argL = xN;
      IRTemp argR = newTemp(Ity_I64);
      assign(argR, binop(Iop_Shl64, xMw, mkU8(imm3)));
      IRTemp res = newTemp(Ity_I64);
      assign(res, binop(isSub ? Iop_Sub64 : Iop_Add64,
                        mkexpr(argL), mkexpr(argR)));
      if (is64) {
         if (setCC) {
            putIReg64orZR(dd, mkexpr(res));
            setFlags_ADD_SUB(True/*is64*/, isSub, argL, argR);
         } else {
            putIReg64orSP(dd, mkexpr(res));
         }
      } else {
         if (setCC) {
            IRTemp argL32 = newTemp(Ity_I32);
            IRTemp argR32 = newTemp(Ity_I32);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(res)));
            assign(argL32, unop(Iop_64to32, mkexpr(argL)));
            assign(argR32, unop(Iop_64to32, mkexpr(argR)));
            setFlags_ADD_SUB(False/*!is64*/, isSub, argL32, argR32);
         } else {
            putIReg32orSP(dd, unop(Iop_64to32, mkexpr(res)));
         }
      }
      DIP("%s%s %s, %s, %s %s lsl %u\n",
          isSub ? "sub" : "add", setCC ? "s" : "",
          setCC ? nameIRegOrZR(is64, dd) : nameIRegOrSP(is64, dd),
          nameIRegOrSP(is64, nn), nameIRegOrSP(is64, mm),
          nameExt[opt], imm3);
      return True;
   }
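
   /* Illustrative example: "add x0, x1, w2, sxtw #2" computes
      x0 = x1 + (SignExtend64(w2) << 2), the usual scaled-index idiom. */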

   /* ---------------- CCMP/CCMN(imm) ---------------- */
   /* Bizarrely, these appear in the "data processing register"
      category, even though they are operations against an
      immediate. */
   /* 31   29        20   15   11 9    3
      sf 1 111010010 imm5 cond 10 Rn 0 nzcv   CCMP Rn, #imm5, #nzcv, cond
      sf 0 111010010 imm5 cond 10 Rn 0 nzcv   CCMN Rn, #imm5, #nzcv, cond

      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,imm5) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,imm5) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(1,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt imm5  = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, mkU64(imm5));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, mkU32(imm5));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, #%u, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          imm5, nzcv, nameCC(cond));
      return True;
   }

   /* ---------------- CCMP/CCMN(reg) ---------------- */
   /* 31   29        20 15   11 9    3
      sf 1 111010010 Rm cond 00 Rn 0 nzcv   CCMP Rn, Rm, #nzcv, cond
      sf 0 111010010 Rm cond 00 Rn 0 nzcv   CCMN Rn, Rm, #nzcv, cond
      Operation is:
         (CCMP) flags = if cond then flags-after-sub(Rn,Rm) else nzcv
         (CCMN) flags = if cond then flags-after-add(Rn,Rm) else nzcv
   */
   if (INSN(29,21) == BITS9(1,1,1,0,1,0,0,1,0)
       && INSN(11,10) == BITS2(0,0) && INSN(4,4) == 0) {
      Bool is64  = INSN(31,31) == 1;
      Bool isSUB = INSN(30,30) == 1;
      UInt mm    = INSN(20,16);
      UInt cond  = INSN(15,12);
      UInt nn    = INSN(9,5);
      UInt nzcv  = INSN(3,0);

      IRTemp condT = newTemp(Ity_I1);
      assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));

      IRType ty   = is64 ? Ity_I64 : Ity_I32;
      IRTemp argL = newTemp(ty);
      IRTemp argR = newTemp(ty);

      if (is64) {
         assign(argL, getIReg64orZR(nn));
         assign(argR, getIReg64orZR(mm));
      } else {
         assign(argL, getIReg32orZR(nn));
         assign(argR, getIReg32orZR(mm));
      }
      setFlags_ADD_SUB_conditionally(is64, isSUB, condT, argL, argR, nzcv);

      DIP("ccm%c %s, %s, #%u, %s\n",
          isSUB ? 'p' : 'n', nameIRegOrZR(is64, nn),
          nameIRegOrZR(is64, mm), nzcv, nameCC(cond));
      return True;
   }


   /* -------------- REV/REV16/REV32/RBIT -------------- */
   /* 31 30 28       20    15   11 9 4

      1  10 11010110 00000 0000 11 n d    (1) REV   Xd, Xn
      0  10 11010110 00000 0000 10 n d    (2) REV   Wd, Wn

      1  10 11010110 00000 0000 00 n d    (3) RBIT  Xd, Xn
      0  10 11010110 00000 0000 00 n d    (4) RBIT  Wd, Wn

      1  10 11010110 00000 0000 01 n d    (5) REV16 Xd, Xn
      0  10 11010110 00000 0000 01 n d    (6) REV16 Wd, Wn

      1  10 11010110 00000 0000 10 n d    (7) REV32 Xd, Xn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,12) == BITS9(0,0,0,0,0,0,0,0,0)) {
      UInt b31 = INSN(31,31);
      UInt opc = INSN(11,10);

      UInt ix = 0;
      /**/ if (b31 == 1 && opc == BITS2(1,1)) ix = 1;
      else if (b31 == 0 && opc == BITS2(1,0)) ix = 2;
      else if (b31 == 1 && opc == BITS2(0,0)) ix = 3;
      else if (b31 == 0 && opc == BITS2(0,0)) ix = 4;
      else if (b31 == 1 && opc == BITS2(0,1)) ix = 5;
      else if (b31 == 0 && opc == BITS2(0,1)) ix = 6;
      else if (b31 == 1 && opc == BITS2(1,0)) ix = 7;
      if (ix >= 1 && ix <= 7) {
         Bool   is64 = ix == 1 || ix == 3 || ix == 5 || ix == 7;
         UInt   nn   = INSN(9,5);
         UInt   dd   = INSN(4,0);
         IRTemp src  = newTemp(Ity_I64);
         IRTemp dst  = IRTemp_INVALID;
         IRTemp (*math)(IRTemp) = NULL;
         switch (ix) {
            case 1: case 2: math = math_BYTESWAP64;   break;
            case 3: case 4: math = math_BITSWAP64;    break;
            case 5: case 6: math = math_USHORTSWAP64; break;
            case 7:         math = math_UINTSWAP64;   break;
            default: vassert(0);
         }
         const HChar* names[7]
            = { "rev", "rev", "rbit", "rbit", "rev16", "rev16", "rev32" };
         const HChar* nm = names[ix-1];
         vassert(math);
         if (ix == 6) {
            /* This has to be special cased, since the logic below doesn't
               handle it correctly. */
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd,
                          unop(Iop_32Uto64, unop(Iop_64to32, mkexpr(dst))));
         } else if (is64) {
            assign(src, getIReg64orZR(nn));
            dst = math(src);
            putIReg64orZR(dd, mkexpr(dst));
         } else {
            assign(src, binop(Iop_Shl64, getIReg64orZR(nn), mkU8(32)));
            dst = math(src);
            putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
         }
         DIP("%s %s, %s\n", nm,
             nameIRegOrZR(is64,dd), nameIRegOrZR(is64,nn));
         return True;
      }
      /* else fall through */
   }

   /* -------------------- CLZ/CLS -------------------- */
   /*    30 28   24   20    15      9 4
      sf 10 1101 0110 00000 00010 0 n d    CLZ Rd, Rn
      sf 10 1101 0110 00000 00010 1 n d    CLS Rd, Rn
   */
   if (INSN(30,21) == BITS10(1,0,1,1,0,1,0,1,1,0)
       && INSN(20,11) == BITS10(0,0,0,0,0,0,0,0,1,0)) {
      Bool   is64  = INSN(31,31) == 1;
      Bool   isCLS = INSN(10,10) == 1;
      UInt   nn    = INSN(9,5);
      UInt   dd    = INSN(4,0);
      IRTemp src   = newTemp(Ity_I64);
      IRTemp srcZ  = newTemp(Ity_I64);
      IRTemp dst   = newTemp(Ity_I64);
      /* Get the argument, widened out to 64 bit */
      if (is64) {
         assign(src, getIReg64orZR(nn));
      } else {
         assign(src, binop(Iop_Shl64,
                           unop(Iop_32Uto64, getIReg32orZR(nn)), mkU8(32)));
      }
      /* If this is CLS, mash the arg around accordingly */
      if (isCLS) {
         IRExpr* one = mkU8(1);
         assign(srcZ,
                binop(Iop_Xor64,
                      binop(Iop_Shl64, mkexpr(src), one),
                      binop(Iop_Shl64, binop(Iop_Shr64, mkexpr(src), one),
                            one)));
      } else {
         assign(srcZ, mkexpr(src));
      }
      /* And compute CLZ. */
      if (is64) {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 63 : 64),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg64orZR(dd, mkexpr(dst));
      } else {
         assign(dst, IRExpr_ITE(binop(Iop_CmpEQ64, mkexpr(srcZ), mkU64(0)),
                                mkU64(isCLS ? 31 : 32),
                                unop(Iop_Clz64, mkexpr(srcZ))));
         putIReg32orZR(dd, unop(Iop_64to32, mkexpr(dst)));
      }
      DIP("cl%c %s, %s\n", isCLS ? 's' : 'z',
          nameIRegOrZR(is64, dd), nameIRegOrZR(is64, nn));
      return True;
   }
3374
sewardjca95f2d2014-11-25 17:27:32 +00003375 /* ------------------ LSLV/LSRV/ASRV/RORV ------------------ */
sewardjbbcf1882014-01-12 12:49:10 +00003376 /* 30 28 20 15 11 9 4
3377 sf 00 1101 0110 m 0010 00 n d LSLV Rd,Rn,Rm
3378 sf 00 1101 0110 m 0010 01 n d LSRV Rd,Rn,Rm
3379 sf 00 1101 0110 m 0010 10 n d ASRV Rd,Rn,Rm
sewardjca95f2d2014-11-25 17:27:32 +00003380 sf 00 1101 0110 m 0010 11 n d RORV Rd,Rn,Rm
sewardjbbcf1882014-01-12 12:49:10 +00003381 */
3382 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
sewardjca95f2d2014-11-25 17:27:32 +00003383 && INSN(15,12) == BITS4(0,0,1,0)) {
sewardjbbcf1882014-01-12 12:49:10 +00003384 Bool is64 = INSN(31,31) == 1;
3385 UInt mm = INSN(20,16);
3386 UInt op = INSN(11,10);
3387 UInt nn = INSN(9,5);
3388 UInt dd = INSN(4,0);
3389 IRType ty = is64 ? Ity_I64 : Ity_I32;
3390 IRTemp srcL = newTemp(ty);
sewardjca95f2d2014-11-25 17:27:32 +00003391 IRTemp srcR = newTemp(Ity_I64);
sewardjbbcf1882014-01-12 12:49:10 +00003392 IRTemp res = newTemp(ty);
3393 IROp iop = Iop_INVALID;
3394 assign(srcL, getIRegOrZR(is64, nn));
sewardjca95f2d2014-11-25 17:27:32 +00003395 assign(srcR, binop(Iop_And64, getIReg64orZR(mm),
3396 mkU64(is64 ? 63 : 31)));
3397 if (op < 3) {
3398 // LSLV, LSRV, ASRV
3399 switch (op) {
3400 case BITS2(0,0): iop = mkSHL(ty); break;
3401 case BITS2(0,1): iop = mkSHR(ty); break;
3402 case BITS2(1,0): iop = mkSAR(ty); break;
3403 default: vassert(0);
3404 }
3405 assign(res, binop(iop, mkexpr(srcL),
3406 unop(Iop_64to8, mkexpr(srcR))));
3407 } else {
3408 // RORV
3409 IROp opSHL = mkSHL(ty);
3410 IROp opSHR = mkSHR(ty);
3411 IROp opOR = mkOR(ty);
3412 IRExpr* width = mkU64(is64 ? 64: 32);
3413 assign(
3414 res,
3415 IRExpr_ITE(
3416 binop(Iop_CmpEQ64, mkexpr(srcR), mkU64(0)),
3417 mkexpr(srcL),
3418 binop(opOR,
3419 binop(opSHL,
3420 mkexpr(srcL),
3421 unop(Iop_64to8, binop(Iop_Sub64, width,
3422 mkexpr(srcR)))),
3423 binop(opSHR,
3424 mkexpr(srcL), unop(Iop_64to8, mkexpr(srcR))))
3425 ));
sewardjbbcf1882014-01-12 12:49:10 +00003426 }
sewardjbbcf1882014-01-12 12:49:10 +00003427 putIRegOrZR(is64, dd, mkexpr(res));
sewardjca95f2d2014-11-25 17:27:32 +00003428 vassert(op < 4);
3429 const HChar* names[4] = { "lslv", "lsrv", "asrv", "rorv" };
sewardjbbcf1882014-01-12 12:49:10 +00003430 DIP("%s %s, %s, %s\n",
3431 names[op], nameIRegOrZR(is64,dd),
3432 nameIRegOrZR(is64,nn), nameIRegOrZR(is64,mm));
3433 return True;
3434 }
3435
3436 /* -------------------- SDIV/UDIV -------------------- */
3437 /* 30 28 20 15 10 9 4
3438 sf 00 1101 0110 m 00001 1 n d SDIV Rd,Rn,Rm
3439 sf 00 1101 0110 m 00001 0 n d UDIV Rd,Rn,Rm
3440 */
3441 if (INSN(30,21) == BITS10(0,0,1,1,0,1,0,1,1,0)
3442 && INSN(15,11) == BITS5(0,0,0,0,1)) {
3443 Bool is64 = INSN(31,31) == 1;
3444 UInt mm = INSN(20,16);
3445 Bool isS = INSN(10,10) == 1;
3446 UInt nn = INSN(9,5);
3447 UInt dd = INSN(4,0);
3448 if (isS) {
3449 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivS64 : Iop_DivS32,
3450 getIRegOrZR(is64, nn),
3451 getIRegOrZR(is64, mm)));
3452 } else {
3453 putIRegOrZR(is64, dd, binop(is64 ? Iop_DivU64 : Iop_DivU32,
3454 getIRegOrZR(is64, nn),
3455 getIRegOrZR(is64, mm)));
3456 }
3457 DIP("%cdiv %s, %s, %s\n", isS ? 's' : 'u',
3458 nameIRegOrZR(is64, dd),
3459 nameIRegOrZR(is64, nn), nameIRegOrZR(is64, mm));
3460 return True;
3461 }
3462
3463 /* ------------------ {S,U}M{ADD,SUB}L ------------------ */
3464 /* 31 23 20 15 14 9 4
3465 1001 1011 101 m 0 a n d UMADDL Xd,Wn,Wm,Xa
3466 1001 1011 001 m 0 a n d SMADDL Xd,Wn,Wm,Xa
3467 1001 1011 101 m 1 a n d UMSUBL Xd,Wn,Wm,Xa
3468 1001 1011 001 m 1 a n d SMSUBL Xd,Wn,Wm,Xa
3469 with operation
3470 Xd = Xa +/- (Wn *u/s Wm)
3471 */
3472 if (INSN(31,24) == BITS8(1,0,0,1,1,0,1,1) && INSN(22,21) == BITS2(0,1)) {
3473 Bool isU = INSN(23,23) == 1;
3474 UInt mm = INSN(20,16);
3475 Bool isAdd = INSN(15,15) == 0;
3476 UInt aa = INSN(14,10);
3477 UInt nn = INSN(9,5);
3478 UInt dd = INSN(4,0);
3479 IRTemp wN = newTemp(Ity_I32);
3480 IRTemp wM = newTemp(Ity_I32);
3481 IRTemp xA = newTemp(Ity_I64);
3482 IRTemp muld = newTemp(Ity_I64);
3483 IRTemp res = newTemp(Ity_I64);
3484 assign(wN, getIReg32orZR(nn));
3485 assign(wM, getIReg32orZR(mm));
3486 assign(xA, getIReg64orZR(aa));
3487 assign(muld, binop(isU ? Iop_MullU32 : Iop_MullS32,
3488 mkexpr(wN), mkexpr(wM)));
3489 assign(res, binop(isAdd ? Iop_Add64 : Iop_Sub64,
3490 mkexpr(xA), mkexpr(muld)));
3491 putIReg64orZR(dd, mkexpr(res));
3492 DIP("%cm%sl %s, %s, %s, %s\n", isU ? 'u' : 's', isAdd ? "add" : "sub",
3493 nameIReg64orZR(dd), nameIReg32orZR(nn),
3494 nameIReg32orZR(mm), nameIReg64orZR(aa));
3495 return True;
3496 }
3497 vex_printf("ARM64 front end: data_processing_register\n");
3498 return False;
3499# undef INSN
3500}
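

/* For exposition only: hypothetical scalar models of two of the
   trickier lowerings in the function above.  They are not used by the
   decoder and are not part of the original file.  model_CLS64 shows
   the CLS-via-CLZ trick used in the CLZ/CLS case: XORing each bit
   with its right-hand neighbour turns a run of identical leading bits
   into a run of leading zeroes, so counting leading zeroes of the
   XORed value counts leading sign bits.  model_RORV64 shows the RORV
   lowering, including why the zero-shift case is tested separately
   (a shift by the full register width would be undefined). */
static UInt model_CLS64 ( ULong x )
{
   /* bit i of z is x[i] ^ x[i-1]; bit 0 is zero */
   ULong z = (x << 1) ^ ((x >> 1) << 1);
   if (z == 0) return 63;        /* every bit matches the sign bit */
   UInt  n = 0;
   ULong m = 1ULL << 63;
   while ((z & m) == 0) { n++; m >>= 1; }   /* == Iop_Clz64 on z */
   return n;
}

static ULong model_RORV64 ( ULong x, ULong amt )
{
   amt &= 63;                    /* srcR is masked the same way above */
   if (amt == 0) return x;       /* avoids the undefined shift by 64 */
   return (x << (64 - amt)) | (x >> amt);
}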


/*------------------------------------------------------------*/
/*--- Math helpers for vector interleave/deinterleave      ---*/
/*------------------------------------------------------------*/

#define EX(_tmp) \
           mkexpr(_tmp)
#define SL(_hi128,_lo128,_nbytes) \
           ( (_nbytes) == 0 \
                ? (_lo128) \
                : triop(Iop_SliceV128,(_hi128),(_lo128),mkU8(_nbytes)) )
#define ROR(_v128,_nbytes) \
           SL((_v128),(_v128),(_nbytes))
#define ROL(_v128,_nbytes) \
           SL((_v128),(_v128),16-(_nbytes))
#define SHR(_v128,_nbytes) \
           binop(Iop_ShrV128,(_v128),mkU8(8*(_nbytes)))
#define SHL(_v128,_nbytes) \
           binop(Iop_ShlV128,(_v128),mkU8(8*(_nbytes)))
#define ILO64x2(_argL,_argR) \
           binop(Iop_InterleaveLO64x2,(_argL),(_argR))
#define IHI64x2(_argL,_argR) \
           binop(Iop_InterleaveHI64x2,(_argL),(_argR))
#define ILO32x4(_argL,_argR) \
           binop(Iop_InterleaveLO32x4,(_argL),(_argR))
#define IHI32x4(_argL,_argR) \
           binop(Iop_InterleaveHI32x4,(_argL),(_argR))
#define ILO16x8(_argL,_argR) \
           binop(Iop_InterleaveLO16x8,(_argL),(_argR))
#define IHI16x8(_argL,_argR) \
           binop(Iop_InterleaveHI16x8,(_argL),(_argR))
#define ILO8x16(_argL,_argR) \
           binop(Iop_InterleaveLO8x16,(_argL),(_argR))
#define IHI8x16(_argL,_argR) \
           binop(Iop_InterleaveHI8x16,(_argL),(_argR))
#define CEV32x4(_argL,_argR) \
           binop(Iop_CatEvenLanes32x4,(_argL),(_argR))
#define COD32x4(_argL,_argR) \
           binop(Iop_CatOddLanes32x4,(_argL),(_argR))
#define COD16x8(_argL,_argR) \
           binop(Iop_CatOddLanes16x8,(_argL),(_argR))
#define COD8x16(_argL,_argR) \
           binop(Iop_CatOddLanes8x16,(_argL),(_argR))
#define CEV8x16(_argL,_argR) \
           binop(Iop_CatEvenLanes8x16,(_argL),(_argR))
#define AND(_arg1,_arg2) \
           binop(Iop_AndV128,(_arg1),(_arg2))
#define OR2(_arg1,_arg2) \
           binop(Iop_OrV128,(_arg1),(_arg2))
#define OR3(_arg1,_arg2,_arg3) \
           binop(Iop_OrV128,(_arg1),binop(Iop_OrV128,(_arg2),(_arg3)))
#define OR4(_arg1,_arg2,_arg3,_arg4) \
           binop(Iop_OrV128, \
                 binop(Iop_OrV128,(_arg1),(_arg2)), \
                 binop(Iop_OrV128,(_arg3),(_arg4)))


/* Do interleaving for 1 128 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_128( /*OUTx1*/ IRTemp* i0,
                           UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 128 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_128( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                           UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // u1 == B1 B0, u0 == A1 A0
      // i1 == B1 A1, i0 == B0 A0
      assign(*i0, binop(Iop_InterleaveLO64x2, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI64x2, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      assign(*i0, binop(Iop_InterleaveLO32x4, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI32x4, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // u1 == B{7..0}, u0 == A{7..0}
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      assign(*i0, binop(Iop_InterleaveLO16x8, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI16x8, mkexpr(u1), mkexpr(u0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // u1 == B{f..0}, u0 == A{f..0}
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      assign(*i0, binop(Iop_InterleaveLO8x16, mkexpr(u1), mkexpr(u0)));
      assign(*i1, binop(Iop_InterleaveHI8x16, mkexpr(u1), mkexpr(u0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
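

/* For exposition only: a hypothetical scalar model of the 8-bit case
   of math_INTERLEAVE2_128 above (not used by the decoder, not part of
   the original file).  Byte 0 is lane 0.  It makes the zip structure
   of the i0/i1 outputs explicit: i0 == B7 A7 .. B0 A0 and
   i1 == Bf Af .. B8 A8, exactly as the comments above describe. */
static void model_INTERLEAVE2_8x16 ( UChar* i0, UChar* i1,
                                     const UChar* u0, const UChar* u1 )
{
   UInt k;
   for (k = 0; k < 8; k++) {
      i0[2*k]   = u0[k];      /* A lanes land in the even positions */
      i0[2*k+1] = u1[k];      /* B lanes land in the odd positions  */
      i1[2*k]   = u0[k+8];
      i1[2*k+1] = u1[k+8];
   }
}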


/* Do interleaving for 3 128 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_128(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      assign(*i2, IHI64x2( EX(u2), EX(u1) ));
      assign(*i1, ILO64x2( ROR(EX(u0),8), EX(u2) ));
      assign(*i0, ILO64x2( EX(u1), EX(u0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1100 = newTempV128();
      IRTemp c0011 = newTempV128();
      IRTemp c0110 = newTempV128();
      assign(c1100, mkV128(0xFF00));
      assign(c0011, mkV128(0x00FF));
      assign(c0110, mkV128(0x0FF0));
      // First interleave them at 64x2 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 3, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2, OR2( AND( IHI32x4(EX(p2), ROL(EX(p2),8)), EX(c1100) ),
                       AND( IHI32x4(ROR(EX(p1),4), EX(p2)), EX(c0011) ) ));
      assign(*i1, OR3( SHL(EX(p2),12),
                       AND(EX(p1),EX(c0110)),
                       SHR(EX(p0),12) ));
      assign(*i0, OR2( AND( ILO32x4(EX(p0),ROL(EX(p1),4)), EX(c1100) ),
                       AND( ILO32x4(ROR(EX(p0),8),EX(p0)), EX(c0011) ) ));
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      IRTemp p0    = newTempV128();
      IRTemp p1    = newTempV128();
      IRTemp p2    = newTempV128();
      IRTemp c1000 = newTempV128();
      IRTemp c0100 = newTempV128();
      IRTemp c0010 = newTempV128();
      IRTemp c0001 = newTempV128();
      assign(c1000, mkV128(0xF000));
      assign(c0100, mkV128(0x0F00));
      assign(c0010, mkV128(0x00F0));
      assign(c0001, mkV128(0x000F));
      // First interleave them at 32x4 granularity,
      // generating partial ("p") values.
      math_INTERLEAVE3_128(&p0, &p1, &p2, 2, u0, u1, u2);
      // And more shuffling around for the final answer
      assign(*i2,
             OR4( AND( IHI16x8( EX(p2),        ROL(EX(p2),4) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p2),6), EX(p2)        ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p2),2), ROL(EX(p2),6) ), EX(c0010) ),
                  AND( ILO16x8( ROR(EX(p2),2), ROL(EX(p1),2) ), EX(c0001) )
             ));
      assign(*i1,
             OR4( AND( IHI16x8( ROL(EX(p1),4), ROR(EX(p2),2) ), EX(c1000) ),
                  AND( IHI16x8( EX(p1),        ROL(EX(p1),4) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p1),4), ROL(EX(p1),8) ), EX(c0010) ),
                  AND( IHI16x8( ROR(EX(p0),6), ROL(EX(p1),4) ), EX(c0001) )
             ));
      assign(*i0,
             OR4( AND( IHI16x8( ROR(EX(p1),2), ROL(EX(p0),2) ), EX(c1000) ),
                  AND( IHI16x8( ROL(EX(p0),2), ROL(EX(p0),6) ), EX(c0100) ),
                  AND( IHI16x8( ROL(EX(p0),8), ROL(EX(p0),2) ), EX(c0010) ),
                  AND( IHI16x8( ROL(EX(p0),4), ROL(EX(p0),8) ), EX(c0001) )
             ));
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  It doesn't seem worth the hassle of first doing a
      // 16x8 interleave, so just generate all 24 partial results
      // directly :-(
      // u2 == Cf .. C0, u1 == Bf .. B0, u0 == Af .. A0
      // i2 == Cf Bf Af Ce .. Bb Ab Ca
      // i1 == Ba Aa C9 B9 .. A6 C5 B5
      // i0 == A5 C4 B4 A4 .. C0 B0 A0

      IRTemp i2_FEDC = newTempV128(); IRTemp i2_BA98 = newTempV128();
      IRTemp i2_7654 = newTempV128(); IRTemp i2_3210 = newTempV128();
      IRTemp i1_FEDC = newTempV128(); IRTemp i1_BA98 = newTempV128();
      IRTemp i1_7654 = newTempV128(); IRTemp i1_3210 = newTempV128();
      IRTemp i0_FEDC = newTempV128(); IRTemp i0_BA98 = newTempV128();
      IRTemp i0_7654 = newTempV128(); IRTemp i0_3210 = newTempV128();
      IRTemp i2_hi64 = newTempV128(); IRTemp i2_lo64 = newTempV128();
      IRTemp i1_hi64 = newTempV128(); IRTemp i1_lo64 = newTempV128();
      IRTemp i0_hi64 = newTempV128(); IRTemp i0_lo64 = newTempV128();

      // eg XXXX(qqq, CC, 0xF, BB, 0xA)) sets qqq to be a vector
      // of the form 14 bytes junk : CC[0xF] : BB[0xA]
      //
#     define XXXX(_tempName,_srcVec1,_srcShift1,_srcVec2,_srcShift2) \
         IRTemp t_##_tempName = newTempV128(); \
         assign(t_##_tempName, \
                ILO8x16( ROR(EX(_srcVec1),(_srcShift1)), \
                         ROR(EX(_srcVec2),(_srcShift2)) ) )

      // Let CC, BB, AA be (handy) aliases of u2, u1, u0 respectively
      IRTemp CC = u2; IRTemp BB = u1; IRTemp AA = u0;

      // The slicing and reassembly are done as interleavedly as possible,
      // so as to minimise the demand for registers in the back end, which
      // was observed to be a problem in testing.

      XXXX(CfBf, CC, 0xf, BB, 0xf); // i2[15:14]
      XXXX(AfCe, AA, 0xf, CC, 0xe);
      assign(i2_FEDC, ILO16x8(EX(t_CfBf), EX(t_AfCe)));

      XXXX(BeAe, BB, 0xe, AA, 0xe);
      XXXX(CdBd, CC, 0xd, BB, 0xd);
      assign(i2_BA98, ILO16x8(EX(t_BeAe), EX(t_CdBd)));
      assign(i2_hi64, ILO32x4(EX(i2_FEDC), EX(i2_BA98)));

      XXXX(AdCc, AA, 0xd, CC, 0xc);
      XXXX(BcAc, BB, 0xc, AA, 0xc);
      assign(i2_7654, ILO16x8(EX(t_AdCc), EX(t_BcAc)));

      XXXX(CbBb, CC, 0xb, BB, 0xb);
      XXXX(AbCa, AA, 0xb, CC, 0xa); // i2[1:0]
      assign(i2_3210, ILO16x8(EX(t_CbBb), EX(t_AbCa)));
      assign(i2_lo64, ILO32x4(EX(i2_7654), EX(i2_3210)));
      assign(*i2, ILO64x2(EX(i2_hi64), EX(i2_lo64)));

      XXXX(BaAa, BB, 0xa, AA, 0xa); // i1[15:14]
      XXXX(C9B9, CC, 0x9, BB, 0x9);
      assign(i1_FEDC, ILO16x8(EX(t_BaAa), EX(t_C9B9)));

      XXXX(A9C8, AA, 0x9, CC, 0x8);
      XXXX(B8A8, BB, 0x8, AA, 0x8);
      assign(i1_BA98, ILO16x8(EX(t_A9C8), EX(t_B8A8)));
      assign(i1_hi64, ILO32x4(EX(i1_FEDC), EX(i1_BA98)));

      XXXX(C7B7, CC, 0x7, BB, 0x7);
      XXXX(A7C6, AA, 0x7, CC, 0x6);
      assign(i1_7654, ILO16x8(EX(t_C7B7), EX(t_A7C6)));

      XXXX(B6A6, BB, 0x6, AA, 0x6);
      XXXX(C5B5, CC, 0x5, BB, 0x5); // i1[1:0]
      assign(i1_3210, ILO16x8(EX(t_B6A6), EX(t_C5B5)));
      assign(i1_lo64, ILO32x4(EX(i1_7654), EX(i1_3210)));
      assign(*i1, ILO64x2(EX(i1_hi64), EX(i1_lo64)));

      XXXX(A5C4, AA, 0x5, CC, 0x4); // i0[15:14]
      XXXX(B4A4, BB, 0x4, AA, 0x4);
      assign(i0_FEDC, ILO16x8(EX(t_A5C4), EX(t_B4A4)));

      XXXX(C3B3, CC, 0x3, BB, 0x3);
      XXXX(A3C2, AA, 0x3, CC, 0x2);
      assign(i0_BA98, ILO16x8(EX(t_C3B3), EX(t_A3C2)));
      assign(i0_hi64, ILO32x4(EX(i0_FEDC), EX(i0_BA98)));

      XXXX(B2A2, BB, 0x2, AA, 0x2);
      XXXX(C1B1, CC, 0x1, BB, 0x1);
      assign(i0_7654, ILO16x8(EX(t_B2A2), EX(t_C1B1)));

      XXXX(A1C0, AA, 0x1, CC, 0x0);
      XXXX(B0A0, BB, 0x0, AA, 0x0); // i0[1:0]
      assign(i0_3210, ILO16x8(EX(t_A1C0), EX(t_B0A0)));
      assign(i0_lo64, ILO32x4(EX(i0_7654), EX(i0_3210)));
      assign(*i0, ILO64x2(EX(i0_hi64), EX(i0_lo64)));

#     undef XXXX
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}
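

/* For exposition only: a hypothetical lane-level reference model for
   the 3-way interleave above (not used by the decoder, not part of
   the original file).  Whatever the lane size, the three outputs,
   read from lane 0 upwards, are simply A0 B0 C0 A1 B1 C1 .. -- the
   vector shuffles above compute exactly this sequence, split across
   i0, i1 and i2 (dst[0..n-1], dst[n..2n-1], dst[2n..3n-1] here). */
static void model_INTERLEAVE3 ( UChar* dst /* 3*nLanes*laneSzB bytes */,
                                const UChar* a, const UChar* b,
                                const UChar* c,
                                UInt nLanes, UInt laneSzB )
{
   UInt k, i;
   for (k = 0; k < nLanes; k++) {
      for (i = 0; i < laneSzB; i++) {
         dst[(3*k+0)*laneSzB + i] = a[k*laneSzB + i];
         dst[(3*k+1)*laneSzB + i] = b[k*laneSzB + i];
         dst[(3*k+2)*laneSzB + i] = c[k*laneSzB + i];
      }
   }
}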


/* Do interleaving for 4 128 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_128(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*i0, ILO64x2(EX(u1), EX(u0)));
      assign(*i1, ILO64x2(EX(u3), EX(u2)));
      assign(*i2, IHI64x2(EX(u1), EX(u0)));
      assign(*i3, IHI64x2(EX(u3), EX(u2)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // First, interleave at the 64-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 3, u0, u1, u2, u3);
      // And interleave (cat) at the 32 bit size.
      assign(*i0, CEV32x4(EX(p1), EX(p0)));
      assign(*i1, COD32x4(EX(p1), EX(p0)));
      assign(*i2, CEV32x4(EX(p3), EX(p2)));
      assign(*i3, COD32x4(EX(p3), EX(p2)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // First, interleave at the 32-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 2, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 16 bit lanes.
      assign(*i0, COD16x8(EX(p0), SHL(EX(p0), 2)));
      assign(*i1, COD16x8(EX(p1), SHL(EX(p1), 2)));
      assign(*i2, COD16x8(EX(p2), SHL(EX(p2), 2)));
      assign(*i3, COD16x8(EX(p3), SHL(EX(p3), 2)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // First, interleave at the 16-bit lane size.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      math_INTERLEAVE4_128(&p0, &p1, &p2, &p3, 1, u0, u1, u2, u3);
      // And rearrange within each vector, to get the right 8 bit lanes.
      assign(*i0, IHI32x4(COD8x16(EX(p0),EX(p0)), CEV8x16(EX(p0),EX(p0))));
      assign(*i1, IHI32x4(COD8x16(EX(p1),EX(p1)), CEV8x16(EX(p1),EX(p1))));
      assign(*i2, IHI32x4(COD8x16(EX(p2),EX(p2)), CEV8x16(EX(p2),EX(p2))));
      assign(*i3, IHI32x4(COD8x16(EX(p3),EX(p3)), CEV8x16(EX(p3),EX(p3))));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 1 128 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_128( /*OUTx1*/ IRTemp* u0,
                             UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 128 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_128( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                             UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   /* This is pretty easy, since we have primitives directly to
      hand. */
   if (laneSzBlg2 == 3) {
      // 64x2
      // i1 == B1 A1, i0 == B0 A0
      // u1 == B1 B0, u0 == A1 A0
      assign(*u0, binop(Iop_InterleaveLO64x2, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_InterleaveHI64x2, mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      // i1 == B3 A3 B2 A2, i0 == B1 A1 B0 A0
      // u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0,
      assign(*u0, binop(Iop_CatEvenLanes32x4, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes32x4,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // i0 == B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == B7 A7 B6 A6 B5 A5 B4 A4
      // u1 == B{7..0}, u0 == A{7..0}
      assign(*u0, binop(Iop_CatEvenLanes16x8, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes16x8,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // i0 == B7 A7 B6 A6 B5 A5 B4 A4 B3 A3 B2 A2 B1 A1 B0 A0
      // i1 == Bf Af Be Ae Bd Ad Bc Ac Bb Ab Ba Aa B9 A9 B8 A8
      // u1 == B{f..0}, u0 == A{f..0}
      assign(*u0, binop(Iop_CatEvenLanes8x16, mkexpr(i1), mkexpr(i0)));
      assign(*u1, binop(Iop_CatOddLanes8x16,  mkexpr(i1), mkexpr(i0)));
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}
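

/* For exposition only: a hypothetical scalar model of the 32x4 case
   of math_DEINTERLEAVE2_128 above (not used by the decoder, not part
   of the original file).  CatEvenLanes32x4(i1,i0) collects the
   even-numbered 32-bit lanes of the i1:i0 pair and CatOddLanes32x4
   the odd-numbered ones, which is exactly what undoes a 2-way
   interleave. */
static void model_DEINTERLEAVE2_32x4 ( UInt* u0, UInt* u1,
                                       const UInt* i0, const UInt* i1 )
{
   /* i0 = {A0,B0,A1,B1}, i1 = {A2,B2,A3,B3}, lane 0 first */
   const UInt* iv[2] = { i0, i1 };
   UInt k;
   for (k = 0; k < 4; k++) {
      u0[k] = iv[k/2][2*(k%2)];     /* even lanes: A0 A1 A2 A3 */
      u1[k] = iv[k/2][2*(k%2)+1];   /* odd lanes:  B0 B1 B2 B3 */
   }
}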


/* Do deinterleaving for 3 128 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_128(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      // i2 == C1 B1, i1 == A1 C0, i0 == B0 A0,
      // u2 == C1 C0, u1 == B1 B0, u0 == A1 A0
      assign(*u2, ILO64x2( ROL(EX(i2),8), EX(i1) ));
      assign(*u1, ILO64x2( EX(i2), ROL(EX(i0),8) ));
      assign(*u0, ILO64x2( ROL(EX(i1),8), EX(i0) ));
      return;
   }

   if (laneSzBlg2 == 2) {
      // 32x4
      // i2 == C3 B3 A3 C2, i1 == B2 A2 C1 B1, i0 == A1 C0 B0 A0
      // p2 == C3 C2 B3 B2, p1 == A3 A2 C1 C0, p0 == B1 B0 A1 A0
      // u2 == C3 C2 C1 C0, u1 == B3 B2 B1 B0, u0 == A3 A2 A1 A0
      IRTemp t_a1c0b0a0 = newTempV128();
      IRTemp t_a2c1b1a1 = newTempV128();
      IRTemp t_a3c2b2a2 = newTempV128();
      IRTemp t_a0c3b3a3 = newTempV128();
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      // Compute some intermediate values.
      assign(t_a1c0b0a0, EX(i0));
      assign(t_a2c1b1a1, SL(EX(i1),EX(i0),3*4));
      assign(t_a3c2b2a2, SL(EX(i2),EX(i1),2*4));
      assign(t_a0c3b3a3, SL(EX(i0),EX(i2),1*4));
      // First deinterleave into lane-pairs
      assign(p0, ILO32x4(EX(t_a2c1b1a1),EX(t_a1c0b0a0)));
      assign(p1, ILO64x2(ILO32x4(EX(t_a0c3b3a3), EX(t_a3c2b2a2)),
                         IHI32x4(EX(t_a2c1b1a1), EX(t_a1c0b0a0))));
      assign(p2, ILO32x4(ROR(EX(t_a0c3b3a3),1*4), ROR(EX(t_a3c2b2a2),1*4)));
      // Then deinterleave at 64x2 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 3, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 1) {
      // 16x8
      // u2 == C7 C6 C5 C4 C3 C2 C1 C0
      // u1 == B7 B6 B5 B4 B3 B2 B1 B0
      // u0 == A7 A6 A5 A4 A3 A2 A1 A0
      //
      // i2 == C7 B7 A7 C6 B6 A6 C5 B5
      // i1 == A5 C4 B4 A4 C3 B3 A3 C2
      // i0 == B2 A2 C1 B1 A1 C0 B0 A0
      //
      // p2 == C7 C6 B7 B6 A7 A6 C5 C4
      // p1 == B5 B4 A5 A4 C3 C2 B3 B2
      // p0 == A3 A2 C1 C0 B1 B0 A1 A0

      IRTemp s0, s1, s2, s3, t0, t1, t2, t3, p0, p1, p2, c00111111;
      s0 = s1 = s2 = s3
         = t0 = t1 = t2 = t3 = p0 = p1 = p2 = c00111111 = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&p0, &p1, &p2, &c00111111);

      // s0 == b2a2 c1b1a1 c0b0a0
      // s1 == b4a4 c3b3a3 c2b2a2
      // s2 == b6a6 c5b5a5 c4b4a4
      // s3 == b0a0 c7b7a7 c6b6a6
      assign(s0, EX(i0));
      assign(s1, SL(EX(i1),EX(i0),6*2));
      assign(s2, SL(EX(i2),EX(i1),4*2));
      assign(s3, SL(EX(i0),EX(i2),2*2));

      // t0 == 0 0 c1c0 b1b0 a1a0
      // t1 == 0 0 c3c2 b3b2 a3a2
      // t2 == 0 0 c5c4 b5b4 a5a4
      // t3 == 0 0 c7c6 b7b6 a7a6
      assign(c00111111, mkV128(0x0FFF));
      assign(t0, AND( ILO16x8( ROR(EX(s0),3*2), EX(s0)), EX(c00111111)));
      assign(t1, AND( ILO16x8( ROR(EX(s1),3*2), EX(s1)), EX(c00111111)));
      assign(t2, AND( ILO16x8( ROR(EX(s2),3*2), EX(s2)), EX(c00111111)));
      assign(t3, AND( ILO16x8( ROR(EX(s3),3*2), EX(s3)), EX(c00111111)));

      assign(p0, OR2(EX(t0), SHL(EX(t1),6*2)));
      assign(p1, OR2(SHL(EX(t2),4*2), SHR(EX(t1),2*2)));
      assign(p2, OR2(SHL(EX(t3),2*2), SHR(EX(t2),4*2)));

      // Then deinterleave at 32x4 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 2, p0, p1, p2);
      return;
   }

   if (laneSzBlg2 == 0) {
      // 8x16.  This is the same scheme as for 16x8, with twice the
      // number of intermediate values.
      //
      // u2 == C{f..0}
      // u1 == B{f..0}
      // u0 == A{f..0}
      //
      // i2 == CBA{f} CBA{e} CBA{d} CBA{c} CBA{b} C{a}
      // i1 == BA{a} CBA{9} CBA{8} CBA{7} CBA{6} CB{5}
      // i0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      //
      // p2 == C{fe} B{fe} A{fe} C{dc} B{dc} A{dc} C{ba} B{ba}
      // p1 == A{ba} C{98} B{98} A{98} C{76} B{76} A{76} C{54}
      // p0 == B{54} A{54} C{32} B{32} A{32} C{10} B{10} A{10}
      //
      IRTemp s0, s1, s2, s3, s4, s5, s6, s7,
             t0, t1, t2, t3, t4, t5, t6, t7, p0, p1, p2, cMASK;
      s0 = s1 = s2 = s3 = s4 = s5 = s6 = s7
         = t0 = t1 = t2 = t3 = t4 = t5 = t6 = t7 = p0 = p1 = p2 = cMASK
         = IRTemp_INVALID;
      newTempsV128_4(&s0, &s1, &s2, &s3);
      newTempsV128_4(&s4, &s5, &s6, &s7);
      newTempsV128_4(&t0, &t1, &t2, &t3);
      newTempsV128_4(&t4, &t5, &t6, &t7);
      newTempsV128_4(&p0, &p1, &p2, &cMASK);

      // s0 == A{5} CBA{4} CBA{3} CBA{2} CBA{1} CBA{0}
      // s1 == A{7} CBA{6} CBA{5} CBA{4} CBA{3} CBA{2}
      // s2 == A{9} CBA{8} CBA{7} CBA{6} CBA{5} CBA{4}
      // s3 == A{b} CBA{a} CBA{9} CBA{8} CBA{7} CBA{6}
      // s4 == A{d} CBA{c} CBA{b} CBA{a} CBA{9} CBA{8}
      // s5 == A{f} CBA{e} CBA{d} CBA{c} CBA{b} CBA{a}
      // s6 == A{1} CBA{0} CBA{f} CBA{e} CBA{d} CBA{c}
      // s7 == A{3} CBA{2} CBA{1} CBA{0} CBA{f} CBA{e}
      assign(s0, SL(EX(i1),EX(i0), 0));
      assign(s1, SL(EX(i1),EX(i0), 6));
      assign(s2, SL(EX(i1),EX(i0),12));
      assign(s3, SL(EX(i2),EX(i1), 2));
      assign(s4, SL(EX(i2),EX(i1), 8));
      assign(s5, SL(EX(i2),EX(i1),14));
      assign(s6, SL(EX(i0),EX(i2), 4));
      assign(s7, SL(EX(i0),EX(i2),10));

      // t0 == 0--(ten)--0 C1 C0 B1 B0 A1 A0
      // t1 == 0--(ten)--0 C3 C2 B3 B2 A3 A2
      // t2 == 0--(ten)--0 C5 C4 B5 B4 A5 A4
      // t3 == 0--(ten)--0 C7 C6 B7 B6 A7 A6
      // t4 == 0--(ten)--0 C9 C8 B9 B8 A9 A8
      // t5 == 0--(ten)--0 Cb Ca Bb Ba Ab Aa
      // t6 == 0--(ten)--0 Cd Cc Bd Bc Ad Ac
      // t7 == 0--(ten)--0 Cf Ce Bf Be Af Ae
      assign(cMASK, mkV128(0x003F));
      assign(t0, AND( ILO8x16( ROR(EX(s0),3), EX(s0)), EX(cMASK)));
      assign(t1, AND( ILO8x16( ROR(EX(s1),3), EX(s1)), EX(cMASK)));
      assign(t2, AND( ILO8x16( ROR(EX(s2),3), EX(s2)), EX(cMASK)));
      assign(t3, AND( ILO8x16( ROR(EX(s3),3), EX(s3)), EX(cMASK)));
      assign(t4, AND( ILO8x16( ROR(EX(s4),3), EX(s4)), EX(cMASK)));
      assign(t5, AND( ILO8x16( ROR(EX(s5),3), EX(s5)), EX(cMASK)));
      assign(t6, AND( ILO8x16( ROR(EX(s6),3), EX(s6)), EX(cMASK)));
      assign(t7, AND( ILO8x16( ROR(EX(s7),3), EX(s7)), EX(cMASK)));

      assign(p0, OR3( SHL(EX(t2),12), SHL(EX(t1),6), EX(t0) ));
      assign(p1, OR4( SHL(EX(t5),14), SHL(EX(t4),8),
                      SHL(EX(t3),2), SHR(EX(t2),4) ));
      assign(p2, OR3( SHL(EX(t7),10), SHL(EX(t6),4), SHR(EX(t5),2) ));

      // Then deinterleave at 16x8 granularity.
      math_DEINTERLEAVE3_128(u0, u1, u2, 1, p0, p1, p2);
      return;
   }

   /*NOTREACHED*/
   vassert(0);
}


/* Do deinterleaving for 4 128 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_128(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 64x2
      assign(*u0, ILO64x2(EX(i2), EX(i0)));
      assign(*u1, IHI64x2(EX(i2), EX(i0)));
      assign(*u2, ILO64x2(EX(i3), EX(i1)));
      assign(*u3, IHI64x2(EX(i3), EX(i1)));
      return;
   }
   if (laneSzBlg2 == 2) {
      // 32x4
      IRTemp p0 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, ILO32x4(EX(i1), EX(i0)));
      assign(p1, IHI32x4(EX(i1), EX(i0)));
      assign(p2, ILO32x4(EX(i3), EX(i2)));
      assign(p3, IHI32x4(EX(i3), EX(i2)));
      // And now do what we did for the 64-bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 3, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 1) {
      // 16x8
      // Deinterleave into 32-bit chunks, then do as the 32-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI16x8(EX(i0), SHL(EX(i0), 8)));
      assign(p1, IHI16x8(EX(i1), SHL(EX(i1), 8)));
      assign(p2, IHI16x8(EX(i2), SHL(EX(i2), 8)));
      assign(p3, IHI16x8(EX(i3), SHL(EX(i3), 8)));
      // From here on is like the 32 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 2, p0, p1, p2, p3);
      return;
   }
   if (laneSzBlg2 == 0) {
      // 8x16
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp p0 = newTempV128();
      IRTemp p1 = newTempV128();
      IRTemp p2 = newTempV128();
      IRTemp p3 = newTempV128();
      assign(p0, IHI64x2( IHI8x16(EX(i0),ROL(EX(i0),4)),
                          ILO8x16(EX(i0),ROL(EX(i0),4)) ));
      assign(p1, IHI64x2( IHI8x16(EX(i1),ROL(EX(i1),4)),
                          ILO8x16(EX(i1),ROL(EX(i1),4)) ));
      assign(p2, IHI64x2( IHI8x16(EX(i2),ROL(EX(i2),4)),
                          ILO8x16(EX(i2),ROL(EX(i2),4)) ));
      assign(p3, IHI64x2( IHI8x16(EX(i3),ROL(EX(i3),4)),
                          ILO8x16(EX(i3),ROL(EX(i3),4)) ));
      // From here on is like the 16 bit case.
      math_DEINTERLEAVE4_128(u0, u1, u2, u3, 1, p0, p1, p2, p3);
      return;
   }
   /*NOTREACHED*/
   vassert(0);
}


/* Wrappers that use the full-width (de)interleavers to do half-width
   (de)interleaving.  The scheme is to clone each input lane in the
   lower half of each incoming value, do a full width (de)interleave
   at the next lane size up, and remove every other lane of the
   result.  The returned values may have any old junk in the upper
   64 bits -- the caller must ignore that.  (A scalar sketch of this
   scheme, for 8-bit lanes, follows math_get_doubler_and_halver
   below.) */

/* Helper function -- get doubling and narrowing operations. */
static
void math_get_doubler_and_halver ( /*OUT*/IROp* doubler,
                                   /*OUT*/IROp* halver,
                                   UInt laneSzBlg2 )
{
   switch (laneSzBlg2) {
      case 2:
         *doubler = Iop_InterleaveLO32x4; *halver = Iop_CatEvenLanes32x4;
         break;
      case 1:
         *doubler = Iop_InterleaveLO16x8; *halver = Iop_CatEvenLanes16x8;
         break;
      case 0:
         *doubler = Iop_InterleaveLO8x16; *halver = Iop_CatEvenLanes8x16;
         break;
      default:
         vassert(0);
   }
}
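
/* For exposition only: a hypothetical scalar model (8-bit lanes) of
   the clone/interleave/halve scheme described above (not used by the
   decoder, not part of the original file).  Only the meaningful lower
   8 bytes of each vector are modelled.  The net effect is the perfect
   2-way zip of u0 and u1, spread across two 64-bit results. */
static void model_INTERLEAVE2_64_8b ( UChar* i0, UChar* i1,
                                      const UChar* u0, const UChar* u1 )
{
   UChar du0[16], du1[16], di0[16], di1[16];
   UInt  k, b;
   /* doubler: InterleaveLO8x16(v,v) clones each lane of the low half */
   for (k = 0; k < 8; k++) {
      du0[2*k] = du0[2*k+1] = u0[k];
      du1[2*k] = du1[2*k+1] = u1[k];
   }
   /* full-width interleave at the doubled (16 bit) lane size */
   for (k = 0; k < 4; k++) {
      for (b = 0; b < 2; b++) {
         di0[4*k+b]   = du0[2*k+b];
         di0[4*k+2+b] = du1[2*k+b];
         di1[4*k+b]   = du0[2*(k+4)+b];
         di1[4*k+2+b] = du1[2*(k+4)+b];
      }
   }
   /* halver: CatEvenLanes8x16 keeps the even-numbered lanes */
   for (k = 0; k < 8; k++) {
      i0[k] = di0[2*k];   /* == u0[0] u1[0] u0[1] u1[1] .. */
      i1[k] = di1[2*k];   /* == u0[4] u1[4] u0[5] u1[5] .. */
   }
}
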

/* Do interleaving for 1 64 bit vector, for ST1 insns. */
static
void math_INTERLEAVE1_64( /*OUTx1*/ IRTemp* i0,
                          UInt laneSzBlg2, IRTemp u0 )
{
   assign(*i0, mkexpr(u0));
}


/* Do interleaving for 2 64 bit vectors, for ST2 insns. */
static
void math_INTERLEAVE2_64( /*OUTx2*/ IRTemp* i0, IRTemp* i1,
                          UInt laneSzBlg2, IRTemp u0, IRTemp u1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   math_INTERLEAVE2_128(&di0, &di1, laneSzBlg2 + 1, du0, du1);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
}


/* Do interleaving for 3 64 bit vectors, for ST3 insns. */
static
void math_INTERLEAVE3_64(
        /*OUTx3*/ IRTemp* i0, IRTemp* i1, IRTemp* i2,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   math_INTERLEAVE3_128(&di0, &di1, &di2, laneSzBlg2 + 1, du0, du1, du2);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
}


/* Do interleaving for 4 64 bit vectors, for ST4 insns. */
static
void math_INTERLEAVE4_64(
        /*OUTx4*/ IRTemp* i0, IRTemp* i1, IRTemp* i2, IRTemp* i3,
        UInt laneSzBlg2,
        IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*i0, EX(u0));
      assign(*i1, EX(u1));
      assign(*i2, EX(u2));
      assign(*i3, EX(u3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   assign(du0, binop(doubler, EX(u0), EX(u0)));
   assign(du1, binop(doubler, EX(u1), EX(u1)));
   assign(du2, binop(doubler, EX(u2), EX(u2)));
   assign(du3, binop(doubler, EX(u3), EX(u3)));
   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   math_INTERLEAVE4_128(&di0, &di1, &di2, &di3,
                        laneSzBlg2 + 1, du0, du1, du2, du3);
   assign(*i0, binop(halver, EX(di0), EX(di0)));
   assign(*i1, binop(halver, EX(di1), EX(di1)));
   assign(*i2, binop(halver, EX(di2), EX(di2)));
   assign(*i3, binop(halver, EX(di3), EX(di3)));
}


/* Do deinterleaving for 1 64 bit vector, for LD1 insns. */
static
void math_DEINTERLEAVE1_64( /*OUTx1*/ IRTemp* u0,
                            UInt laneSzBlg2, IRTemp i0 )
{
   assign(*u0, mkexpr(i0));
}


/* Do deinterleaving for 2 64 bit vectors, for LD2 insns. */
static
void math_DEINTERLEAVE2_64( /*OUTx2*/ IRTemp* u0, IRTemp* u1,
                            UInt laneSzBlg2, IRTemp i0, IRTemp i1 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));

   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   math_DEINTERLEAVE2_128(&du0, &du1, laneSzBlg2 + 1, di0, di1);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
}


/* Do deinterleaving for 3 64 bit vectors, for LD3 insns. */
static
void math_DEINTERLEAVE3_64(
        /*OUTx3*/ IRTemp* u0, IRTemp* u1, IRTemp* u2,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   math_DEINTERLEAVE3_128(&du0, &du1, &du2, laneSzBlg2 + 1, di0, di1, di2);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
}


/* Do deinterleaving for 4 64 bit vectors, for LD4 insns. */
static
void math_DEINTERLEAVE4_64(
        /*OUTx4*/ IRTemp* u0, IRTemp* u1, IRTemp* u2, IRTemp* u3,
        UInt laneSzBlg2,
        IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3 )
{
   if (laneSzBlg2 == 3) {
      // 1x64, degenerate case
      assign(*u0, EX(i0));
      assign(*u1, EX(i1));
      assign(*u2, EX(i2));
      assign(*u3, EX(i3));
      return;
   }

   vassert(laneSzBlg2 >= 0 && laneSzBlg2 <= 2);
   IROp doubler = Iop_INVALID, halver = Iop_INVALID;
   math_get_doubler_and_halver(&doubler, &halver, laneSzBlg2);

   IRTemp di0 = newTempV128();
   IRTemp di1 = newTempV128();
   IRTemp di2 = newTempV128();
   IRTemp di3 = newTempV128();
   assign(di0, binop(doubler, EX(i0), EX(i0)));
   assign(di1, binop(doubler, EX(i1), EX(i1)));
   assign(di2, binop(doubler, EX(i2), EX(i2)));
   assign(di3, binop(doubler, EX(i3), EX(i3)));
   IRTemp du0 = newTempV128();
   IRTemp du1 = newTempV128();
   IRTemp du2 = newTempV128();
   IRTemp du3 = newTempV128();
   math_DEINTERLEAVE4_128(&du0, &du1, &du2, &du3,
                          laneSzBlg2 + 1, di0, di1, di2, di3);
   assign(*u0, binop(halver, EX(du0), EX(du0)));
   assign(*u1, binop(halver, EX(du1), EX(du1)));
   assign(*u2, binop(halver, EX(du2), EX(du2)));
   assign(*u3, binop(halver, EX(du3), EX(du3)));
}


#undef EX
#undef SL
#undef ROR
#undef ROL
#undef SHR
#undef SHL
#undef ILO64x2
#undef IHI64x2
#undef ILO32x4
#undef IHI32x4
#undef ILO16x8
#undef IHI16x8
#undef ILO8x16
#undef IHI8x16
#undef CEV32x4
#undef COD32x4
#undef COD16x8
#undef COD8x16
#undef CEV8x16
#undef AND
#undef OR2
#undef OR3
#undef OR4


/*------------------------------------------------------------*/
/*--- Load and Store instructions                          ---*/
/*------------------------------------------------------------*/

/* Generate the EA for a "reg + reg" style amode.  This is done from
   parts of the insn, but for the sake of sanity checking it takes
   the whole insn.  This appears to depend on insn[15:12], with
   opt=insn[15:13] and S=insn[12]:

   The possible forms, along with their opt:S values, are:
      011:0   Xn|SP + Xm
      111:0   Xn|SP + Xm
      011:1   Xn|SP + Xm * transfer_szB
      111:1   Xn|SP + Xm * transfer_szB
      010:0   Xn|SP + 32Uto64(Wm)
      010:1   Xn|SP + 32Uto64(Wm) * transfer_szB
      110:0   Xn|SP + 32Sto64(Wm)
      110:1   Xn|SP + 32Sto64(Wm) * transfer_szB

   Rm is insn[20:16].  Rn is insn[9:5].  Rt is insn[4:0].  Log2 of
   the transfer size is insn[23,31,30].  For integer loads/stores,
   insn[23] is zero, hence szLg2 can be at most 3 in such cases.

   If the decoding fails, it returns IRTemp_INVALID.

   isInt is True iff this decoding is for transfers to/from integer
   registers.  If False it is for transfers to/from vector registers.
*/
static IRTemp gen_indexed_EA ( /*OUT*/HChar* buf, UInt insn, Bool isInt )
{
   UInt    optS  = SLICE_UInt(insn, 15, 12);
   UInt    mm    = SLICE_UInt(insn, 20, 16);
   UInt    nn    = SLICE_UInt(insn, 9, 5);
   UInt    szLg2 = (isInt ? 0 : (SLICE_UInt(insn, 23, 23) << 2))
                   | SLICE_UInt(insn, 31, 30); // Log2 of the size

   buf[0] = 0;

   /* Sanity checks, that this really is a load/store insn. */
   if (SLICE_UInt(insn, 11, 10) != BITS2(1,0))
      goto fail;

   if (isInt
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,1,1)/*LDR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,0,0,1)/*STR*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,0,1)/*LDRSbhw Xt*/
       && SLICE_UInt(insn, 29, 21) != BITS9(1,1,1,0,0,0,1,1,1))/*LDRSbhw Wt*/
      goto fail;

   if (!isInt
       && SLICE_UInt(insn, 29, 24) != BITS6(1,1,1,1,0,0)) /*LDR/STR*/
      goto fail;

   /* Throw out non-verified but possibly valid cases. */
   switch (szLg2) {
      case BITS3(0,0,0): break; //  8 bit, valid for both int and vec
      case BITS3(0,0,1): break; // 16 bit, valid for both int and vec
      case BITS3(0,1,0): break; // 32 bit, valid for both int and vec
      case BITS3(0,1,1): break; // 64 bit, valid for both int and vec
      case BITS3(1,0,0): // can only ever be valid for the vector case
                         if (isInt) goto fail; else break;
      case BITS3(1,0,1): // these sizes are never valid
      case BITS3(1,1,0):
      case BITS3(1,1,1): goto fail;

      default: vassert(0);
   }

   IRExpr* rhs = NULL;
   switch (optS) {
      case BITS4(1,1,1,0): goto fail; //ATC
      case BITS4(0,1,1,0):
         rhs = getIReg64orZR(mm);
         vex_sprintf(buf, "[%s, %s]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm));
         break;
      case BITS4(1,1,1,1): goto fail; //ATC
      case BITS4(0,1,1,1):
         rhs = binop(Iop_Shl64, getIReg64orZR(mm), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s lsl %u]",
                     nameIReg64orZR(nn), nameIReg64orZR(mm), szLg2);
         break;
      case BITS4(0,1,0,0):
         rhs = unop(Iop_32Uto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s uxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(0,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Uto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s uxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      case BITS4(1,1,0,0):
         rhs = unop(Iop_32Sto64, getIReg32orZR(mm));
         vex_sprintf(buf, "[%s, %s sxtx]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm));
         break;
      case BITS4(1,1,0,1):
         rhs = binop(Iop_Shl64,
                     unop(Iop_32Sto64, getIReg32orZR(mm)), mkU8(szLg2));
         vex_sprintf(buf, "[%s, %s sxtx, lsl %u]",
                     nameIReg64orZR(nn), nameIReg32orZR(mm), szLg2);
         break;
      default:
         /* The rest appear to be genuinely invalid */
         goto fail;
   }

   vassert(rhs);
   IRTemp res = newTemp(Ity_I64);
   assign(res, binop(Iop_Add64, getIReg64orSP(nn), rhs));
   return res;

  fail:
   vex_printf("gen_indexed_EA: unhandled case optS == 0x%x\n", optS);
   return IRTemp_INVALID;
}
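

/* For exposition only: a hypothetical scalar model of the EA forms
   decoded above (not used by the decoder, not part of the original
   file).  opt selects how Rm is extended and S selects scaling by
   the transfer size; other opt values are rejected, just as
   gen_indexed_EA fails them. */
static ULong model_reg_reg_EA ( ULong xn, ULong xm,
                                UInt opt, UInt S, UInt szLg2 )
{
   ULong off;
   switch (opt) {
      case BITS3(0,1,1): case BITS3(1,1,1):   /* Xm, used as-is      */
         off = xm; break;
      case BITS3(0,1,0):                      /* 32Uto64(Wm)         */
         off = (ULong)(UInt)xm; break;
      case BITS3(1,1,0):                      /* 32Sto64(Wm)         */
         off = (ULong)(Long)(Int)(UInt)xm; break;
      default:                                /* invalid encodings   */
         return 0;
   }
   return xn + (S ? (off << szLg2) : off);
}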


/* Generate an 8/16/32/64 bit integer store to ADDR for the lowest
   bits of DATAE :: Ity_I64. */
static void gen_narrowing_store ( UInt szB, IRTemp addr, IRExpr* dataE )
{
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         storeLE(addrE, dataE);
         break;
      case 4:
         storeLE(addrE, unop(Iop_64to32, dataE));
         break;
      case 2:
         storeLE(addrE, unop(Iop_64to16, dataE));
         break;
      case 1:
         storeLE(addrE, unop(Iop_64to8, dataE));
         break;
      default:
         vassert(0);
   }
}


/* Generate an 8/16/32/64 bit unsigned widening load from ADDR,
   placing the result in an Ity_I64 temporary. */
static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
{
   IRTemp  res   = newTemp(Ity_I64);
   IRExpr* addrE = mkexpr(addr);
   switch (szB) {
      case 8:
         assign(res, loadLE(Ity_I64,addrE));
         break;
      case 4:
         assign(res, unop(Iop_32Uto64, loadLE(Ity_I32,addrE)));
         break;
      case 2:
         assign(res, unop(Iop_16Uto64, loadLE(Ity_I16,addrE)));
         break;
      case 1:
         assign(res, unop(Iop_8Uto64, loadLE(Ity_I8,addrE)));
         break;
      default:
         vassert(0);
   }
   return res;
}
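

/* For exposition only: a hypothetical model of the invariant the two
   helpers above maintain (not used by the decoder, not part of the
   original file).  A narrowing store of szB bytes followed by a
   zero-widening load of the same size yields the low 8*szB bits of
   the original value, zero extended to 64 bits. */
static ULong model_narrow_then_zwiden ( ULong x, UInt szB )
{
   ULong mask = szB == 8 ? ~0ULL : (1ULL << (8 * szB)) - 1;
   return x & mask;
}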
4615
4616
sewardj18bf5172014-06-14 18:05:30 +00004617/* Generate a "standard 7" name, from bitQ and size. But also
4618 allow ".1d" since that's occasionally useful. */
4619static
4620const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
4621{
4622 vassert(bitQ <= 1 && size <= 3);
4623 const HChar* nms[8]
sewardj25523c42014-06-15 19:36:29 +00004624 = { "8b", "4h", "2s", "1d", "16b", "8h", "4s", "2d" };
sewardj18bf5172014-06-14 18:05:30 +00004625 UInt ix = (bitQ << 2) | size;
4626 vassert(ix < 8);
4627 return nms[ix];
4628}
4629
4630
sewardjbbcf1882014-01-12 12:49:10 +00004631static
4632Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
4633{
4634# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
4635
4636 /* ------------ LDR,STR (immediate, uimm12) ----------- */
4637 /* uimm12 is scaled by the transfer size
4638
4639 31 29 26 21 9 4
4640 | | | | | |
4641 11 111 00100 imm12 nn tt STR Xt, [Xn|SP, #imm12 * 8]
4642 11 111 00101 imm12 nn tt LDR Xt, [Xn|SP, #imm12 * 8]
4643
4644 10 111 00100 imm12 nn tt STR Wt, [Xn|SP, #imm12 * 4]
4645 10 111 00101 imm12 nn tt LDR Wt, [Xn|SP, #imm12 * 4]
4646
4647 01 111 00100 imm12 nn tt STRH Wt, [Xn|SP, #imm12 * 2]
4648 01 111 00101 imm12 nn tt LDRH Wt, [Xn|SP, #imm12 * 2]
4649
4650 00 111 00100 imm12 nn tt STRB Wt, [Xn|SP, #imm12 * 1]
4651 00 111 00101 imm12 nn tt LDRB Wt, [Xn|SP, #imm12 * 1]
4652 */
4653 if (INSN(29,23) == BITS7(1,1,1,0,0,1,0)) {
4654 UInt szLg2 = INSN(31,30);
4655 UInt szB = 1 << szLg2;
4656 Bool isLD = INSN(22,22) == 1;
4657 UInt offs = INSN(21,10) * szB;
4658 UInt nn = INSN(9,5);
4659 UInt tt = INSN(4,0);
4660 IRTemp ta = newTemp(Ity_I64);
4661 assign(ta, binop(Iop_Add64, getIReg64orSP(nn), mkU64(offs)));
4662 if (nn == 31) { /* FIXME generate stack alignment check */ }
4663 vassert(szLg2 < 4);
4664 if (isLD) {
4665 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, ta)));
4666 } else {
4667 gen_narrowing_store(szB, ta, getIReg64orZR(tt));
4668 }
4669 const HChar* ld_name[4] = { "ldrb", "ldrh", "ldr", "ldr" };
4670 const HChar* st_name[4] = { "strb", "strh", "str", "str" };
4671 DIP("%s %s, [%s, #%u]\n",
4672 (isLD ? ld_name : st_name)[szLg2], nameIRegOrZR(szB == 8, tt),
4673 nameIReg64orSP(nn), offs);
4674 return True;
4675 }
4676
4677 /* ------------ LDUR,STUR (immediate, simm9) ----------- */
4678 /*
4679 31 29 26 20 11 9 4
4680 | | | | | | |
4681 (at-Rn-then-Rn=EA) | | |
4682 sz 111 00000 0 imm9 01 Rn Rt STR Rt, [Xn|SP], #simm9
4683 sz 111 00001 0 imm9 01 Rn Rt LDR Rt, [Xn|SP], #simm9
4684
4685 (at-EA-then-Rn=EA)
4686 sz 111 00000 0 imm9 11 Rn Rt STR Rt, [Xn|SP, #simm9]!
4687 sz 111 00001 0 imm9 11 Rn Rt LDR Rt, [Xn|SP, #simm9]!
4688
4689 (at-EA)
4690 sz 111 00000 0 imm9 00 Rn Rt STR Rt, [Xn|SP, #simm9]
4691 sz 111 00001 0 imm9 00 Rn Rt LDR Rt, [Xn|SP, #simm9]
4692
4693 simm9 is unscaled.
4694
4695 The case 'wback && Rn == Rt && Rt != 31' is disallowed. In the
4696 load case this is because would create two competing values for
4697 Rt. In the store case the reason is unclear, but the spec
4698 disallows it anyway.
4699
4700 Stores are narrowing, loads are unsigned widening. sz encodes
4701 the transfer size in the normal way: 00=1, 01=2, 10=4, 11=8.
4702 */
4703 if ((INSN(29,21) & BITS9(1,1,1, 1,1,1,1,0, 1))
4704 == BITS9(1,1,1, 0,0,0,0,0, 0)) {
4705 UInt szLg2 = INSN(31,30);
4706 UInt szB = 1 << szLg2;
4707 Bool isLoad = INSN(22,22) == 1;
4708 UInt imm9 = INSN(20,12);
4709 UInt nn = INSN(9,5);
4710 UInt tt = INSN(4,0);
4711 Bool wBack = INSN(10,10) == 1;
4712 UInt how = INSN(11,10);
4713 if (how == BITS2(1,0) || (wBack && nn == tt && tt != 31)) {
4714 /* undecodable; fall through */
4715 } else {
4716 if (nn == 31) { /* FIXME generate stack alignment check */ }
4717
4718 // Compute the transfer address TA and the writeback address WA.
4719 IRTemp tRN = newTemp(Ity_I64);
4720 assign(tRN, getIReg64orSP(nn));
4721 IRTemp tEA = newTemp(Ity_I64);
4722 Long simm9 = (Long)sx_to_64(imm9, 9);
4723 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
4724
4725 IRTemp tTA = newTemp(Ity_I64);
4726 IRTemp tWA = newTemp(Ity_I64);
4727 switch (how) {
4728 case BITS2(0,1):
4729 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4730 case BITS2(1,1):
4731 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4732 case BITS2(0,0):
4733 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4734 default:
4735 vassert(0); /* NOTREACHED */
4736 }
4737
sewardje0bff8b2014-03-09 09:40:23 +00004738 /* Normally rN would be updated after the transfer. However, in
4739 the special case typifed by
4740 str x30, [sp,#-16]!
4741 it is necessary to update SP before the transfer, (1)
4742 because Memcheck will otherwise complain about a write
4743 below the stack pointer, and (2) because the segfault
4744 stack extension mechanism will otherwise extend the stack
4745 only down to SP before the instruction, which might not be
4746 far enough, if the -16 bit takes the actual access
4747 address to the next page.
4748 */
4749 Bool earlyWBack
4750 = wBack && simm9 < 0 && szB == 8
4751 && how == BITS2(1,1) && nn == 31 && !isLoad && tt != nn;
4752
4753 if (wBack && earlyWBack)
4754 putIReg64orSP(nn, mkexpr(tEA));
4755
sewardjbbcf1882014-01-12 12:49:10 +00004756 if (isLoad) {
4757 putIReg64orZR(tt, mkexpr(gen_zwidening_load(szB, tTA)));
4758 } else {
4759 gen_narrowing_store(szB, tTA, getIReg64orZR(tt));
4760 }
4761
sewardje0bff8b2014-03-09 09:40:23 +00004762 if (wBack && !earlyWBack)
sewardjbbcf1882014-01-12 12:49:10 +00004763 putIReg64orSP(nn, mkexpr(tEA));
4764
4765 const HChar* ld_name[4] = { "ldurb", "ldurh", "ldur", "ldur" };
4766 const HChar* st_name[4] = { "sturb", "sturh", "stur", "stur" };
4767 const HChar* fmt_str = NULL;
4768 switch (how) {
4769 case BITS2(0,1):
4770 fmt_str = "%s %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4771 break;
4772 case BITS2(1,1):
4773 fmt_str = "%s %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4774 break;
4775 case BITS2(0,0):
4776 fmt_str = "%s %s, [%s, #%lld] (at-Rn)\n";
4777 break;
4778 default:
4779 vassert(0);
4780 }
4781 DIP(fmt_str, (isLoad ? ld_name : st_name)[szLg2],
4782 nameIRegOrZR(szB == 8, tt),
4783 nameIReg64orSP(nn), simm9);
4784 return True;
4785 }
4786 }
4787
4788 /* -------- LDP,STP (immediate, simm7) (INT REGS) -------- */
4789 /* L==1 => mm==LD
4790 L==0 => mm==ST
4791 x==0 => 32 bit transfers, and zero extended loads
4792 x==1 => 64 bit transfers
4793 simm7 is scaled by the (single-register) transfer size
4794
4795 (at-Rn-then-Rn=EA)
4796 x0 101 0001 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP], #imm
4797
4798 (at-EA-then-Rn=EA)
4799 x0 101 0011 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]!
4800
4801 (at-EA)
4802 x0 101 0010 L imm7 Rt2 Rn Rt1 mmP Rt1,Rt2, [Xn|SP, #imm]
4803 */
4804
4805 UInt insn_30_23 = INSN(30,23);
4806 if (insn_30_23 == BITS8(0,1,0,1,0,0,0,1)
4807 || insn_30_23 == BITS8(0,1,0,1,0,0,1,1)
4808 || insn_30_23 == BITS8(0,1,0,1,0,0,1,0)) {
4809 UInt bL = INSN(22,22);
4810 UInt bX = INSN(31,31);
4811 UInt bWBack = INSN(23,23);
4812 UInt rT1 = INSN(4,0);
4813 UInt rN = INSN(9,5);
4814 UInt rT2 = INSN(14,10);
4815 Long simm7 = (Long)sx_to_64(INSN(21,15), 7);
4816 if ((bWBack && (rT1 == rN || rT2 == rN) && rN != 31)
4817 || (bL && rT1 == rT2)) {
4818 /* undecodable; fall through */
4819 } else {
4820 if (rN == 31) { /* FIXME generate stack alignment check */ }
4821
4822 // Compute the transfer address TA and the writeback address WA.
4823 IRTemp tRN = newTemp(Ity_I64);
4824 assign(tRN, getIReg64orSP(rN));
4825 IRTemp tEA = newTemp(Ity_I64);
4826 simm7 = (bX ? 8 : 4) * simm7;
4827 assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));
4828
4829 IRTemp tTA = newTemp(Ity_I64);
4830 IRTemp tWA = newTemp(Ity_I64);
4831 switch (INSN(24,23)) {
4832 case BITS2(0,1):
4833 assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
4834 case BITS2(1,1):
4835 assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
4836 case BITS2(1,0):
4837 assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
4838 default:
4839 vassert(0); /* NOTREACHED */
4840 }
4841
4842 /* Normally rN would be updated after the transfer. However, in
4843 the special case typifed by
4844 stp x29, x30, [sp,#-112]!
4845 it is necessary to update SP before the transfer, (1)
4846 because Memcheck will otherwise complain about a write
4847 below the stack pointer, and (2) because the segfault
4848 stack extension mechanism will otherwise extend the stack
4849 only down to SP before the instruction, which might not be
4850 far enough, if the -112 bit takes the actual access
4851 address to the next page.
4852 */
4853 Bool earlyWBack
4854 = bWBack && simm7 < 0
4855 && INSN(24,23) == BITS2(1,1) && rN == 31 && bL == 0;
4856
4857 if (bWBack && earlyWBack)
4858 putIReg64orSP(rN, mkexpr(tEA));
4859
4860 /**/ if (bL == 1 && bX == 1) {
4861 // 64 bit load
4862 putIReg64orZR(rT1, loadLE(Ity_I64,
4863 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4864 putIReg64orZR(rT2, loadLE(Ity_I64,
4865 binop(Iop_Add64,mkexpr(tTA),mkU64(8))));
4866 } else if (bL == 1 && bX == 0) {
sewardjbbcf1882014-01-12 12:49:10 +00004867 // 32 bit load
4868 putIReg32orZR(rT1, loadLE(Ity_I32,
4869 binop(Iop_Add64,mkexpr(tTA),mkU64(0))));
4870 putIReg32orZR(rT2, loadLE(Ity_I32,
4871 binop(Iop_Add64,mkexpr(tTA),mkU64(4))));
4872 } else if (bL == 0 && bX == 1) {
4873 // 64 bit store
4874 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4875 getIReg64orZR(rT1));
4876 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(8)),
4877 getIReg64orZR(rT2));
4878 } else {
4879 vassert(bL == 0 && bX == 0);
sewardjbbcf1882014-01-12 12:49:10 +00004880 // 32 bit store
4881 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(0)),
4882 getIReg32orZR(rT1));
4883 storeLE(binop(Iop_Add64,mkexpr(tTA),mkU64(4)),
4884 getIReg32orZR(rT2));
4885 }
4886
4887 if (bWBack && !earlyWBack)
4888 putIReg64orSP(rN, mkexpr(tEA));
4889
4890 const HChar* fmt_str = NULL;
4891 switch (INSN(24,23)) {
4892 case BITS2(0,1):
4893 fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
4894 break;
4895 case BITS2(1,1):
4896 fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
4897 break;
4898 case BITS2(1,0):
4899 fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
4900 break;
4901 default:
4902 vassert(0);
4903 }
4904 DIP(fmt_str, bL == 0 ? "st" : "ld",
4905 nameIRegOrZR(bX == 1, rT1),
4906 nameIRegOrZR(bX == 1, rT2),
4907 nameIReg64orSP(rN), simm7);
4908 return True;
4909 }
4910 }
4911
4912 /* ---------------- LDR (literal, int reg) ---------------- */
4913 /* 31 29 23 4
4914 00 011 000 imm19 Rt LDR Wt, [PC + sxTo64(imm19 << 2)]
4915 01 011 000 imm19 Rt LDR Xt, [PC + sxTo64(imm19 << 2)]
4916 10 011 000 imm19 Rt LDRSW Xt, [PC + sxTo64(imm19 << 2)]
4917 11 011 000 imm19 Rt prefetch [PC + sxTo64(imm19 << 2)]
4918 Just handles the first two cases for now.
4919 */
4920 if (INSN(29,24) == BITS6(0,1,1,0,0,0) && INSN(31,31) == 0) {
4921 UInt imm19 = INSN(23,5);
4922 UInt rT = INSN(4,0);
4923 UInt bX = INSN(30,30);
4924 ULong ea = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
4925 if (bX) {
4926 putIReg64orZR(rT, loadLE(Ity_I64, mkU64(ea)));
4927 } else {
4928 putIReg32orZR(rT, loadLE(Ity_I32, mkU64(ea)));
4929 }
4930 DIP("ldr %s, 0x%llx (literal)\n", nameIRegOrZR(bX == 1, rT), ea);
4931 return True;
4932 }
4933
4934 /* -------------- {LD,ST}R (integer register) --------------- */
4935 /* 31 29 20 15 12 11 9 4
4936 | | | | | | | |
4937 11 111000011 Rm option S 10 Rn Rt LDR Xt, [Xn|SP, R<m>{ext/sh}]
4938 10 111000011 Rm option S 10 Rn Rt LDR Wt, [Xn|SP, R<m>{ext/sh}]
4939 01 111000011 Rm option S 10 Rn Rt LDRH Wt, [Xn|SP, R<m>{ext/sh}]
4940 00 111000011 Rm option S 10 Rn Rt LDRB Wt, [Xn|SP, R<m>{ext/sh}]
4941
4942 11 111000001 Rm option S 10 Rn Rt STR Xt, [Xn|SP, R<m>{ext/sh}]
4943 10 111000001 Rm option S 10 Rn Rt STR Wt, [Xn|SP, R<m>{ext/sh}]
4944 01 111000001 Rm option S 10 Rn Rt STRH Wt, [Xn|SP, R<m>{ext/sh}]
4945 00 111000001 Rm option S 10 Rn Rt STRB Wt, [Xn|SP, R<m>{ext/sh}]
4946 */
4947 if (INSN(29,23) == BITS7(1,1,1,0,0,0,0)
4948 && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
4949 HChar dis_buf[64];
4950 UInt szLg2 = INSN(31,30);
4951 Bool isLD = INSN(22,22) == 1;
4952 UInt tt = INSN(4,0);
4953 IRTemp ea = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
4954 if (ea != IRTemp_INVALID) {
4955 switch (szLg2) {
4956 case 3: /* 64 bit */
4957 if (isLD) {
4958 putIReg64orZR(tt, loadLE(Ity_I64, mkexpr(ea)));
4959 DIP("ldr %s, %s\n", nameIReg64orZR(tt), dis_buf);
4960 } else {
4961 storeLE(mkexpr(ea), getIReg64orZR(tt));
4962 DIP("str %s, %s\n", nameIReg64orZR(tt), dis_buf);
4963 }
4964 break;
4965 case 2: /* 32 bit */
4966 if (isLD) {
4967 putIReg32orZR(tt, loadLE(Ity_I32, mkexpr(ea)));
4968 DIP("ldr %s, %s\n", nameIReg32orZR(tt), dis_buf);
4969 } else {
4970 storeLE(mkexpr(ea), getIReg32orZR(tt));
4971 DIP("str %s, %s\n", nameIReg32orZR(tt), dis_buf);
4972 }
4973 break;
            case 1: /* 16 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_16Uto64,
                                         loadLE(Ity_I16, mkexpr(ea))));
                  DIP("ldrh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to16, getIReg64orZR(tt)));
                  DIP("strh %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            case 0: /* 8 bit */
               if (isLD) {
                  putIReg64orZR(tt, unop(Iop_8Uto64,
                                         loadLE(Ity_I8, mkexpr(ea))));
                  DIP("ldrb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               } else {
                  storeLE(mkexpr(ea), unop(Iop_64to8, getIReg64orZR(tt)));
                  DIP("strb %s, %s\n", nameIReg32orZR(tt), dis_buf);
               }
               break;
            default:
               vassert(0);
         }
         return True;
      }
   }

   /* -------------- LDRS{B,H,W} (uimm12) -------------- */
   /* 31 29  26  23 21    9 4
      10 111 001 10 imm12 n t   LDRSW Xt, [Xn|SP, #pimm12 * 4]
      01 111 001 1x imm12 n t   LDRSH Rt, [Xn|SP, #pimm12 * 2]
      00 111 001 1x imm12 n t   LDRSB Rt, [Xn|SP, #pimm12 * 1]
      where
         Rt is Wt when x==1, Xt when x==0
   */
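   /* Offset example (our own illustration): imm12 is unsigned and
      scaled by the access size, so for LDRSW with imm12 = 3 the
      effective address is Xn|SP + 12. */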
   if (INSN(29,23) == BITS7(1,1,1,0,0,1,1)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):
         case BITS3(0,1,0): case BITS3(0,1,1):
         case BITS3(0,0,0): case BITS3(0,0,1):
            valid = True;
            break;
      }
      if (valid) {
         UInt    szLg2 = INSN(31,30);
         UInt    bitX  = INSN(22,22);
         UInt    imm12 = INSN(21,10);
         UInt    nn    = INSN(9,5);
         UInt    tt    = INSN(4,0);
         UInt    szB   = 1 << szLg2;
         IRExpr* ea    = binop(Iop_Add64,
                               getIReg64orSP(nn), mkU64(imm12 * szB));
         switch (szB) {
            case 4:
               vassert(bitX == 0);
               putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, ea)));
               DIP("ldrsw %s, [%s, #%u]\n", nameIReg64orZR(tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 2:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, ea)));
               }
               DIP("ldrsh %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            case 1:
               if (bitX == 1) {
                  putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, ea)));
               } else {
                  putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, ea)));
               }
               DIP("ldrsb %s, [%s, #%u]\n",
                   nameIRegOrZR(bitX == 0, tt),
                   nameIReg64orSP(nn), imm12 * szB);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, upd) -------------- */
   /* (at-Rn-then-Rn=EA)
      31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 01 n t  LDRSB Rt, [Xn|SP], #simm9
      01 111 000 1x 0  imm9 01 n t  LDRSH Rt, [Xn|SP], #simm9
      10 111 000 10 0  imm9 01 n t  LDRSW Xt, [Xn|SP], #simm9

      (at-EA-then-Rn=EA)
      00 111 000 1x 0  imm9 11 n t  LDRSB Rt, [Xn|SP, #simm9]!
      01 111 000 1x 0  imm9 11 n t  LDRSH Rt, [Xn|SP, #simm9]!
      10 111 000 10 0  imm9 11 n t  LDRSW Xt, [Xn|SP, #simm9]!
      where
         Rt is Wt when x==1, Xt when x==0
         transfer-at-Rn when [11]==0, at EA when [11]==1
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDRSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDRSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDRSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         Bool   atRN  = INSN(11,11) == 0;
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         IRTemp tTA   = IRTemp_INVALID;
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         tTA = atRN ? tRN : tEA;
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tTA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tTA))));
         }
         else {
            vassert(0);
         }
         putIReg64orSP(nn, mkexpr(tEA));
         DIP(atRN ? "ldrs%c %s, [%s], #%lld\n" : "ldrs%c %s, [%s, #%lld]!\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------------- LDRS{B,H,W} (simm9, noUpd) -------------- */
   /* 31 29      23 21 20   11 9 4
      00 111 000 1x 0  imm9 00 n t  LDURSB Rt, [Xn|SP, #simm9]
      01 111 000 1x 0  imm9 00 n t  LDURSH Rt, [Xn|SP, #simm9]
      10 111 000 10 0  imm9 00 n t  LDURSW Xt, [Xn|SP, #simm9]
      where
         Rt is Wt when x==1, Xt when x==0
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      /* Further checks on bits 31:30 and 22 */
      Bool valid = False;
      switch ((INSN(31,30) << 1) | INSN(22,22)) {
         case BITS3(1,0,0):                    // LDURSW Xt
         case BITS3(0,1,0): case BITS3(0,1,1): // LDURSH Xt, Wt
         case BITS3(0,0,0): case BITS3(0,0,1): // LDURSB Xt, Wt
            valid = True;
            break;
      }
      if (valid) {
         UInt   szLg2 = INSN(31,30);
         UInt   imm9  = INSN(20,12);
         UInt   nn    = INSN(9,5);
         UInt   tt    = INSN(4,0);
         IRTemp tRN   = newTemp(Ity_I64);
         IRTemp tEA   = newTemp(Ity_I64);
         ULong  simm9 = sx_to_64(imm9, 9);
         Bool   is64  = INSN(22,22) == 0;
         assign(tRN, getIReg64orSP(nn));
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
         HChar ch = '?';
         /* There are 5 cases:
               byte     load,           SX to 64
               byte     load, SX to 32, ZX to 64
               halfword load,           SX to 64
               halfword load, SX to 32, ZX to 64
               word     load,           SX to 64
            The ifs below handle them in the listed order.
         */
         if (szLg2 == 0) {
            ch = 'b';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_8Sto64,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_8Sto32,
                                      loadLE(Ity_I8, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 1) {
            ch = 'h';
            if (is64) {
               putIReg64orZR(tt, unop(Iop_16Sto64,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            } else {
               putIReg32orZR(tt, unop(Iop_16Sto32,
                                      loadLE(Ity_I16, mkexpr(tEA))));
            }
         }
         else if (szLg2 == 2 && is64) {
            ch = 'w';
            putIReg64orZR(tt, unop(Iop_32Sto64,
                                   loadLE(Ity_I32, mkexpr(tEA))));
         }
         else {
            vassert(0);
         }
         DIP("ldurs%c %s, [%s, #%lld]\n",
             ch, nameIRegOrZR(is64, tt), nameIReg64orSP(nn), simm9);
         return True;
      }
      /* else fall through */
   }

   /* -------- LDP,STP (immediate, simm7) (FP&VEC) -------- */
   /* L==1    => mm==LD
      L==0    => mm==ST
      sz==00  => 32 bit (S) transfers
      sz==01  => 64 bit (D) transfers
      sz==10  => 128 bit (Q) transfers
      sz==11  isn't allowed
      simm7 is scaled by the (single-register) transfer size

      31 29  26   22 21   14 9 4

      sz 101 1000 L  imm7 t2 n t1   mmNP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA, with nontemporal hint)

      sz 101 1001 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP], #imm
                                    (at-Rn-then-Rn=EA)

      sz 101 1010 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]
                                    (at-EA)

      sz 101 1011 L  imm7 t2 n t1   mmP SDQt1, SDQt2, [Xn|SP, #imm]!
                                    (at-EA-then-Rn=EA)
   */
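   /* Scaling example (our own illustration): for a D-register pair,
      szB is 8, so an encoded imm7 of -64 produces a byte offset of
      -512 -- exactly the "stp d0, d1, [sp, #-512]!" case discussed
      below. */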
   if (INSN(29,25) == BITS5(1,0,1,1,0)) {
      UInt szSlg2 = INSN(31,30); // log2 of the xfer size in 32-bit units
      Bool isLD   = INSN(22,22) == 1;
      Bool wBack  = INSN(23,23) == 1;
      Long simm7  = (Long)sx_to_64(INSN(21,15), 7);
      UInt tt2    = INSN(14,10);
      UInt nn     = INSN(9,5);
      UInt tt1    = INSN(4,0);
      if (szSlg2 == BITS2(1,1) || (isLD && tt1 == tt2)) {
         /* undecodable; fall through */
      } else {
         if (nn == 31) { /* FIXME generate stack alignment check */ }

         // Compute the transfer address TA and the writeback address WA.
         UInt   szB = 4 << szSlg2; /* szB is the per-register size */
         IRTemp tRN = newTemp(Ity_I64);
         assign(tRN, getIReg64orSP(nn));
         IRTemp tEA = newTemp(Ity_I64);
         simm7 = szB * simm7;
         assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm7)));

         IRTemp tTA = newTemp(Ity_I64);
         IRTemp tWA = newTemp(Ity_I64);
         switch (INSN(24,23)) {
            case BITS2(0,1):
               assign(tTA, mkexpr(tRN)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,1):
               assign(tTA, mkexpr(tEA)); assign(tWA, mkexpr(tEA)); break;
            case BITS2(1,0):
            case BITS2(0,0):
               assign(tTA, mkexpr(tEA)); /* tWA is unused */ break;
            default:
               vassert(0); /* NOTREACHED */
         }

         IRType ty = Ity_INVALID;
         switch (szB) {
            case 4:  ty = Ity_F32;  break;
            case 8:  ty = Ity_F64;  break;
            case 16: ty = Ity_V128; break;
            default: vassert(0);
         }

         /* Normally rN would be updated after the transfer.  However, in
            the special cases typified by
               stp q0, q1, [sp,#-512]!
               stp d0, d1, [sp,#-512]!
               stp s0, s1, [sp,#-512]!
            it is necessary to update SP before the transfer, (1)
            because Memcheck will otherwise complain about a write
            below the stack pointer, and (2) because the segfault
            stack extension mechanism will otherwise extend the stack
            only down to SP before the instruction, which might not be
            far enough, if the -512 offset takes the actual access
            address to the next page.
         */
         Bool earlyWBack
            = wBack && simm7 < 0
              && INSN(24,23) == BITS2(1,1) && nn == 31 && !isLD;

         if (wBack && earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         if (isLD) {
            if (szB < 16) {
               putQReg128(tt1, mkV128(0x0000));
            }
            putQRegLO(tt1,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(0))));
            if (szB < 16) {
               putQReg128(tt2, mkV128(0x0000));
            }
            putQRegLO(tt2,
                      loadLE(ty, binop(Iop_Add64, mkexpr(tTA), mkU64(szB))));
         } else {
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(0)),
                    getQRegLO(tt1, ty));
            storeLE(binop(Iop_Add64, mkexpr(tTA), mkU64(szB)),
                    getQRegLO(tt2, ty));
         }

         if (wBack && !earlyWBack)
            putIReg64orSP(nn, mkexpr(tEA));

         const HChar* fmt_str = NULL;
         switch (INSN(24,23)) {
            case BITS2(0,1):
               fmt_str = "%sp %s, %s, [%s], #%lld (at-Rn-then-Rn=EA)\n";
               break;
            case BITS2(1,1):
               fmt_str = "%sp %s, %s, [%s, #%lld]! (at-EA-then-Rn=EA)\n";
               break;
            case BITS2(1,0):
               fmt_str = "%sp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            case BITS2(0,0):
               fmt_str = "%snp %s, %s, [%s, #%lld] (at-Rn)\n";
               break;
            default:
               vassert(0);
         }
         DIP(fmt_str, isLD ? "ld" : "st",
                      nameQRegLO(tt1, ty), nameQRegLO(tt2, ty),
                      nameIReg64orSP(nn), simm7);
         return True;
      }
   }

   /* -------------- {LD,ST}R (vector register) --------------- */
   /* 31 29     23  20 15     12 11 9  4
      |  |      |   |  |      |  |  |  |
      00 111100 011 Rm option S  10 Rn Rt  LDR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 011 Rm option S  10 Rn Rt  LDR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 011 Rm option S  10 Rn Rt  LDR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 011 Rm option S  10 Rn Rt  LDR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 111 Rm option S  10 Rn Rt  LDR Qt, [Xn|SP, R<m>{ext/sh}]

      00 111100 001 Rm option S  10 Rn Rt  STR Bt, [Xn|SP, R<m>{ext/sh}]
      01 111100 001 Rm option S  10 Rn Rt  STR Ht, [Xn|SP, R<m>{ext/sh}]
      10 111100 001 Rm option S  10 Rn Rt  STR St, [Xn|SP, R<m>{ext/sh}]
      11 111100 001 Rm option S  10 Rn Rt  STR Dt, [Xn|SP, R<m>{ext/sh}]
      00 111100 101 Rm option S  10 Rn Rt  STR Qt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   tt    = INSN(4,0);
      if (szLg2 > 4) goto after_LDR_STR_vector_register;
      IRTemp ea    = gen_indexed_EA(dis_buf, insn, False/*to/from vec regs*/);
      if (ea == IRTemp_INVALID) goto after_LDR_STR_vector_register;
      switch (szLg2) {
         case 0: /* 8 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I8, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I8));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I8), dis_buf);
            }
            break;
         case 1:
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I16, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I16));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I16), dis_buf);
            }
            break;
         case 2: /* 32 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I32, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I32));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I32), dis_buf);
            }
            break;
         case 3: /* 64 bit */
            if (isLD) {
               putQReg128(tt, mkV128(0x0000));
               putQRegLO(tt, loadLE(Ity_I64, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQRegLO(tt, Ity_I64));
               DIP("str %s, %s\n", nameQRegLO(tt, Ity_I64), dis_buf);
            }
            break;
         case 4:
            if (isLD) {
               putQReg128(tt, loadLE(Ity_V128, mkexpr(ea)));
               DIP("ldr %s, %s\n", nameQReg128(tt), dis_buf);
            } else {
               storeLE(mkexpr(ea), getQReg128(tt));
               DIP("str %s, %s\n", nameQReg128(tt), dis_buf);
            }
            break;
         default:
            vassert(0);
      }
      return True;
   }
   after_LDR_STR_vector_register:

   /* ---------- LDRS{B,H,W} (integer register, SX) ---------- */
   /* 31 29      22 20 15  12 11 9  4
      |  |       |  |  |   |  |  |  |
      10 1110001 01 Rm opt S  10 Rn Rt  LDRSW Xt, [Xn|SP, R<m>{ext/sh}]

      01 1110001 01 Rm opt S  10 Rn Rt  LDRSH Xt, [Xn|SP, R<m>{ext/sh}]
      01 1110001 11 Rm opt S  10 Rn Rt  LDRSH Wt, [Xn|SP, R<m>{ext/sh}]

      00 1110001 01 Rm opt S  10 Rn Rt  LDRSB Xt, [Xn|SP, R<m>{ext/sh}]
      00 1110001 11 Rm opt S  10 Rn Rt  LDRSB Wt, [Xn|SP, R<m>{ext/sh}]
   */
   if (INSN(29,23) == BITS7(1,1,1,0,0,0,1)
       && INSN(21,21) == 1 && INSN(11,10) == BITS2(1,0)) {
      HChar  dis_buf[64];
      UInt   szLg2  = INSN(31,30);
      Bool   sxTo64 = INSN(22,22) == 0; // else sx to 32 and zx to 64
      UInt   tt     = INSN(4,0);
      if (szLg2 == 3) goto after_LDRS_integer_register;
      IRTemp ea     = gen_indexed_EA(dis_buf, insn, True/*to/from int regs*/);
      if (ea == IRTemp_INVALID) goto after_LDRS_integer_register;
      /* Enumerate the 5 variants explicitly. */
      if (szLg2 == 2/*32 bit*/ && sxTo64) {
         putIReg64orZR(tt, unop(Iop_32Sto64, loadLE(Ity_I32, mkexpr(ea))));
         DIP("ldrsw %s, %s\n", nameIReg64orZR(tt), dis_buf);
         return True;
      }
      else
      if (szLg2 == 1/*16 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_16Sto64, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_16Sto32, loadLE(Ity_I16, mkexpr(ea))));
            DIP("ldrsh %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      else
      if (szLg2 == 0/*8 bit*/) {
         if (sxTo64) {
            putIReg64orZR(tt, unop(Iop_8Sto64, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg64orZR(tt), dis_buf);
         } else {
            putIReg32orZR(tt, unop(Iop_8Sto32, loadLE(Ity_I8, mkexpr(ea))));
            DIP("ldrsb %s, %s\n", nameIReg32orZR(tt), dis_buf);
         }
         return True;
      }
      /* else it's an invalid combination */
   }
   after_LDRS_integer_register:

   /* -------- LDR/STR (immediate, SIMD&FP, unsigned offset) -------- */
   /* This is the Unsigned offset variant only.  The Post-Index and
      Pre-Index variants are below.

      31 29      23 21    9 4
      00 111 101 01 imm12 n t   LDR Bt, [Xn|SP + imm12 * 1]
      01 111 101 01 imm12 n t   LDR Ht, [Xn|SP + imm12 * 2]
      10 111 101 01 imm12 n t   LDR St, [Xn|SP + imm12 * 4]
      11 111 101 01 imm12 n t   LDR Dt, [Xn|SP + imm12 * 8]
      00 111 101 11 imm12 n t   LDR Qt, [Xn|SP + imm12 * 16]

      00 111 101 00 imm12 n t   STR Bt, [Xn|SP + imm12 * 1]
      01 111 101 00 imm12 n t   STR Ht, [Xn|SP + imm12 * 2]
      10 111 101 00 imm12 n t   STR St, [Xn|SP + imm12 * 4]
      11 111 101 00 imm12 n t   STR Dt, [Xn|SP + imm12 * 8]
      00 111 101 10 imm12 n t   STR Qt, [Xn|SP + imm12 * 16]
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,1)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4) {
      UInt   szLg2  = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD   = INSN(22,22) == 1;
      UInt   pimm12 = INSN(21,10) << szLg2;
      UInt   nn     = INSN(9,5);
      UInt   tt     = INSN(4,0);
      IRTemp tEA    = newTemp(Ity_I64);
      IRType ty     = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(pimm12)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%u]\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), pimm12);
      return True;
   }

   /* -------- LDR/STR (immediate, SIMD&FP, pre/post index) -------- */
   /* These are the Post-Index and Pre-Index variants.

      31 29      23 21 20   11 9 4
      (at-Rn-then-Rn=EA)
      00 111 100 01 0  imm9 01 n t   LDR Bt, [Xn|SP], #simm
      01 111 100 01 0  imm9 01 n t   LDR Ht, [Xn|SP], #simm
      10 111 100 01 0  imm9 01 n t   LDR St, [Xn|SP], #simm
      11 111 100 01 0  imm9 01 n t   LDR Dt, [Xn|SP], #simm
      00 111 100 11 0  imm9 01 n t   LDR Qt, [Xn|SP], #simm

      (at-EA-then-Rn=EA)
      00 111 100 01 0  imm9 11 n t   LDR Bt, [Xn|SP, #simm]!
      01 111 100 01 0  imm9 11 n t   LDR Ht, [Xn|SP, #simm]!
      10 111 100 01 0  imm9 11 n t   LDR St, [Xn|SP, #simm]!
      11 111 100 01 0  imm9 11 n t   LDR Dt, [Xn|SP, #simm]!
      00 111 100 11 0  imm9 11 n t   LDR Qt, [Xn|SP, #simm]!

      Stores are the same except with bit 22 set to 0.
   */
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(10,10) == 1) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      Bool   atRN  = INSN(11,11) == 0;
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      IRTemp tRN   = newTemp(Ity_I64);
      IRTemp tEA   = newTemp(Ity_I64);
      IRTemp tTA   = IRTemp_INVALID;
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      ULong  simm9 = sx_to_64(imm9, 9);
      assign(tRN, getIReg64orSP(nn));
      assign(tEA, binop(Iop_Add64, mkexpr(tRN), mkU64(simm9)));
      tTA = atRN ? tRN : tEA;
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tTA)));
      } else {
         storeLE(mkexpr(tTA), getQRegLO(tt, ty));
      }
      putIReg64orSP(nn, mkexpr(tEA));
      DIP(atRN ? "%s %s, [%s], #%lld\n" : "%s %s, [%s, #%lld]!\n",
          isLD ? "ldr" : "str",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), simm9);
      return True;
   }

   /* -------- LDUR/STUR (unscaled offset, SIMD&FP) -------- */
   /* 31 29      23 21 20   11 9 4
      00 111 100 01 0  imm9 00 n t   LDUR Bt, [Xn|SP, #simm]
      01 111 100 01 0  imm9 00 n t   LDUR Ht, [Xn|SP, #simm]
      10 111 100 01 0  imm9 00 n t   LDUR St, [Xn|SP, #simm]
      11 111 100 01 0  imm9 00 n t   LDUR Dt, [Xn|SP, #simm]
      00 111 100 11 0  imm9 00 n t   LDUR Qt, [Xn|SP, #simm]

      00 111 100 00 0  imm9 00 n t   STUR Bt, [Xn|SP, #simm]
      01 111 100 00 0  imm9 00 n t   STUR Ht, [Xn|SP, #simm]
      10 111 100 00 0  imm9 00 n t   STUR St, [Xn|SP, #simm]
      11 111 100 00 0  imm9 00 n t   STUR Dt, [Xn|SP, #simm]
      00 111 100 10 0  imm9 00 n t   STUR Qt, [Xn|SP, #simm]
   */
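   /* Range note (our own gloss): "unscaled" means imm9 is a plain byte
      offset, sign-extended from 9 bits, so the reachable range is
      -256 .. +255 regardless of the transfer size; e.g. imm9 = 0x1FF
      decodes as -1. */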
   if (INSN(29,24) == BITS6(1,1,1,1,0,0)
       && ((INSN(23,23) << 2) | INSN(31,30)) <= 4
       && INSN(21,21) == 0 && INSN(11,10) == BITS2(0,0)) {
      UInt   szLg2 = (INSN(23,23) << 2) | INSN(31,30);
      Bool   isLD  = INSN(22,22) == 1;
      UInt   imm9  = INSN(20,12);
      UInt   nn    = INSN(9,5);
      UInt   tt    = INSN(4,0);
      ULong  simm9 = sx_to_64(imm9, 9);
      IRTemp tEA   = newTemp(Ity_I64);
      IRType ty    = preferredVectorSubTypeFromSize(1 << szLg2);
      assign(tEA, binop(Iop_Add64, getIReg64orSP(nn), mkU64(simm9)));
      if (isLD) {
         if (szLg2 < 4) {
            putQReg128(tt, mkV128(0x0000));
         }
         putQRegLO(tt, loadLE(ty, mkexpr(tEA)));
      } else {
         storeLE(mkexpr(tEA), getQRegLO(tt, ty));
      }
      DIP("%s %s, [%s, #%lld]\n",
          isLD ? "ldur" : "stur",
          nameQRegLO(tt, ty), nameIReg64orSP(nn), (Long)simm9);
      return True;
   }

   /* ---------------- LDR (literal, SIMD&FP) ---------------- */
   /* 31 29      23    4
      00 011 100 imm19 t   LDR St, [PC + sxTo64(imm19 << 2)]
      01 011 100 imm19 t   LDR Dt, [PC + sxTo64(imm19 << 2)]
      10 011 100 imm19 t   LDR Qt, [PC + sxTo64(imm19 << 2)]
   */
   if (INSN(29,24) == BITS6(0,1,1,1,0,0) && INSN(31,30) < BITS2(1,1)) {
      UInt   szB   = 4 << INSN(31,30);
      UInt   imm19 = INSN(23,5);
      UInt   tt    = INSN(4,0);
      ULong  ea    = guest_PC_curr_instr + sx_to_64(imm19 << 2, 21);
      IRType ty    = preferredVectorSubTypeFromSize(szB);
      putQReg128(tt, mkV128(0x0000));
      putQRegLO(tt, loadLE(ty, mkU64(ea)));
      DIP("ldr %s, 0x%llx (literal)\n", nameQRegLO(tt, ty), ea);
      return True;
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 1 reg) ------ */
   /* ------ LD2/ST2 (multiple 2-elem structs to/from 2 regs) ------ */
   /* ------ LD3/ST3 (multiple 3-elem structs to/from 3 regs) ------ */
   /* ------ LD4/ST4 (multiple 4-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0000 sz n t  xx4 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0100 sz n t  xx3 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1000 sz n t  xx2 {Vt..t+1.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0111 sz n t  xx1 {Vt.T}, [Xn|SP]
      0q 001 1001 L  0  m     0111 sz n t  xx1 {Vt.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
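   /* Worked example (our own, for orientation): "ld4 {v0.16b-v3.16b},
      [x1], #64" has q == 1 and nRegs == 4, so the transfer size is
      16 * 4 = 64 bytes, and m == 11111 selects that constant as the
      post-increment step. */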
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ = INSN(30,30);
      Bool isPX = INSN(23,23) == 1;
      Bool isLD = INSN(22,22) == 1;
      UInt mm   = INSN(20,16);
      UInt opc  = INSN(15,12);
      UInt sz   = INSN(11,10);
      UInt nn   = INSN(9,5);
      UInt tt   = INSN(4,0);
      Bool isQ  = bitQ == 1;
      Bool is1d = sz == BITS2(1,1) && !isQ;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,0,0): nRegs = 4; break;
         case BITS4(0,1,0,0): nRegs = 3; break;
         case BITS4(1,0,0,0): nRegs = 2; break;
         case BITS4(0,1,1,1): nRegs = 1; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs == 1                             /* .1d is allowed */
          || (nRegs >= 2 && nRegs <= 4 && !is1d) /* .1d is not allowed */) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3, i0, i1, i2, i3;
         u0 = u1 = u2 = u3 = i0 = i1 = i2 = i3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); i3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); i2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128(); i1 = newTempV128(); /* fallthru */
            case 1: u0 = newTempV128(); i0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32)); /* fallthru */
               case 1: assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: (isQ ? math_INTERLEAVE4_128 : math_INTERLEAVE4_64)
                          (&i0, &i1, &i2, &i3, sz, u0, u1, u2, u3);
                       break;
               case 3: (isQ ? math_INTERLEAVE3_128 : math_INTERLEAVE3_64)
                          (&i0, &i1, &i2, sz, u0, u1, u2);
                       break;
               case 2: (isQ ? math_INTERLEAVE2_128 : math_INTERLEAVE2_64)
                          (&i0, &i1, sz, u0, u1);
                       break;
               case 1: (isQ ? math_INTERLEAVE1_128 : math_INTERLEAVE1_64)
                          (&i0, sz, u0);
                       break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i1)) );
                       /* fallthru */
               case 1: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(i0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(i3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(i2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(i1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  /* fallthru */
               case 1:
                  assign(i0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: (isQ ? math_DEINTERLEAVE4_128 : math_DEINTERLEAVE4_64)
                          (&u0, &u1, &u2, &u3, sz, i0,i1,i2,i3);
                       break;
               case 3: (isQ ? math_DEINTERLEAVE3_128 : math_DEINTERLEAVE3_64)
                          (&u0, &u1, &u2, sz, i0, i1, i2);
                       break;
               case 2: (isQ ? math_DEINTERLEAVE2_128 : math_DEINTERLEAVE2_64)
                          (&u0, &u1, sz, i0, i1);
                       break;
               case 1: (isQ ? math_DEINTERLEAVE1_128 : math_DEINTERLEAVE1_64)
                          (&u0, sz, i0);
                       break;
               default: vassert(0);
            }
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       /* fallthru */
               case 1: putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------ LD1/ST1 (multiple 1-elem structs to/from 2 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 3 regs) ------ */
   /* ------ LD1/ST1 (multiple 1-elem structs to/from 4 regs) ------ */
   /* 31 29  26   22 21 20    15   11 9 4

      0q 001 1000 L  0  00000 0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP]
      0q 001 1001 L  0  m     0010 sz n t  xx1 {Vt..t+3.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP]
      0q 001 1001 L  0  m     0110 sz n t  xx1 {Vt..t+2.T}, [Xn|SP], step

      0q 001 1000 L  0  00000 1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP]
      0q 001 1001 L  0  m     1010 sz n t  xx1 {Vt..t+1.T}, [Xn|SP], step

      T    = defined by Q and sz in the normal way
      step = if m == 11111 then transfer-size else Xm
      xx   = case L of 1 -> LD ; 0 -> ST
   */
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,0)
       && INSN(21,21) == 0) {
      Bool bitQ = INSN(30,30);
      Bool isPX = INSN(23,23) == 1;
      Bool isLD = INSN(22,22) == 1;
      UInt mm   = INSN(20,16);
      UInt opc  = INSN(15,12);
      UInt sz   = INSN(11,10);
      UInt nn   = INSN(9,5);
      UInt tt   = INSN(4,0);
      Bool isQ  = bitQ == 1;
      UInt nRegs = 0;
      switch (opc) {
         case BITS4(0,0,1,0): nRegs = 4; break;
         case BITS4(0,1,1,0): nRegs = 3; break;
         case BITS4(1,0,1,0): nRegs = 2; break;
         default: break;
      }

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed.
         If we see it, set nRegs to 0 so as to cause the next conditional
         to fail. */
      if (!isPX && mm != 0)
         nRegs = 0;

      if (nRegs >= 2 && nRegs <= 4) {

         UInt xferSzB = (isQ ? 16 : 8) * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* -- BEGIN generate the transfers -- */

         IRTemp u0, u1, u2, u3;
         u0 = u1 = u2 = u3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4: u3 = newTempV128(); /* fallthru */
            case 3: u2 = newTempV128(); /* fallthru */
            case 2: u1 = newTempV128();
                    u0 = newTempV128(); break;
            default: vassert(0);
         }

         /* -- Multiple 128 or 64 bit stores -- */
         if (!isLD) {
            switch (nRegs) {
               case 4: assign(u3, getQReg128((tt+3) % 32)); /* fallthru */
               case 3: assign(u2, getQReg128((tt+2) % 32)); /* fallthru */
               case 2: assign(u1, getQReg128((tt+1) % 32));
                       assign(u0, getQReg128((tt+0) % 32)); break;
               default: vassert(0);
            }
#           define MAYBE_NARROW_TO_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_V128to64,(_expr)))
            UInt step = isQ ? 16 : 8;
            switch (nRegs) {
               case 4: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(3*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u3)) );
                       /* fallthru */
               case 3: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(2*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u2)) );
                       /* fallthru */
               case 2: storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(1*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u1)) );
                       storeLE( binop(Iop_Add64, mkexpr(tTA), mkU64(0*step)),
                                MAYBE_NARROW_TO_64(mkexpr(u0)) );
                       break;
               default: vassert(0);
            }
#           undef MAYBE_NARROW_TO_64
         }

         /* -- Multiple 128 or 64 bit loads -- */
         else /* isLD */ {
            UInt   step   = isQ ? 16 : 8;
            IRType loadTy = isQ ? Ity_V128 : Ity_I64;
#           define MAYBE_WIDEN_FROM_64(_expr) \
                      (isQ ? (_expr) : unop(Iop_64UtoV128,(_expr)))
            switch (nRegs) {
               case 4:
                  assign(u3, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(3 * step)))));
                  /* fallthru */
               case 3:
                  assign(u2, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(2 * step)))));
                  /* fallthru */
               case 2:
                  assign(u1, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(1 * step)))));
                  assign(u0, MAYBE_WIDEN_FROM_64(
                                loadLE(loadTy,
                                       binop(Iop_Add64, mkexpr(tTA),
                                                        mkU64(0 * step)))));
                  break;
               default:
                  vassert(0);
            }
#           undef MAYBE_WIDEN_FROM_64
            switch (nRegs) {
               case 4: putQReg128( (tt+3) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u3));
                       /* fallthru */
               case 3: putQReg128( (tt+2) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u2));
                       /* fallthru */
               case 2: putQReg128( (tt+1) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u1));
                       putQReg128( (tt+0) % 32,
                                   math_MAYBE_ZERO_HI64(bitQ, u0));
                       break;
               default: vassert(0);
            }
         }

         /* -- END generate the transfers -- */

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s1 {v%u.%s .. v%u.%s}, [%s]%s\n",
             isLD ? "ld" : "st",
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ---------- LD1R (single structure, replicate) ---------- */
   /* ---------- LD2R (single structure, replicate) ---------- */
   /* ---------- LD3R (single structure, replicate) ---------- */
   /* ---------- LD4R (single structure, replicate) ---------- */
   /* 31 29       22 20    15    11 9 4
      0q 001 1010 10 00000 110 0 sz n t  LD1R {Vt.T}, [Xn|SP]
      0q 001 1011 10 m     110 0 sz n t  LD1R {Vt.T}, [Xn|SP], step

      0q 001 1010 11 00000 110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP]
      0q 001 1011 11 m     110 0 sz n t  LD2R {Vt..t+1.T}, [Xn|SP], step

      0q 001 1010 10 00000 111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP]
      0q 001 1011 10 m     111 0 sz n t  LD3R {Vt..t+2.T}, [Xn|SP], step

      0q 001 1010 11 00000 111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP]
      0q 001 1011 11 m     111 0 sz n t  LD4R {Vt..t+3.T}, [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
   */
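   /* Decode note (our own): nRegs below is ((insn[13] << 1) | insn[21])
      + 1, so for LD3R, which has insn[13] == 1 and insn[21] == 0, that
      gives (2 | 0) + 1 = 3 registers. */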
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)
       && INSN(22,22) == 1 && INSN(15,14) == BITS2(1,1)
       && INSN(12,12) == 0) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (isPX || mm == 0) {

         IRType ty = integerIRTypeOfSize(1 << sz);

         UInt laneSzB = 1 << sz;
         UInt xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         IRTemp e0, e1, e2, e3, v0, v1, v2, v3;
         e0 = e1 = e2 = e3 = v0 = v1 = v2 = v3 = IRTemp_INVALID;
         switch (nRegs) {
            case 4:
               e3 = newTemp(ty);
               assign(e3, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(3 * laneSzB))));
               v3 = math_DUP_TO_V128(e3, ty);
               putQReg128((tt+3) % 32, math_MAYBE_ZERO_HI64(bitQ, v3));
               /* fallthrough */
            case 3:
               e2 = newTemp(ty);
               assign(e2, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(2 * laneSzB))));
               v2 = math_DUP_TO_V128(e2, ty);
               putQReg128((tt+2) % 32, math_MAYBE_ZERO_HI64(bitQ, v2));
               /* fallthrough */
            case 2:
               e1 = newTemp(ty);
               assign(e1, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(1 * laneSzB))));
               v1 = math_DUP_TO_V128(e1, ty);
               putQReg128((tt+1) % 32, math_MAYBE_ZERO_HI64(bitQ, v1));
               /* fallthrough */
            case 1:
               e0 = newTemp(ty);
               assign(e0, loadLE(ty, binop(Iop_Add64, mkexpr(tTA),
                                                      mkU64(0 * laneSzB))));
               v0 = math_DUP_TO_V128(e0, ty);
               putQReg128((tt+0) % 32, math_MAYBE_ZERO_HI64(bitQ, v0));
               break;
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("ld%ur {v%u.%s .. v%u.%s}, [%s]%s\n",
             nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr, nameIReg64orSP(nn),
             pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------ LD1/ST1 (single structure, to/from one lane) ------ */
   /* ------ LD2/ST2 (single structure, to/from one lane) ------ */
   /* ------ LD3/ST3 (single structure, to/from one lane) ------ */
   /* ------ LD4/ST4 (single structure, to/from one lane) ------ */
   /* 31 29       22 21 20    15  11 9 4
      0q 001 1010 L  0  00000 xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx0 S sz n t  op1 {Vt.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx0 S sz n t  op2 {Vt..t+1.T}[ix], [Xn|SP], step

      0q 001 1010 L  0  00000 xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP]
      0q 001 1011 L  0  m     xx1 S sz n t  op3 {Vt..t+2.T}[ix], [Xn|SP], step

      0q 001 1010 L  1  00000 xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP]
      0q 001 1011 L  1  m     xx1 S sz n t  op4 {Vt..t+3.T}[ix], [Xn|SP], step

      step = if m == 11111 then transfer-size else Xm
      op   = case L of 1 -> LD ; 0 -> ST

      laneszB,ix = case xx:q:S:sz of 00:b:b:bb -> 1, bbbb
                                     01:b:b:b0 -> 2, bbb
                                     10:b:b:00 -> 4, bb
                                     10:b:0:01 -> 8, b
   */
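   /* Worked decode (our own example): xx:q:S:sz = 10:1:0:00 packs to
      xx_q_S_sz == 0x28 below, giving laneSzB = 4 and ix = q:S = 2,
      i.e. a 32-bit transfer to or from lane 2. */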
   if (INSN(31,31) == 0 && INSN(29,24) == BITS6(0,0,1,1,0,1)) {
      UInt bitQ  = INSN(30,30);
      Bool isPX  = INSN(23,23) == 1;
      Bool isLD  = INSN(22,22) == 1;
      UInt nRegs = ((INSN(13,13) << 1) | INSN(21,21)) + 1;
      UInt mm    = INSN(20,16);
      UInt xx    = INSN(15,14);
      UInt bitS  = INSN(12,12);
      UInt sz    = INSN(11,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);

      Bool valid = True;

      /* The combination insn[23] == 0 && insn[20:16] != 0 is not allowed. */
      if (!isPX && mm != 0)
         valid = False;

      UInt laneSzB = 0;  /* invalid */
      UInt ix      = 16; /* invalid */

      UInt xx_q_S_sz = (xx << 4) | (bitQ << 3) | (bitS << 2) | sz;
      switch (xx_q_S_sz) {
         case 0x00: case 0x01: case 0x02: case 0x03:
         case 0x04: case 0x05: case 0x06: case 0x07:
         case 0x08: case 0x09: case 0x0A: case 0x0B:
         case 0x0C: case 0x0D: case 0x0E: case 0x0F:
            laneSzB = 1; ix = xx_q_S_sz & 0xF;
            break;
         case 0x10: case 0x12: case 0x14: case 0x16:
         case 0x18: case 0x1A: case 0x1C: case 0x1E:
            laneSzB = 2; ix = (xx_q_S_sz >> 1) & 7;
            break;
         case 0x20: case 0x24: case 0x28: case 0x2C:
            laneSzB = 4; ix = (xx_q_S_sz >> 2) & 3;
            break;
         case 0x21: case 0x29:
            laneSzB = 8; ix = (xx_q_S_sz >> 3) & 1;
            break;
         default:
            break;
      }

      if (valid && laneSzB != 0) {

         IRType ty      = integerIRTypeOfSize(laneSzB);
         UInt   xferSzB = laneSzB * nRegs;

         /* Generate the transfer address (TA) and if necessary the
            writeback address (WB) */
         IRTemp tTA = newTemp(Ity_I64);
         assign(tTA, getIReg64orSP(nn));
         if (nn == 31) { /* FIXME generate stack alignment check */ }
         IRTemp tWB = IRTemp_INVALID;
         if (isPX) {
            tWB = newTemp(Ity_I64);
            assign(tWB, binop(Iop_Add64,
                              mkexpr(tTA),
                              mm == BITS5(1,1,1,1,1) ? mkU64(xferSzB)
                                                     : getIReg64orZR(mm)));
         }

         /* Do the writeback, if necessary */
         if (isPX) {
            putIReg64orSP(nn, mkexpr(tWB));
         }

         switch (nRegs) {
            case 4: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(3 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+3) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+3) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 3: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(2 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+2) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+2) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 2: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(1 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+1) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+1) % 32, ix, ty));
               }
               /* fallthrough */
            }
            case 1: {
               IRExpr* addr
                  = binop(Iop_Add64, mkexpr(tTA), mkU64(0 * laneSzB));
               if (isLD) {
                  putQRegLane((tt+0) % 32, ix, loadLE(ty, addr));
               } else {
                  storeLE(addr, getQRegLane((tt+0) % 32, ix, ty));
               }
               break;
            }
            default:
               vassert(0);
         }

         HChar pxStr[20];
         pxStr[0] = pxStr[sizeof(pxStr)-1] = 0;
         if (isPX) {
            if (mm == BITS5(1,1,1,1,1))
               vex_sprintf(pxStr, ", #%u", xferSzB);
            else
               vex_sprintf(pxStr, ", %s", nameIReg64orZR(mm));
         }
         const HChar* arr = nameArr_Q_SZ(bitQ, sz);
         DIP("%s%u {v%u.%s .. v%u.%s}[%u], [%s]%s\n",
             isLD ? "ld" : "st", nRegs,
             (tt+0) % 32, arr, (tt+nRegs-1) % 32, arr,
             ix, nameIReg64orSP(nn), pxStr);

         return True;
      }
      /* else fall through */
   }

   /* ------------------ LD{,A}X{R,RH,RB} ------------------ */
   /* ------------------ ST{,L}X{R,RH,RB} ------------------ */
   /* 31 29     23  20    14      9 4
      sz 001000 010 11111 0 11111 n t   LDX{R,RH,RB}  Rt, [Xn|SP]
      sz 001000 010 11111 1 11111 n t   LDAX{R,RH,RB} Rt, [Xn|SP]
      sz 001000 000 s     0 11111 n t   STX{R,RH,RB}  Ws, Rt, [Xn|SP]
      sz 001000 000 s     1 11111 n t   STLX{R,RH,RB} Ws, Rt, [Xn|SP]
   */
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
       && INSN(14,10) == BITS5(1,1,1,1,1)) {
      UInt szBlg2     = INSN(31,30);
      Bool isLD       = INSN(22,22) == 1;
      Bool isAcqOrRel = INSN(15,15) == 1;
      UInt ss         = INSN(20,16);
      UInt nn         = INSN(9,5);
      UInt tt         = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD && ss == BITS5(1,1,1,1,1)) {
         IRTemp res = newTemp(ty);
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      if (!isLD) {
         if (isAcqOrRel) {
            stmt(IRStmt_MBE(Imbe_Fence));
         }
         IRTemp  res  = newTemp(Ity_I1);
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
         /* IR semantics: res is 1 if store succeeds, 0 if it fails.
            Need to set rS to 1 on failure, 0 on success. */
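         /* Hence the Xor with 1 below: a successful store gives
            res == 1, and 1Uto64(res) xor 1 == 0, the architectural
            success code for Ws; a failed store gives res == 0 and
            hence Ws == 1. */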
         putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
                                            mkU64(1)));
         DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "l" : "", suffix[szBlg2],
             nameIRegOrZR(False, ss),
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
         return True;
      }
      /* else fall through */
   }

   /* ------------------ LDA{R,RH,RB} ------------------ */
   /* ------------------ STL{R,RH,RB} ------------------ */
   /* 31 29     23  20    14      9 4
      sz 001000 110 11111 1 11111 n t   LDAR<sz> Rt, [Xn|SP]
      sz 001000 100 11111 1 11111 n t   STLR<sz> Rt, [Xn|SP]
   */
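   /* Barrier note (our own summary): below, a load-acquire is modelled
      as the load followed by a fence, and a store-release as a fence
      followed by the store -- a conservative rendering of the ARMv8
      acquire/release semantics in terms of IR fences. */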
   if (INSN(29,23) == BITS7(0,0,1,0,0,0,1)
       && INSN(21,10) == BITS12(0,1,1,1,1,1,1,1,1,1,1,1)) {
      UInt szBlg2 = INSN(31,30);
      Bool isLD   = INSN(22,22) == 1;
      UInt nn     = INSN(9,5);
      UInt tt     = INSN(4,0);

      vassert(szBlg2 < 4);
      UInt   szB = 1 << szBlg2; /* 1, 2, 4 or 8 */
      IRType ty  = integerIRTypeOfSize(szB);
      const HChar* suffix[4] = { "rb", "rh", "r", "r" };

      IRTemp ea = newTemp(Ity_I64);
      assign(ea, getIReg64orSP(nn));
      /* FIXME generate check that ea is szB-aligned */

      if (isLD) {
         IRTemp res = newTemp(ty);
         assign(res, loadLE(ty, mkexpr(ea)));
         putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
         stmt(IRStmt_MBE(Imbe_Fence));
         DIP("lda%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      } else {
         stmt(IRStmt_MBE(Imbe_Fence));
         IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
         storeLE(mkexpr(ea), data);
         DIP("stl%s %s, [%s]\n", suffix[szBlg2],
             nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
      }
      return True;
   }

   /* ------------------ PRFM (immediate) ------------------ */
   /* 31           21    9 4
      11 111 00110 imm12 n t   PRFM prfop=Rt, [Xn|SP, #pimm]
   */
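   /* Note (our own gloss): the 12-bit immediate is scaled by 8, the
      access size of this 64-bit prefetch encoding, and the Rt field
      holds the prefetch operation (prfop) rather than a register
      number. */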
   if (INSN(31,22) == BITS10(1,1,1,1,1,0,0,1,1,0)) {
      UInt imm12 = INSN(21,10);
      UInt nn    = INSN(9,5);
      UInt tt    = INSN(4,0);
      /* Generating any IR here is pointless, except for documentation
         purposes, as it will get optimised away later. */
      IRTemp ea = newTemp(Ity_I64);
      assign(ea, binop(Iop_Add64, getIReg64orSP(nn), mkU64(imm12 * 8)));
      DIP("prfm prfop=%u, [%s, #%u]\n", tt, nameIReg64orSP(nn), imm12 * 8);
      return True;
   }

   vex_printf("ARM64 front end: load_store\n");
   return False;
#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Control flow and misc instructions                   ---*/
/*------------------------------------------------------------*/

static
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
                          const VexArchInfo* archinfo)
{
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

   /* ---------------------- B cond ----------------------- */
   /* 31        24    4 3
      0101010 0 imm19 0 cond */
   if (INSN(31,24) == BITS8(0,1,0,1,0,1,0,0) && INSN(4,4) == 0) {
      UInt  cond   = INSN(3,0);
      ULong uimm64 = INSN(23,5) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 21);
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
      stmt( IRStmt_Exit(unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("b.%s 0x%llx\n", nameCC(cond), guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- B{L} uncond -------------------- */
   if (INSN(30,26) == BITS5(0,0,1,0,1)) {
      /* 000101 imm26  B  (PC + sxTo64(imm26 << 2))
         100101 imm26  BL (PC + sxTo64(imm26 << 2))
      */
      UInt  bLink  = INSN(31,31);
      ULong uimm64 = INSN(25,0) << 2;
      Long  simm64 = (Long)sx_to_64(uimm64, 28);
      if (bLink) {
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
      }
      putPC(mkU64(guest_PC_curr_instr + simm64));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Call;
      DIP("b%s 0x%llx\n", bLink == 1 ? "l" : "",
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* --------------------- B{L} reg --------------------- */
   /* 31      24 22 20    15     9  4
      1101011 00 10 11111 000000 nn 00000  RET  Rn
      1101011 00 01 11111 000000 nn 00000  CALL Rn
      1101011 00 00 11111 000000 nn 00000  JMP  Rn
   */
   if (INSN(31,23) == BITS9(1,1,0,1,0,1,1,0,0)
       && INSN(20,16) == BITS5(1,1,1,1,1)
       && INSN(15,10) == BITS6(0,0,0,0,0,0)
       && INSN(4,0) == BITS5(0,0,0,0,0)) {
      UInt branch_type = INSN(22,21);
      UInt nn          = INSN(9,5);
      if (branch_type == BITS2(1,0) /* RET */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Ret;
         DIP("ret %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,1) /* CALL */) {
         IRTemp dst = newTemp(Ity_I64);
         assign(dst, getIReg64orZR(nn));
         putIReg64orSP(30, mkU64(guest_PC_curr_instr + 4));
         putPC(mkexpr(dst));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Call;
         DIP("blr %s\n", nameIReg64orZR(nn));
         return True;
      }
      if (branch_type == BITS2(0,0) /* JMP */) {
         putPC(getIReg64orZR(nn));
         dres->whatNext    = Dis_StopHere;
         dres->jk_StopHere = Ijk_Boring;
         DIP("jmp %s\n", nameIReg64orZR(nn));
         return True;
      }
   }

   /* -------------------- CB{N}Z -------------------- */
   /* sf 011 010 1 imm19 Rt   CBNZ Xt|Wt, (PC + sxTo64(imm19 << 2))
      sf 011 010 0 imm19 Rt   CBZ  Xt|Wt, (PC + sxTo64(imm19 << 2))
   */
   if (INSN(30,25) == BITS6(0,1,1,0,1,0)) {
      Bool    is64   = INSN(31,31) == 1;
      Bool    bIfZ   = INSN(24,24) == 0;
      ULong   uimm64 = INSN(23,5) << 2;
      UInt    rT     = INSN(4,0);
      Long    simm64 = (Long)sx_to_64(uimm64, 21);
      IRExpr* cond   = NULL;
      if (is64) {
         cond = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
                      getIReg64orZR(rT), mkU64(0));
      } else {
         cond = binop(bIfZ ? Iop_CmpEQ32 : Iop_CmpNE32,
                      getIReg32orZR(rT), mkU32(0));
      }
      stmt( IRStmt_Exit(cond,
                        Ijk_Boring,
                        IRConst_U64(guest_PC_curr_instr + simm64),
                        OFFB_PC) );
      putPC(mkU64(guest_PC_curr_instr + 4));
      dres->whatNext    = Dis_StopHere;
      dres->jk_StopHere = Ijk_Boring;
      DIP("cb%sz %s, 0x%llx\n",
          bIfZ ? "" : "n", nameIRegOrZR(is64, rT),
          guest_PC_curr_instr + simm64);
      return True;
   }

   /* -------------------- TB{N}Z -------------------- */
   /* 31 30      24 23  18    5 4
      b5 011 011 1  b40 imm14 t  TBNZ Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
      b5 011 011 0  b40 imm14 t  TBZ  Xt, #(b5:b40), (PC + sxTo64(imm14 << 2))
   */
6543 UInt b5 = INSN(31,31);
6544 Bool bIfZ = INSN(24,24) == 0;
6545 UInt b40 = INSN(23,19);
6546 UInt imm14 = INSN(18,5);
6547 UInt tt = INSN(4,0);
6548 UInt bitNo = (b5 << 5) | b40;
6549 ULong uimm64 = imm14 << 2;
6550 Long simm64 = sx_to_64(uimm64, 16);
6551 IRExpr* cond
6552 = binop(bIfZ ? Iop_CmpEQ64 : Iop_CmpNE64,
6553 binop(Iop_And64,
6554 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(bitNo)),
6555 mkU64(1)),
6556 mkU64(0));
6557 stmt( IRStmt_Exit(cond,
6558 Ijk_Boring,
6559 IRConst_U64(guest_PC_curr_instr + simm64),
6560 OFFB_PC) );
6561 putPC(mkU64(guest_PC_curr_instr + 4));
6562 dres->whatNext = Dis_StopHere;
6563 dres->jk_StopHere = Ijk_Boring;
6564 DIP("tb%sz %s, #%u, 0x%llx\n",
6565 bIfZ ? "" : "n", nameIReg64orZR(tt), bitNo,
6566 guest_PC_curr_instr + simm64);
6567 return True;
6568 }
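
   /* Worked example for the bit-number computation above
      (illustrative): "tbnz x3, #33, <target>" encodes b5 == 1 and
      b40 == 0b00001, giving bitNo == (1 << 5) | 1 == 33; the IR then
      shifts Xt right by 33 and tests bit 0 of the result. */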
6569
6570 /* -------------------- SVC -------------------- */
6571 /* 11010100 000 imm16 000 01
6572 Don't bother with anything except the imm16==0 case.
6573 */
6574 if (INSN(31,0) == 0xD4000001) {
6575 putPC(mkU64(guest_PC_curr_instr + 4));
6576 dres->whatNext = Dis_StopHere;
6577 dres->jk_StopHere = Ijk_Sys_syscall;
6578 DIP("svc #0\n");
6579 return True;
6580 }
6581
6582 /* ------------------ M{SR,RS} ------------------ */
sewardj6eb5ef82014-07-14 20:39:23 +00006583 /* ---- Cases for TPIDR_EL0 ----
sewardjbbcf1882014-01-12 12:49:10 +00006584 0xD51BD0 010 Rt MSR tpidr_el0, rT
6585 0xD53BD0 010 Rt MRS rT, tpidr_el0
6586 */
6587 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51BD040 /*MSR*/
6588 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53BD040 /*MRS*/) {
6589 Bool toSys = INSN(21,21) == 0;
6590 UInt tt = INSN(4,0);
6591 if (toSys) {
6592 stmt( IRStmt_Put( OFFB_TPIDR_EL0, getIReg64orZR(tt)) );
6593 DIP("msr tpidr_el0, %s\n", nameIReg64orZR(tt));
6594 } else {
6595 putIReg64orZR(tt, IRExpr_Get( OFFB_TPIDR_EL0, Ity_I64 ));
6596 DIP("mrs %s, tpidr_el0\n", nameIReg64orZR(tt));
6597 }
6598 return True;
6599 }
sewardj6eb5ef82014-07-14 20:39:23 +00006600 /* ---- Cases for FPCR ----
sewardjbbcf1882014-01-12 12:49:10 +00006601 0xD51B44 000 Rt MSR fpcr, rT
6602      0xD53B44 000 Rt  MRS rT, fpcr
6603 */
6604 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4400 /*MSR*/
6605 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4400 /*MRS*/) {
6606 Bool toSys = INSN(21,21) == 0;
6607 UInt tt = INSN(4,0);
6608 if (toSys) {
6609 stmt( IRStmt_Put( OFFB_FPCR, getIReg32orZR(tt)) );
6610 DIP("msr fpcr, %s\n", nameIReg64orZR(tt));
6611 } else {
6612 putIReg32orZR(tt, IRExpr_Get(OFFB_FPCR, Ity_I32));
6613 DIP("mrs %s, fpcr\n", nameIReg64orZR(tt));
6614 }
6615 return True;
6616 }
sewardj6eb5ef82014-07-14 20:39:23 +00006617 /* ---- Cases for FPSR ----
sewardj7d009132014-02-20 17:43:38 +00006618 0xD51B44 001 Rt MSR fpsr, rT
6619      0xD53B44 001 Rt  MRS rT, fpsr
sewardja0645d52014-06-28 22:11:16 +00006620 The only part of this we model is FPSR.QC. All other bits
6621 are ignored when writing to it and RAZ when reading from it.
sewardjbbcf1882014-01-12 12:49:10 +00006622 */
6623 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4420 /*MSR*/
6624 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4420 /*MRS*/) {
6625 Bool toSys = INSN(21,21) == 0;
6626 UInt tt = INSN(4,0);
6627 if (toSys) {
sewardja0645d52014-06-28 22:11:16 +00006628 /* Just deal with FPSR.QC. Make up a V128 value which is
6629 zero if Xt[27] is zero and any other value if Xt[27] is
6630 nonzero. */
6631 IRTemp qc64 = newTemp(Ity_I64);
6632 assign(qc64, binop(Iop_And64,
6633 binop(Iop_Shr64, getIReg64orZR(tt), mkU8(27)),
6634 mkU64(1)));
6635 IRExpr* qcV128 = binop(Iop_64HLtoV128, mkexpr(qc64), mkexpr(qc64));
6636 stmt( IRStmt_Put( OFFB_QCFLAG, qcV128 ) );
sewardjbbcf1882014-01-12 12:49:10 +00006637 DIP("msr fpsr, %s\n", nameIReg64orZR(tt));
6638 } else {
sewardja0645d52014-06-28 22:11:16 +00006639 /* Generate a value which is all zeroes except for bit 27,
6640 which must be zero if QCFLAG is all zeroes and one otherwise. */
sewardj8e91fd42014-07-11 12:05:47 +00006641 IRTemp qcV128 = newTempV128();
sewardja0645d52014-06-28 22:11:16 +00006642 assign(qcV128, IRExpr_Get( OFFB_QCFLAG, Ity_V128 ));
6643 IRTemp qc64 = newTemp(Ity_I64);
6644 assign(qc64, binop(Iop_Or64, unop(Iop_V128HIto64, mkexpr(qcV128)),
6645 unop(Iop_V128to64, mkexpr(qcV128))));
6646 IRExpr* res = binop(Iop_Shl64,
6647 unop(Iop_1Uto64,
6648 binop(Iop_CmpNE64, mkexpr(qc64), mkU64(0))),
6649 mkU8(27));
6650 putIReg64orZR(tt, res);
sewardjbbcf1882014-01-12 12:49:10 +00006651 DIP("mrs %s, fpsr\n", nameIReg64orZR(tt));
6652 }
6653 return True;
6654 }
sewardj6eb5ef82014-07-14 20:39:23 +00006655 /* ---- Cases for NZCV ----
sewardjbbcf1882014-01-12 12:49:10 +00006656 D51B42 000 Rt MSR nzcv, rT
6657 D53B42 000 Rt MRS rT, nzcv
sewardja0645d52014-06-28 22:11:16 +00006658 The only parts of NZCV that actually exist are bits 31:28, which
6659 are the N Z C and V bits themselves. Hence the flags thunk provides
6660 all the state we need.
sewardjbbcf1882014-01-12 12:49:10 +00006661 */
6662 if ( (INSN(31,0) & 0xFFFFFFE0) == 0xD51B4200 /*MSR*/
6663 || (INSN(31,0) & 0xFFFFFFE0) == 0xD53B4200 /*MRS*/) {
6664 Bool toSys = INSN(21,21) == 0;
6665 UInt tt = INSN(4,0);
6666 if (toSys) {
6667 IRTemp t = newTemp(Ity_I64);
6668 assign(t, binop(Iop_And64, getIReg64orZR(tt), mkU64(0xF0000000ULL)));
6669 setFlags_COPY(t);
6670         DIP("msr nzcv, %s\n", nameIReg64orZR(tt));
6671 } else {
6672 IRTemp res = newTemp(Ity_I64);
6673 assign(res, mk_arm64g_calculate_flags_nzcv());
6674 putIReg32orZR(tt, unop(Iop_64to32, mkexpr(res)));
6675 DIP("mrs %s, nzcv\n", nameIReg64orZR(tt));
6676 }
6677 return True;
6678 }
sewardj6eb5ef82014-07-14 20:39:23 +00006679 /* ---- Cases for DCZID_EL0 ----
sewardjd512d102014-02-21 14:49:44 +00006680 Don't support arbitrary reads and writes to this register. Just
6681 return the value 16, which indicates that the DC ZVA instruction
6682 is not permitted, so we don't have to emulate it.
6683 D5 3B 00 111 Rt MRS rT, dczid_el0
6684 */
6685 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B00E0) {
6686 UInt tt = INSN(4,0);
6687 putIReg64orZR(tt, mkU64(1<<4));
6688 DIP("mrs %s, dczid_el0 (FAKED)\n", nameIReg64orZR(tt));
6689 return True;
6690 }
sewardj6eb5ef82014-07-14 20:39:23 +00006691 /* ---- Cases for CTR_EL0 ----
sewardj65902992014-05-03 21:20:56 +00006692 We just handle reads, and make up a value from the D and I line
6693 sizes in the VexArchInfo we are given, and patch in the following
6694 fields that the Foundation model gives ("natively"):
6695 CWG = 0b0100, ERG = 0b0100, L1Ip = 0b11
6696      D5 3B 00 001 Rt  MRS rT, ctr_el0
6697 */
6698 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53B0020) {
6699 UInt tt = INSN(4,0);
6700 /* Need to generate a value from dMinLine_lg2_szB and
6701         iMinLine_lg2_szB.  The value in the register is in 32-bit
6702 units, so need to subtract 2 from the values in the
6703 VexArchInfo. We can assume that the values here are valid --
6704 disInstr_ARM64 checks them -- so there's no need to deal with
6705 out-of-range cases. */
6706 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6707 && archinfo->arm64_dMinLine_lg2_szB <= 17
6708 && archinfo->arm64_iMinLine_lg2_szB >= 2
6709 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6710 UInt val
6711 = 0x8440c000 | ((0xF & (archinfo->arm64_dMinLine_lg2_szB - 2)) << 16)
6712 | ((0xF & (archinfo->arm64_iMinLine_lg2_szB - 2)) << 0);
6713 putIReg64orZR(tt, mkU64(val));
6714 DIP("mrs %s, ctr_el0\n", nameIReg64orZR(tt));
6715 return True;
6716 }
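
   /* Worked example (illustrative): with 64-byte D and I cache
      lines, both *MinLine_lg2_szB values are 6, so each 4-bit field
      gets 6 - 2 == 4, and
      val == 0x8440c000 | (4 << 16) | (4 << 0) == 0x8444c004. */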
sewardj6eb5ef82014-07-14 20:39:23 +00006717 /* ---- Cases for CNTVCT_EL0 ----
6718 This is a timestamp counter of some sort. Support reads of it only
6719 by passing through to the host.
6720 D5 3B E0 010 Rt MRS Xt, cntvct_el0
6721 */
6722 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD53BE040) {
6723 UInt tt = INSN(4,0);
6724 IRTemp val = newTemp(Ity_I64);
6725 IRExpr** args = mkIRExprVec_0();
6726 IRDirty* d = unsafeIRDirty_1_N (
6727 val,
6728 0/*regparms*/,
6729 "arm64g_dirtyhelper_MRS_CNTVCT_EL0",
6730 &arm64g_dirtyhelper_MRS_CNTVCT_EL0,
6731 args
6732 );
6733 /* execute the dirty call, dumping the result in val. */
6734 stmt( IRStmt_Dirty(d) );
6735 putIReg64orZR(tt, mkexpr(val));
6736 DIP("mrs %s, cntvct_el0\n", nameIReg64orZR(tt));
6737 return True;
6738 }
sewardjbbcf1882014-01-12 12:49:10 +00006739
sewardj65902992014-05-03 21:20:56 +00006740 /* ------------------ IC_IVAU ------------------ */
6741 /* D5 0B 75 001 Rt ic ivau, rT
6742 */
6743 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7520) {
6744 /* We will always be provided with a valid iMinLine value. */
6745 vassert(archinfo->arm64_iMinLine_lg2_szB >= 2
6746 && archinfo->arm64_iMinLine_lg2_szB <= 17);
6747 /* Round the requested address, in rT, down to the start of the
6748 containing block. */
6749 UInt tt = INSN(4,0);
6750 ULong lineszB = 1ULL << archinfo->arm64_iMinLine_lg2_szB;
6751 IRTemp addr = newTemp(Ity_I64);
6752 assign( addr, binop( Iop_And64,
6753 getIReg64orZR(tt),
6754 mkU64(~(lineszB - 1))) );
6755 /* Set the invalidation range, request exit-and-invalidate, with
6756 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006757 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6758 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006759 /* be paranoid ... */
6760 stmt( IRStmt_MBE(Imbe_Fence) );
6761 putPC(mkU64( guest_PC_curr_instr + 4 ));
6762 dres->whatNext = Dis_StopHere;
sewardj05f5e012014-05-04 10:52:11 +00006763 dres->jk_StopHere = Ijk_InvalICache;
sewardj65902992014-05-03 21:20:56 +00006764 DIP("ic ivau, %s\n", nameIReg64orZR(tt));
6765 return True;
6766 }
6767
6768 /* ------------------ DC_CVAU ------------------ */
6769 /* D5 0B 7B 001 Rt dc cvau, rT
6770 */
6771 if ((INSN(31,0) & 0xFFFFFFE0) == 0xD50B7B20) {
6772 /* Exactly the same scheme as for IC IVAU, except we observe the
sewardj05f5e012014-05-04 10:52:11 +00006773 dMinLine size, and request an Ijk_FlushDCache instead of
6774 Ijk_InvalICache. */
sewardj65902992014-05-03 21:20:56 +00006775 /* We will always be provided with a valid dMinLine value. */
6776 vassert(archinfo->arm64_dMinLine_lg2_szB >= 2
6777 && archinfo->arm64_dMinLine_lg2_szB <= 17);
6778 /* Round the requested address, in rT, down to the start of the
6779 containing block. */
6780 UInt tt = INSN(4,0);
6781 ULong lineszB = 1ULL << archinfo->arm64_dMinLine_lg2_szB;
6782 IRTemp addr = newTemp(Ity_I64);
6783 assign( addr, binop( Iop_And64,
6784 getIReg64orZR(tt),
6785 mkU64(~(lineszB - 1))) );
6786 /* Set the flush range, request exit-and-flush, with
6787 continuation at the next instruction. */
sewardj05f5e012014-05-04 10:52:11 +00006788 stmt(IRStmt_Put(OFFB_CMSTART, mkexpr(addr)));
6789 stmt(IRStmt_Put(OFFB_CMLEN, mkU64(lineszB)));
sewardj65902992014-05-03 21:20:56 +00006790 /* be paranoid ... */
6791 stmt( IRStmt_MBE(Imbe_Fence) );
6792 putPC(mkU64( guest_PC_curr_instr + 4 ));
6793 dres->whatNext = Dis_StopHere;
6794 dres->jk_StopHere = Ijk_FlushDCache;
6795 DIP("dc cvau, %s\n", nameIReg64orZR(tt));
6796 return True;
6797 }
6798
6799 /* ------------------ ISB, DMB, DSB ------------------ */
sewardj25842552014-10-31 10:25:19 +00006800 /* 31 21 11 7 6 4
6801 11010 10100 0 00 011 0011 CRm 1 01 11111 DMB opt
6802 11010 10100 0 00 011 0011 CRm 1 00 11111 DSB opt
6803 11010 10100 0 00 011 0011 CRm 1 10 11111 ISB opt
6804 */
6805 if (INSN(31,22) == BITS10(1,1,0,1,0,1,0,1,0,0)
6806 && INSN(21,12) == BITS10(0,0,0,0,1,1,0,0,1,1)
6807 && INSN(7,7) == 1
6808 && INSN(6,5) <= BITS2(1,0) && INSN(4,0) == BITS5(1,1,1,1,1)) {
6809 UInt opc = INSN(6,5);
6810 UInt CRm = INSN(11,8);
6811 vassert(opc <= 2 && CRm <= 15);
sewardjd512d102014-02-21 14:49:44 +00006812 stmt(IRStmt_MBE(Imbe_Fence));
sewardj25842552014-10-31 10:25:19 +00006813 const HChar* opNames[3]
6814 = { "dsb", "dmb", "isb" };
6815 const HChar* howNames[16]
6816 = { "#0", "oshld", "oshst", "osh", "#4", "nshld", "nshst", "nsh",
6817 "#8", "ishld", "ishst", "ish", "#12", "ld", "st", "sy" };
6818 DIP("%s %s\n", opNames[opc], howNames[CRm]);
sewardj65902992014-05-03 21:20:56 +00006819 return True;
6820 }
sewardjbbcf1882014-01-12 12:49:10 +00006821
sewardjdc9259c2014-02-27 11:10:19 +00006822 /* -------------------- NOP -------------------- */
6823 if (INSN(31,0) == 0xD503201F) {
6824 DIP("nop\n");
6825 return True;
6826 }
6827
sewardj39b51682014-11-25 12:17:53 +00006828 /* -------------------- BRK -------------------- */
6829 /* 31 23 20 4
6830 1101 0100 001 imm16 00000 BRK #imm16
6831 */
6832 if (INSN(31,24) == BITS8(1,1,0,1,0,1,0,0)
6833 && INSN(23,21) == BITS3(0,0,1) && INSN(4,0) == BITS5(0,0,0,0,0)) {
6834 UInt imm16 = INSN(20,5);
6835 /* Request SIGTRAP and then restart of this insn. */
6836 putPC(mkU64(guest_PC_curr_instr + 0));
6837 dres->whatNext = Dis_StopHere;
6838 dres->jk_StopHere = Ijk_SigTRAP;
6839 DIP("brk #%u\n", imm16);
6840 return True;
6841 }
6842
sewardjbbcf1882014-01-12 12:49:10 +00006843 //fail:
6844 vex_printf("ARM64 front end: branch_etc\n");
6845 return False;
6846# undef INSN
6847}
6848
6849
6850/*------------------------------------------------------------*/
sewardj8e91fd42014-07-11 12:05:47 +00006851/*--- SIMD and FP instructions: helper functions ---*/
sewardjbbcf1882014-01-12 12:49:10 +00006852/*------------------------------------------------------------*/
6853
sewardjd96daf62014-06-15 08:17:35 +00006854/* Some constructors for interleave/deinterleave expressions. */
sewardje520bb32014-02-17 11:00:53 +00006855
sewardjd96daf62014-06-15 08:17:35 +00006856static IRExpr* mk_CatEvenLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6857 // returns a0 b0
6858 return binop(Iop_InterleaveLO64x2, mkexpr(a10), mkexpr(b10));
6859}
sewardje520bb32014-02-17 11:00:53 +00006860
sewardjd96daf62014-06-15 08:17:35 +00006861static IRExpr* mk_CatOddLanes64x2 ( IRTemp a10, IRTemp b10 ) {
6862 // returns a1 b1
6863 return binop(Iop_InterleaveHI64x2, mkexpr(a10), mkexpr(b10));
6864}
sewardje520bb32014-02-17 11:00:53 +00006865
sewardjd96daf62014-06-15 08:17:35 +00006866static IRExpr* mk_CatEvenLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6867 // returns a2 a0 b2 b0
6868 return binop(Iop_CatEvenLanes32x4, mkexpr(a3210), mkexpr(b3210));
6869}
6870
6871static IRExpr* mk_CatOddLanes32x4 ( IRTemp a3210, IRTemp b3210 ) {
6872 // returns a3 a1 b3 b1
6873 return binop(Iop_CatOddLanes32x4, mkexpr(a3210), mkexpr(b3210));
6874}
6875
6876static IRExpr* mk_InterleaveLO32x4 ( IRTemp a3210, IRTemp b3210 ) {
6877 // returns a1 b1 a0 b0
6878 return binop(Iop_InterleaveLO32x4, mkexpr(a3210), mkexpr(b3210));
6879}
6880
6881static IRExpr* mk_InterleaveHI32x4 ( IRTemp a3210, IRTemp b3210 ) {
6882 // returns a3 b3 a2 b2
6883 return binop(Iop_InterleaveHI32x4, mkexpr(a3210), mkexpr(b3210));
6884}
6885
6886static IRExpr* mk_CatEvenLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6887 // returns a6 a4 a2 a0 b6 b4 b2 b0
6888 return binop(Iop_CatEvenLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6889}
6890
6891static IRExpr* mk_CatOddLanes16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6892 // returns a7 a5 a3 a1 b7 b5 b3 b1
6893 return binop(Iop_CatOddLanes16x8, mkexpr(a76543210), mkexpr(b76543210));
6894}
6895
6896static IRExpr* mk_InterleaveLO16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6897 // returns a3 b3 a2 b2 a1 b1 a0 b0
6898 return binop(Iop_InterleaveLO16x8, mkexpr(a76543210), mkexpr(b76543210));
6899}
6900
6901static IRExpr* mk_InterleaveHI16x8 ( IRTemp a76543210, IRTemp b76543210 ) {
6902 // returns a7 b7 a6 b6 a5 b5 a4 b4
6903 return binop(Iop_InterleaveHI16x8, mkexpr(a76543210), mkexpr(b76543210));
6904}
6905
6906static IRExpr* mk_CatEvenLanes8x16 ( IRTemp aFEDCBA9876543210,
6907 IRTemp bFEDCBA9876543210 ) {
6908 // returns aE aC aA a8 a6 a4 a2 a0 bE bC bA b8 b6 b4 b2 b0
6909 return binop(Iop_CatEvenLanes8x16, mkexpr(aFEDCBA9876543210),
6910 mkexpr(bFEDCBA9876543210));
6911}
6912
6913static IRExpr* mk_CatOddLanes8x16 ( IRTemp aFEDCBA9876543210,
6914 IRTemp bFEDCBA9876543210 ) {
6915 // returns aF aD aB a9 a7 a5 a3 a1 bF bD bB b9 b7 b5 b3 b1
6916 return binop(Iop_CatOddLanes8x16, mkexpr(aFEDCBA9876543210),
6917 mkexpr(bFEDCBA9876543210));
6918}
6919
6920static IRExpr* mk_InterleaveLO8x16 ( IRTemp aFEDCBA9876543210,
6921 IRTemp bFEDCBA9876543210 ) {
6922 // returns a7 b7 a6 b6 a5 b5 a4 b4 a3 b3 a2 b2 a1 b1 a0 b0
6923 return binop(Iop_InterleaveLO8x16, mkexpr(aFEDCBA9876543210),
6924 mkexpr(bFEDCBA9876543210));
6925}
6926
6927static IRExpr* mk_InterleaveHI8x16 ( IRTemp aFEDCBA9876543210,
6928 IRTemp bFEDCBA9876543210 ) {
6929 // returns aF bF aE bE aD bD aC bC aB bB aA bA a9 b9 a8 b8
6930 return binop(Iop_InterleaveHI8x16, mkexpr(aFEDCBA9876543210),
6931 mkexpr(bFEDCBA9876543210));
6932}
sewardjecde6972014-02-05 11:01:19 +00006933
sewardjbbcf1882014-01-12 12:49:10 +00006934/* Generate N copies of |bit| in the bottom of a ULong. */
6935static ULong Replicate ( ULong bit, Int N )
6936{
sewardj606c4ba2014-01-26 19:11:14 +00006937 vassert(bit <= 1 && N >= 1 && N < 64);
6938 if (bit == 0) {
6939 return 0;
6940 } else {
6941 /* Careful. This won't work for N == 64. */
6942 return (1ULL << N) - 1;
6943 }
sewardjbbcf1882014-01-12 12:49:10 +00006944}
6945
sewardjfab09142014-02-10 10:28:13 +00006946static ULong Replicate32x2 ( ULong bits32 )
6947{
6948 vassert(0 == (bits32 & ~0xFFFFFFFFULL));
6949 return (bits32 << 32) | bits32;
6950}
6951
6952static ULong Replicate16x4 ( ULong bits16 )
6953{
6954 vassert(0 == (bits16 & ~0xFFFFULL));
6955 return Replicate32x2((bits16 << 16) | bits16);
6956}
6957
6958static ULong Replicate8x8 ( ULong bits8 )
6959{
6960 vassert(0 == (bits8 & ~0xFFULL));
6961 return Replicate16x4((bits8 << 8) | bits8);
6962}
6963
6964/* Expand the VFPExpandImm-style encoding in the bottom 8 bits of
6965 |imm8| to either a 32-bit value if N is 32 or a 64 bit value if N
6966 is 64. In the former case, the upper 32 bits of the returned value
6967 are guaranteed to be zero. */
sewardjbbcf1882014-01-12 12:49:10 +00006968static ULong VFPExpandImm ( ULong imm8, Int N )
6969{
sewardj606c4ba2014-01-26 19:11:14 +00006970 vassert(imm8 <= 0xFF);
6971 vassert(N == 32 || N == 64);
6972 Int E = ((N == 32) ? 8 : 11) - 2; // The spec incorrectly omits the -2.
6973 Int F = N - E - 1;
6974 ULong imm8_6 = (imm8 >> 6) & 1;
6975 /* sign: 1 bit */
6976 /* exp: E bits */
6977 /* frac: F bits */
6978 ULong sign = (imm8 >> 7) & 1;
6979 ULong exp = ((imm8_6 ^ 1) << (E-1)) | Replicate(imm8_6, E-1);
6980 ULong frac = ((imm8 & 63) << (F-6)) | Replicate(0, F-6);
6981 vassert(sign < (1ULL << 1));
6982 vassert(exp < (1ULL << E));
6983 vassert(frac < (1ULL << F));
6984 vassert(1 + E + F == N);
6985 ULong res = (sign << (E+F)) | (exp << F) | frac;
6986 return res;
sewardjbbcf1882014-01-12 12:49:10 +00006987}
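
/* A minimal self-check sketch for VFPExpandImm, not called by the
   decoder and given purely as illustration; the function name is made
   up, and the unused attribute just silences warnings about an
   uncalled static function.  imm8 == 0x70 is the VFP encoding of 1.0
   at both widths. */
static __attribute__((unused)) void example_VFPExpandImm ( void )
{
   /* 0x70 == 0b01110000: sign 0, imm8<6> == 1, fraction bits 110000 */
   vassert(VFPExpandImm(0x70, 64) == 0x3FF0000000000000ULL); /* 1.0  */
   vassert(VFPExpandImm(0x70, 32) == 0x3F800000ULL);         /* 1.0f */
}
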
6988
sewardjfab09142014-02-10 10:28:13 +00006989/* Expand an AdvSIMDExpandImm-style encoding into a 64-bit value.
6990 This might fail, as indicated by the returned Bool. Page 2530 of
6991 the manual. */
6992static Bool AdvSIMDExpandImm ( /*OUT*/ULong* res,
6993 UInt op, UInt cmode, UInt imm8 )
6994{
6995 vassert(op <= 1);
6996 vassert(cmode <= 15);
6997 vassert(imm8 <= 255);
6998
6999 *res = 0; /* will overwrite iff returning True */
7000
7001 ULong imm64 = 0;
7002 Bool testimm8 = False;
7003
7004 switch (cmode >> 1) {
7005 case 0:
7006 testimm8 = False; imm64 = Replicate32x2(imm8); break;
7007 case 1:
7008 testimm8 = True; imm64 = Replicate32x2(imm8 << 8); break;
7009 case 2:
7010 testimm8 = True; imm64 = Replicate32x2(imm8 << 16); break;
7011 case 3:
7012 testimm8 = True; imm64 = Replicate32x2(imm8 << 24); break;
7013 case 4:
7014 testimm8 = False; imm64 = Replicate16x4(imm8); break;
7015 case 5:
7016 testimm8 = True; imm64 = Replicate16x4(imm8 << 8); break;
7017 case 6:
7018 testimm8 = True;
7019 if ((cmode & 1) == 0)
7020 imm64 = Replicate32x2((imm8 << 8) | 0xFF);
7021 else
7022 imm64 = Replicate32x2((imm8 << 16) | 0xFFFF);
7023 break;
7024 case 7:
7025 testimm8 = False;
7026 if ((cmode & 1) == 0 && op == 0)
7027 imm64 = Replicate8x8(imm8);
7028 if ((cmode & 1) == 0 && op == 1) {
7029 imm64 = 0; imm64 |= (imm8 & 0x80) ? 0xFF : 0x00;
7030 imm64 <<= 8; imm64 |= (imm8 & 0x40) ? 0xFF : 0x00;
7031 imm64 <<= 8; imm64 |= (imm8 & 0x20) ? 0xFF : 0x00;
7032 imm64 <<= 8; imm64 |= (imm8 & 0x10) ? 0xFF : 0x00;
7033 imm64 <<= 8; imm64 |= (imm8 & 0x08) ? 0xFF : 0x00;
7034 imm64 <<= 8; imm64 |= (imm8 & 0x04) ? 0xFF : 0x00;
7035 imm64 <<= 8; imm64 |= (imm8 & 0x02) ? 0xFF : 0x00;
7036 imm64 <<= 8; imm64 |= (imm8 & 0x01) ? 0xFF : 0x00;
7037 }
7038 if ((cmode & 1) == 1 && op == 0) {
7039 ULong imm8_7 = (imm8 >> 7) & 1;
7040 ULong imm8_6 = (imm8 >> 6) & 1;
7041 ULong imm8_50 = imm8 & 63;
7042 ULong imm32 = (imm8_7 << (1 + 5 + 6 + 19))
7043 | ((imm8_6 ^ 1) << (5 + 6 + 19))
7044 | (Replicate(imm8_6, 5) << (6 + 19))
7045 | (imm8_50 << 19);
7046 imm64 = Replicate32x2(imm32);
7047 }
7048 if ((cmode & 1) == 1 && op == 1) {
7049 // imm64 = imm8<7>:NOT(imm8<6>)
7050 // :Replicate(imm8<6>,8):imm8<5:0>:Zeros(48);
7051 ULong imm8_7 = (imm8 >> 7) & 1;
7052 ULong imm8_6 = (imm8 >> 6) & 1;
7053 ULong imm8_50 = imm8 & 63;
7054 imm64 = (imm8_7 << 63) | ((imm8_6 ^ 1) << 62)
7055 | (Replicate(imm8_6, 8) << 54)
7056 | (imm8_50 << 48);
7057 }
7058 break;
7059 default:
7060 vassert(0);
7061 }
7062
7063 if (testimm8 && imm8 == 0)
7064 return False;
7065
7066 *res = imm64;
7067 return True;
7068}
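
/* A minimal self-check sketch for AdvSIMDExpandImm, not called by the
   decoder; the function name is made up.  cmode == 14 with op == 1
   widens each bit of imm8 to a whole byte, and the cmode forms that
   shift imm8 reject a zero imm8. */
static __attribute__((unused)) void example_AdvSIMDExpandImm ( void )
{
   ULong v = 0;
   /* Bits 7 and 0 of 0x81 become bytes 7 and 0 of the result. */
   vassert(AdvSIMDExpandImm(&v, 1/*op*/, 14/*cmode*/, 0x81));
   vassert(v == 0xFF000000000000FFULL);
   /* imm8 == 0 is invalid for cmode == 2 (a "shifted imm8" form). */
   vassert(! AdvSIMDExpandImm(&v, 0/*op*/, 2/*cmode*/, 0x00));
}
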
7069
sewardj606c4ba2014-01-26 19:11:14 +00007070/* Help a bit for decoding laneage for vector operations that can be
7071 of the form 4x32, 2x64 or 2x32-and-zero-upper-half, as encoded by Q
7072 and SZ bits, typically for vector floating point. */
7073static Bool getLaneInfo_Q_SZ ( /*OUT*/IRType* tyI, /*OUT*/IRType* tyF,
7074 /*OUT*/UInt* nLanes, /*OUT*/Bool* zeroUpper,
7075 /*OUT*/const HChar** arrSpec,
7076 Bool bitQ, Bool bitSZ )
7077{
7078 vassert(bitQ == True || bitQ == False);
7079 vassert(bitSZ == True || bitSZ == False);
7080 if (bitQ && bitSZ) { // 2x64
7081 if (tyI) *tyI = Ity_I64;
7082 if (tyF) *tyF = Ity_F64;
7083 if (nLanes) *nLanes = 2;
7084 if (zeroUpper) *zeroUpper = False;
7085 if (arrSpec) *arrSpec = "2d";
7086 return True;
7087 }
7088 if (bitQ && !bitSZ) { // 4x32
7089 if (tyI) *tyI = Ity_I32;
7090 if (tyF) *tyF = Ity_F32;
7091 if (nLanes) *nLanes = 4;
7092 if (zeroUpper) *zeroUpper = False;
7093 if (arrSpec) *arrSpec = "4s";
7094 return True;
7095 }
7096 if (!bitQ && !bitSZ) { // 2x32
7097 if (tyI) *tyI = Ity_I32;
7098 if (tyF) *tyF = Ity_F32;
7099 if (nLanes) *nLanes = 2;
7100 if (zeroUpper) *zeroUpper = True;
7101 if (arrSpec) *arrSpec = "2s";
7102 return True;
7103 }
7104 // Else impliedly 1x64, which isn't allowed.
7105 return False;
7106}
7107
sewardje520bb32014-02-17 11:00:53 +00007108/* Helper for decoding laneage for shift-style vector operations
7109 that involve an immediate shift amount. */
7110static Bool getLaneInfo_IMMH_IMMB ( /*OUT*/UInt* shift, /*OUT*/UInt* szBlg2,
7111 UInt immh, UInt immb )
7112{
7113 vassert(immh < (1<<4));
7114 vassert(immb < (1<<3));
7115 UInt immhb = (immh << 3) | immb;
7116 if (immh & 8) {
7117 if (shift) *shift = 128 - immhb;
7118 if (szBlg2) *szBlg2 = 3;
7119 return True;
7120 }
7121 if (immh & 4) {
7122 if (shift) *shift = 64 - immhb;
7123 if (szBlg2) *szBlg2 = 2;
7124 return True;
7125 }
7126 if (immh & 2) {
7127 if (shift) *shift = 32 - immhb;
7128 if (szBlg2) *szBlg2 = 1;
7129 return True;
7130 }
7131 if (immh & 1) {
7132 if (shift) *shift = 16 - immhb;
7133 if (szBlg2) *szBlg2 = 0;
7134 return True;
7135 }
7136 return False;
7137}
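
/* A minimal self-check sketch for getLaneInfo_IMMH_IMMB, not called
   by the decoder; the function name is made up.  immh:immb ==
   0b0010:0b101 gives immhb == 21, which selects 16-bit lanes
   (szBlg2 == 1) with shift == 32 - 21 == 11. */
static __attribute__((unused)) void example_getLaneInfo_IMMH_IMMB ( void )
{
   UInt shift = 0, szBlg2 = 0;
   vassert(getLaneInfo_IMMH_IMMB(&shift, &szBlg2, 2/*immh*/, 5/*immb*/));
   vassert(shift == 11 && szBlg2 == 1);
}
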
7138
sewardjecde6972014-02-05 11:01:19 +00007139/* Generate IR to fold all lanes of the V128 value in 'src' as
7140 characterised by the operator 'op', and return the result in the
7141 bottom bits of a V128, with all other bits set to zero. */
sewardjdf9d6d52014-06-27 10:43:22 +00007142static IRTemp math_FOLDV ( IRTemp src, IROp op )
sewardjecde6972014-02-05 11:01:19 +00007143{
7144 /* The basic idea is to use repeated applications of Iop_CatEven*
7145 and Iop_CatOdd* operators to 'src' so as to clone each lane into
7146 a complete vector. Then fold all those vectors with 'op' and
7147 zero out all but the least significant lane. */
7148 switch (op) {
7149 case Iop_Min8Sx16: case Iop_Min8Ux16:
sewardjb9aff1e2014-06-15 21:55:33 +00007150 case Iop_Max8Sx16: case Iop_Max8Ux16: case Iop_Add8x16: {
sewardjfab09142014-02-10 10:28:13 +00007151 /* NB: temp naming here is misleading -- the naming is for 8
7152 lanes of 16 bit, whereas what is being operated on is 16
7153 lanes of 8 bits. */
7154 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007155 IRTemp x76547654 = newTempV128();
7156 IRTemp x32103210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007157 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7158 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007159 IRTemp x76767676 = newTempV128();
7160 IRTemp x54545454 = newTempV128();
7161 IRTemp x32323232 = newTempV128();
7162 IRTemp x10101010 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007163 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7164 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7165 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7166 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007167 IRTemp x77777777 = newTempV128();
7168 IRTemp x66666666 = newTempV128();
7169 IRTemp x55555555 = newTempV128();
7170 IRTemp x44444444 = newTempV128();
7171 IRTemp x33333333 = newTempV128();
7172 IRTemp x22222222 = newTempV128();
7173 IRTemp x11111111 = newTempV128();
7174 IRTemp x00000000 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007175 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7176 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7177 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7178 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7179 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7180 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7181 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7182 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
7183 /* Naming not misleading after here. */
sewardj8e91fd42014-07-11 12:05:47 +00007184 IRTemp xAllF = newTempV128();
7185 IRTemp xAllE = newTempV128();
7186 IRTemp xAllD = newTempV128();
7187 IRTemp xAllC = newTempV128();
7188 IRTemp xAllB = newTempV128();
7189 IRTemp xAllA = newTempV128();
7190 IRTemp xAll9 = newTempV128();
7191 IRTemp xAll8 = newTempV128();
7192 IRTemp xAll7 = newTempV128();
7193 IRTemp xAll6 = newTempV128();
7194 IRTemp xAll5 = newTempV128();
7195 IRTemp xAll4 = newTempV128();
7196 IRTemp xAll3 = newTempV128();
7197 IRTemp xAll2 = newTempV128();
7198 IRTemp xAll1 = newTempV128();
7199 IRTemp xAll0 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007200 assign(xAllF, mk_CatOddLanes8x16 (x77777777, x77777777));
7201 assign(xAllE, mk_CatEvenLanes8x16(x77777777, x77777777));
7202 assign(xAllD, mk_CatOddLanes8x16 (x66666666, x66666666));
7203 assign(xAllC, mk_CatEvenLanes8x16(x66666666, x66666666));
7204 assign(xAllB, mk_CatOddLanes8x16 (x55555555, x55555555));
7205 assign(xAllA, mk_CatEvenLanes8x16(x55555555, x55555555));
7206 assign(xAll9, mk_CatOddLanes8x16 (x44444444, x44444444));
7207 assign(xAll8, mk_CatEvenLanes8x16(x44444444, x44444444));
7208 assign(xAll7, mk_CatOddLanes8x16 (x33333333, x33333333));
7209 assign(xAll6, mk_CatEvenLanes8x16(x33333333, x33333333));
7210 assign(xAll5, mk_CatOddLanes8x16 (x22222222, x22222222));
7211 assign(xAll4, mk_CatEvenLanes8x16(x22222222, x22222222));
7212 assign(xAll3, mk_CatOddLanes8x16 (x11111111, x11111111));
7213 assign(xAll2, mk_CatEvenLanes8x16(x11111111, x11111111));
7214 assign(xAll1, mk_CatOddLanes8x16 (x00000000, x00000000));
7215 assign(xAll0, mk_CatEvenLanes8x16(x00000000, x00000000));
sewardj8e91fd42014-07-11 12:05:47 +00007216 IRTemp maxFE = newTempV128();
7217 IRTemp maxDC = newTempV128();
7218 IRTemp maxBA = newTempV128();
7219 IRTemp max98 = newTempV128();
7220 IRTemp max76 = newTempV128();
7221 IRTemp max54 = newTempV128();
7222 IRTemp max32 = newTempV128();
7223 IRTemp max10 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007224 assign(maxFE, binop(op, mkexpr(xAllF), mkexpr(xAllE)));
7225 assign(maxDC, binop(op, mkexpr(xAllD), mkexpr(xAllC)));
7226 assign(maxBA, binop(op, mkexpr(xAllB), mkexpr(xAllA)));
7227 assign(max98, binop(op, mkexpr(xAll9), mkexpr(xAll8)));
7228 assign(max76, binop(op, mkexpr(xAll7), mkexpr(xAll6)));
7229 assign(max54, binop(op, mkexpr(xAll5), mkexpr(xAll4)));
7230 assign(max32, binop(op, mkexpr(xAll3), mkexpr(xAll2)));
7231 assign(max10, binop(op, mkexpr(xAll1), mkexpr(xAll0)));
sewardj8e91fd42014-07-11 12:05:47 +00007232 IRTemp maxFEDC = newTempV128();
7233 IRTemp maxBA98 = newTempV128();
7234 IRTemp max7654 = newTempV128();
7235 IRTemp max3210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007236 assign(maxFEDC, binop(op, mkexpr(maxFE), mkexpr(maxDC)));
7237 assign(maxBA98, binop(op, mkexpr(maxBA), mkexpr(max98)));
7238 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7239 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007240 IRTemp maxFEDCBA98 = newTempV128();
7241 IRTemp max76543210 = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007242 assign(maxFEDCBA98, binop(op, mkexpr(maxFEDC), mkexpr(maxBA98)));
7243 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007244 IRTemp maxAllLanes = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007245 assign(maxAllLanes, binop(op, mkexpr(maxFEDCBA98),
7246 mkexpr(max76543210)));
sewardj8e91fd42014-07-11 12:05:47 +00007247 IRTemp res = newTempV128();
sewardjfab09142014-02-10 10:28:13 +00007248 assign(res, unop(Iop_ZeroHI120ofV128, mkexpr(maxAllLanes)));
7249 return res;
sewardjecde6972014-02-05 11:01:19 +00007250 }
7251 case Iop_Min16Sx8: case Iop_Min16Ux8:
sewardjb9aff1e2014-06-15 21:55:33 +00007252 case Iop_Max16Sx8: case Iop_Max16Ux8: case Iop_Add16x8: {
sewardjecde6972014-02-05 11:01:19 +00007253 IRTemp x76543210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007254 IRTemp x76547654 = newTempV128();
7255 IRTemp x32103210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007256 assign(x76547654, mk_CatOddLanes64x2 (x76543210, x76543210));
7257 assign(x32103210, mk_CatEvenLanes64x2(x76543210, x76543210));
sewardj8e91fd42014-07-11 12:05:47 +00007258 IRTemp x76767676 = newTempV128();
7259 IRTemp x54545454 = newTempV128();
7260 IRTemp x32323232 = newTempV128();
7261 IRTemp x10101010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007262 assign(x76767676, mk_CatOddLanes32x4 (x76547654, x76547654));
7263 assign(x54545454, mk_CatEvenLanes32x4(x76547654, x76547654));
7264 assign(x32323232, mk_CatOddLanes32x4 (x32103210, x32103210));
7265 assign(x10101010, mk_CatEvenLanes32x4(x32103210, x32103210));
sewardj8e91fd42014-07-11 12:05:47 +00007266 IRTemp x77777777 = newTempV128();
7267 IRTemp x66666666 = newTempV128();
7268 IRTemp x55555555 = newTempV128();
7269 IRTemp x44444444 = newTempV128();
7270 IRTemp x33333333 = newTempV128();
7271 IRTemp x22222222 = newTempV128();
7272 IRTemp x11111111 = newTempV128();
7273 IRTemp x00000000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007274 assign(x77777777, mk_CatOddLanes16x8 (x76767676, x76767676));
7275 assign(x66666666, mk_CatEvenLanes16x8(x76767676, x76767676));
7276 assign(x55555555, mk_CatOddLanes16x8 (x54545454, x54545454));
7277 assign(x44444444, mk_CatEvenLanes16x8(x54545454, x54545454));
7278 assign(x33333333, mk_CatOddLanes16x8 (x32323232, x32323232));
7279 assign(x22222222, mk_CatEvenLanes16x8(x32323232, x32323232));
7280 assign(x11111111, mk_CatOddLanes16x8 (x10101010, x10101010));
7281 assign(x00000000, mk_CatEvenLanes16x8(x10101010, x10101010));
sewardj8e91fd42014-07-11 12:05:47 +00007282 IRTemp max76 = newTempV128();
7283 IRTemp max54 = newTempV128();
7284 IRTemp max32 = newTempV128();
7285 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007286 assign(max76, binop(op, mkexpr(x77777777), mkexpr(x66666666)));
7287 assign(max54, binop(op, mkexpr(x55555555), mkexpr(x44444444)));
7288 assign(max32, binop(op, mkexpr(x33333333), mkexpr(x22222222)));
7289 assign(max10, binop(op, mkexpr(x11111111), mkexpr(x00000000)));
sewardj8e91fd42014-07-11 12:05:47 +00007290 IRTemp max7654 = newTempV128();
7291 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007292 assign(max7654, binop(op, mkexpr(max76), mkexpr(max54)));
7293 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007294 IRTemp max76543210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007295 assign(max76543210, binop(op, mkexpr(max7654), mkexpr(max3210)));
sewardj8e91fd42014-07-11 12:05:47 +00007296 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007297 assign(res, unop(Iop_ZeroHI112ofV128, mkexpr(max76543210)));
7298 return res;
7299 }
sewardj5cb53e72015-02-08 12:08:56 +00007300 case Iop_Max32Fx4: case Iop_Min32Fx4:
sewardjecde6972014-02-05 11:01:19 +00007301 case Iop_Min32Sx4: case Iop_Min32Ux4:
sewardjb9aff1e2014-06-15 21:55:33 +00007302 case Iop_Max32Sx4: case Iop_Max32Ux4: case Iop_Add32x4: {
sewardjecde6972014-02-05 11:01:19 +00007303 IRTemp x3210 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007304 IRTemp x3232 = newTempV128();
7305 IRTemp x1010 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007306 assign(x3232, mk_CatOddLanes64x2 (x3210, x3210));
7307 assign(x1010, mk_CatEvenLanes64x2(x3210, x3210));
sewardj8e91fd42014-07-11 12:05:47 +00007308 IRTemp x3333 = newTempV128();
7309 IRTemp x2222 = newTempV128();
7310 IRTemp x1111 = newTempV128();
7311 IRTemp x0000 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007312 assign(x3333, mk_CatOddLanes32x4 (x3232, x3232));
7313 assign(x2222, mk_CatEvenLanes32x4(x3232, x3232));
7314 assign(x1111, mk_CatOddLanes32x4 (x1010, x1010));
7315 assign(x0000, mk_CatEvenLanes32x4(x1010, x1010));
sewardj8e91fd42014-07-11 12:05:47 +00007316 IRTemp max32 = newTempV128();
7317 IRTemp max10 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007318 assign(max32, binop(op, mkexpr(x3333), mkexpr(x2222)));
7319 assign(max10, binop(op, mkexpr(x1111), mkexpr(x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007320 IRTemp max3210 = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007321 assign(max3210, binop(op, mkexpr(max32), mkexpr(max10)));
sewardj8e91fd42014-07-11 12:05:47 +00007322 IRTemp res = newTempV128();
sewardjecde6972014-02-05 11:01:19 +00007323 assign(res, unop(Iop_ZeroHI96ofV128, mkexpr(max3210)));
7324 return res;
7325 }
sewardja5a6b752014-06-30 07:33:56 +00007326 case Iop_Add64x2: {
7327 IRTemp x10 = src;
sewardj8e91fd42014-07-11 12:05:47 +00007328 IRTemp x00 = newTempV128();
7329 IRTemp x11 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007330 assign(x11, binop(Iop_InterleaveHI64x2, mkexpr(x10), mkexpr(x10)));
7331 assign(x00, binop(Iop_InterleaveLO64x2, mkexpr(x10), mkexpr(x10)));
sewardj8e91fd42014-07-11 12:05:47 +00007332 IRTemp max10 = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007333 assign(max10, binop(op, mkexpr(x11), mkexpr(x00)));
sewardj8e91fd42014-07-11 12:05:47 +00007334 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007335 assign(res, unop(Iop_ZeroHI64ofV128, mkexpr(max10)));
7336 return res;
7337 }
sewardjecde6972014-02-05 11:01:19 +00007338 default:
7339 vassert(0);
7340 }
7341}
7342
7343
sewardj92d0ae32014-04-03 13:48:54 +00007344/* Generate IR for TBL and TBX. This deals with the 128 bit case
7345 only. */
7346static IRTemp math_TBL_TBX ( IRTemp tab[4], UInt len, IRTemp src,
7347 IRTemp oor_values )
7348{
7349 vassert(len >= 0 && len <= 3);
7350
7351 /* Generate some useful constants as concisely as possible. */
7352 IRTemp half15 = newTemp(Ity_I64);
7353 assign(half15, mkU64(0x0F0F0F0F0F0F0F0FULL));
7354 IRTemp half16 = newTemp(Ity_I64);
7355 assign(half16, mkU64(0x1010101010101010ULL));
7356
7357 /* A zero vector */
sewardj8e91fd42014-07-11 12:05:47 +00007358 IRTemp allZero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007359 assign(allZero, mkV128(0x0000));
7360 /* A vector containing 15 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007361 IRTemp all15 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007362 assign(all15, binop(Iop_64HLtoV128, mkexpr(half15), mkexpr(half15)));
7363 /* A vector containing 16 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007364 IRTemp all16 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007365 assign(all16, binop(Iop_64HLtoV128, mkexpr(half16), mkexpr(half16)));
7366 /* A vector containing 32 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007367 IRTemp all32 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007368 assign(all32, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all16)));
7369 /* A vector containing 48 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007370 IRTemp all48 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007371 assign(all48, binop(Iop_Add8x16, mkexpr(all16), mkexpr(all32)));
7372 /* A vector containing 64 in each 8-bit lane */
sewardj8e91fd42014-07-11 12:05:47 +00007373 IRTemp all64 = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007374 assign(all64, binop(Iop_Add8x16, mkexpr(all32), mkexpr(all32)));
7375
7376 /* Group the 16/32/48/64 vectors so as to be indexable. */
7377 IRTemp allXX[4] = { all16, all32, all48, all64 };
7378
7379 /* Compute the result for each table vector, with zeroes in places
7380 where the index values are out of range, and OR them into the
7381 running vector. */
sewardj8e91fd42014-07-11 12:05:47 +00007382 IRTemp running_result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007383 assign(running_result, mkV128(0));
7384
7385 UInt tabent;
7386 for (tabent = 0; tabent <= len; tabent++) {
7387 vassert(tabent >= 0 && tabent < 4);
sewardj8e91fd42014-07-11 12:05:47 +00007388 IRTemp bias = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007389 assign(bias,
7390 mkexpr(tabent == 0 ? allZero : allXX[tabent-1]));
sewardj8e91fd42014-07-11 12:05:47 +00007391 IRTemp biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007392 assign(biased_indices,
7393 binop(Iop_Sub8x16, mkexpr(src), mkexpr(bias)));
sewardj8e91fd42014-07-11 12:05:47 +00007394 IRTemp valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007395 assign(valid_mask,
7396 binop(Iop_CmpGT8Ux16, mkexpr(all16), mkexpr(biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007397 IRTemp safe_biased_indices = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007398 assign(safe_biased_indices,
7399 binop(Iop_AndV128, mkexpr(biased_indices), mkexpr(all15)));
sewardj8e91fd42014-07-11 12:05:47 +00007400 IRTemp results_or_junk = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007401 assign(results_or_junk,
7402 binop(Iop_Perm8x16, mkexpr(tab[tabent]),
7403 mkexpr(safe_biased_indices)));
sewardj8e91fd42014-07-11 12:05:47 +00007404 IRTemp results_or_zero = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007405 assign(results_or_zero,
7406 binop(Iop_AndV128, mkexpr(results_or_junk), mkexpr(valid_mask)));
7407 /* And OR that into the running result. */
sewardj8e91fd42014-07-11 12:05:47 +00007408 IRTemp tmp = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007409 assign(tmp, binop(Iop_OrV128, mkexpr(results_or_zero),
7410 mkexpr(running_result)));
7411 running_result = tmp;
7412 }
7413
7414 /* So now running_result holds the overall result where the indices
7415 are in range, and zero in out-of-range lanes. Now we need to
7416 compute an overall validity mask and use this to copy in the
7417 lanes in the oor_values for out of range indices. This is
7418 unnecessary for TBL but will get folded out by iropt, so we lean
7419 on that and generate the same code for TBL and TBX here. */
sewardj8e91fd42014-07-11 12:05:47 +00007420 IRTemp overall_valid_mask = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007421 assign(overall_valid_mask,
7422 binop(Iop_CmpGT8Ux16, mkexpr(allXX[len]), mkexpr(src)));
sewardj8e91fd42014-07-11 12:05:47 +00007423 IRTemp result = newTempV128();
sewardj92d0ae32014-04-03 13:48:54 +00007424 assign(result,
7425 binop(Iop_OrV128,
7426 mkexpr(running_result),
7427 binop(Iop_AndV128,
7428 mkexpr(oor_values),
7429 unop(Iop_NotV128, mkexpr(overall_valid_mask)))));
7430 return result;
7431}
7432
7433
sewardj31b5a952014-06-26 07:41:14 +00007434/* Let |argL| and |argR| be V128 values, and let |opI64x2toV128| be
7435 an op which takes two I64s and produces a V128. That is, a widening
7436 operator. Generate IR which applies |opI64x2toV128| to either the
7437 lower (if |is2| is False) or upper (if |is2| is True) halves of
7438 |argL| and |argR|, and return the value in a new IRTemp.
7439*/
7440static
7441IRTemp math_BINARY_WIDENING_V128 ( Bool is2, IROp opI64x2toV128,
7442 IRExpr* argL, IRExpr* argR )
7443{
sewardj8e91fd42014-07-11 12:05:47 +00007444 IRTemp res = newTempV128();
sewardj31b5a952014-06-26 07:41:14 +00007445 IROp slice = is2 ? Iop_V128HIto64 : Iop_V128to64;
7446 assign(res, binop(opI64x2toV128, unop(slice, argL),
7447 unop(slice, argR)));
7448 return res;
7449}
7450
7451
sewardjdf9d6d52014-06-27 10:43:22 +00007452/* Generate signed/unsigned absolute difference vector IR. */
7453static
7454IRTemp math_ABD ( Bool isU, UInt size, IRExpr* argLE, IRExpr* argRE )
7455{
sewardj6f312d02014-06-28 12:21:37 +00007456 vassert(size <= 3);
sewardj8e91fd42014-07-11 12:05:47 +00007457 IRTemp argL = newTempV128();
7458 IRTemp argR = newTempV128();
7459 IRTemp msk = newTempV128();
7460 IRTemp res = newTempV128();
sewardjdf9d6d52014-06-27 10:43:22 +00007461 assign(argL, argLE);
7462 assign(argR, argRE);
sewardj8e91fd42014-07-11 12:05:47 +00007463 assign(msk, binop(isU ? mkVecCMPGTU(size) : mkVecCMPGTS(size),
sewardjdf9d6d52014-06-27 10:43:22 +00007464 mkexpr(argL), mkexpr(argR)));
7465 assign(res,
7466 binop(Iop_OrV128,
7467 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007468 binop(mkVecSUB(size), mkexpr(argL), mkexpr(argR)),
sewardjdf9d6d52014-06-27 10:43:22 +00007469 mkexpr(msk)),
7470 binop(Iop_AndV128,
sewardj8e91fd42014-07-11 12:05:47 +00007471 binop(mkVecSUB(size), mkexpr(argR), mkexpr(argL)),
sewardjdf9d6d52014-06-27 10:43:22 +00007472 unop(Iop_NotV128, mkexpr(msk)))));
7473 return res;
7474}
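
/* A scalar sketch (illustrative only, not used elsewhere) of the
   branch-free scheme math_ABD applies per lane: compute both a-b and
   b-a, then use a comparison-derived all-ones/all-zeroes mask to keep
   whichever difference is non-negative. */
static __attribute__((unused)) ULong example_scalar_abd64 ( Bool isU,
                                                            ULong a,
                                                            ULong b )
{
   ULong msk = (isU ? (a > b) : ((Long)a > (Long)b)) ? ~0ULL : 0ULL;
   return ((a - b) & msk) | ((b - a) & ~msk);
}
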
7475
7476
sewardj6f312d02014-06-28 12:21:37 +00007477/* Generate IR that takes a V128 and sign- or zero-widens
7478 either the lower or upper set of lanes to twice-as-wide,
7479 resulting in a new V128 value. */
7480static
sewardja5a6b752014-06-30 07:33:56 +00007481IRTemp math_WIDEN_LO_OR_HI_LANES ( Bool zWiden, Bool fromUpperHalf,
7482 UInt sizeNarrow, IRExpr* srcE )
sewardj6f312d02014-06-28 12:21:37 +00007483{
sewardj8e91fd42014-07-11 12:05:47 +00007484 IRTemp src = newTempV128();
7485 IRTemp res = newTempV128();
sewardj6f312d02014-06-28 12:21:37 +00007486 assign(src, srcE);
7487 switch (sizeNarrow) {
7488 case X10:
7489 assign(res,
7490 binop(zWiden ? Iop_ShrN64x2 : Iop_SarN64x2,
7491 binop(fromUpperHalf ? Iop_InterleaveHI32x4
7492 : Iop_InterleaveLO32x4,
7493 mkexpr(src),
7494 mkexpr(src)),
7495 mkU8(32)));
7496 break;
7497 case X01:
7498 assign(res,
7499 binop(zWiden ? Iop_ShrN32x4 : Iop_SarN32x4,
7500 binop(fromUpperHalf ? Iop_InterleaveHI16x8
7501 : Iop_InterleaveLO16x8,
7502 mkexpr(src),
7503 mkexpr(src)),
7504 mkU8(16)));
7505 break;
7506 case X00:
7507 assign(res,
7508 binop(zWiden ? Iop_ShrN16x8 : Iop_SarN16x8,
7509 binop(fromUpperHalf ? Iop_InterleaveHI8x16
7510 : Iop_InterleaveLO8x16,
7511 mkexpr(src),
7512 mkexpr(src)),
7513 mkU8(8)));
7514 break;
7515 default:
7516 vassert(0);
7517 }
7518 return res;
7519}
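
/* For instance, math_WIDEN_LO_OR_HI_LANES at X10 with fromUpperHalf
   == False: if src is [s3 s2 s1 s0] then InterleaveLO32x4(src, src)
   is [s1 s1 s0 s0], and shifting each 64-bit lane right by 32 --
   arithmetically when sign-widening, logically when zero-widening --
   leaves the widened [s1 s0]. */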
7520
7521
sewardja5a6b752014-06-30 07:33:56 +00007522/* Generate IR that takes a V128 and sign- or zero-widens
7523 either the even or odd lanes to twice-as-wide,
7524 resulting in a new V128 value. */
7525static
7526IRTemp math_WIDEN_EVEN_OR_ODD_LANES ( Bool zWiden, Bool fromOdd,
7527 UInt sizeNarrow, IRExpr* srcE )
7528{
sewardj8e91fd42014-07-11 12:05:47 +00007529 IRTemp src = newTempV128();
7530 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007531 IROp opSAR = mkVecSARN(sizeNarrow+1);
7532 IROp opSHR = mkVecSHRN(sizeNarrow+1);
7533 IROp opSHL = mkVecSHLN(sizeNarrow+1);
7534 IROp opSxR = zWiden ? opSHR : opSAR;
7535 UInt amt = 0;
7536 switch (sizeNarrow) {
7537 case X10: amt = 32; break;
7538 case X01: amt = 16; break;
7539 case X00: amt = 8; break;
7540 default: vassert(0);
7541 }
7542 assign(src, srcE);
7543 if (fromOdd) {
7544 assign(res, binop(opSxR, mkexpr(src), mkU8(amt)));
7545 } else {
7546 assign(res, binop(opSxR, binop(opSHL, mkexpr(src), mkU8(amt)),
7547 mkU8(amt)));
7548 }
7549 return res;
7550}
7551
7552
7553/* Generate IR that takes two V128s and narrows (takes lower half)
7554 of each lane, producing a single V128 value. */
7555static
7556IRTemp math_NARROW_LANES ( IRTemp argHi, IRTemp argLo, UInt sizeNarrow )
7557{
sewardj8e91fd42014-07-11 12:05:47 +00007558 IRTemp res = newTempV128();
sewardja5a6b752014-06-30 07:33:56 +00007559 assign(res, binop(mkVecCATEVENLANES(sizeNarrow),
7560 mkexpr(argHi), mkexpr(argLo)));
7561 return res;
7562}
7563
7564
sewardj487559e2014-07-10 14:22:45 +00007565/* Return a temp which holds the vector dup of the lane of width
7566   (1 << size) bytes obtained from src[laneNo]. */
7567static
7568IRTemp math_DUP_VEC_ELEM ( IRExpr* src, UInt size, UInt laneNo )
7569{
7570 vassert(size <= 3);
7571 /* Normalise |laneNo| so it is of the form
7572 x000 for D, xx00 for S, xxx0 for H, and xxxx for B.
7573 This puts the bits we want to inspect at constant offsets
7574 regardless of the value of |size|.
7575 */
7576 UInt ix = laneNo << size;
7577 vassert(ix <= 15);
7578 IROp ops[4] = { Iop_INVALID, Iop_INVALID, Iop_INVALID, Iop_INVALID };
7579 switch (size) {
7580 case 0: /* B */
7581 ops[0] = (ix & 1) ? Iop_CatOddLanes8x16 : Iop_CatEvenLanes8x16;
7582 /* fallthrough */
7583 case 1: /* H */
7584 ops[1] = (ix & 2) ? Iop_CatOddLanes16x8 : Iop_CatEvenLanes16x8;
7585 /* fallthrough */
7586 case 2: /* S */
7587 ops[2] = (ix & 4) ? Iop_CatOddLanes32x4 : Iop_CatEvenLanes32x4;
7588 /* fallthrough */
7589 case 3: /* D */
7590 ops[3] = (ix & 8) ? Iop_InterleaveHI64x2 : Iop_InterleaveLO64x2;
7591 break;
7592 default:
7593 vassert(0);
7594 }
sewardj8e91fd42014-07-11 12:05:47 +00007595 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007596 assign(res, src);
7597 Int i;
7598 for (i = 3; i >= 0; i--) {
7599 if (ops[i] == Iop_INVALID)
7600 break;
sewardj8e91fd42014-07-11 12:05:47 +00007601 IRTemp tmp = newTempV128();
sewardj487559e2014-07-10 14:22:45 +00007602 assign(tmp, binop(ops[i], mkexpr(res), mkexpr(res)));
7603 res = tmp;
7604 }
7605 return res;
7606}
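
/* Worked trace for math_DUP_VEC_ELEM (illustrative): size == 2 (S)
   and laneNo == 3 give ix == 12, so ops[3] == Iop_InterleaveHI64x2
   and ops[2] == Iop_CatOddLanes32x4.  Starting from [s3 s2 s1 s0],
   the first step yields [s3 s2 s3 s2] and the second [s3 s3 s3 s3]. */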
7607
7608
7609/* Let |srcV| be a V128 value, and let |imm5| be a lane-and-size
7610 selector encoded as shown below. Return a new V128 holding the
7611 selected lane from |srcV| dup'd out to V128, and also return the
7612 lane number, log2 of the lane size in bytes, and width-character via
7613 *laneNo, *laneSzLg2 and *laneCh respectively. It may be that imm5
7614 is an invalid selector, in which case return
7615 IRTemp_INVALID, 0, 0 and '?' respectively.
7616
7617 imm5 = xxxx1 signifies .b[xxxx]
7618 = xxx10 .h[xxx]
7619 = xx100 .s[xx]
7620 = x1000 .d[x]
7621 otherwise invalid
7622*/
7623static
7624IRTemp handle_DUP_VEC_ELEM ( /*OUT*/UInt* laneNo,
7625 /*OUT*/UInt* laneSzLg2, /*OUT*/HChar* laneCh,
7626 IRExpr* srcV, UInt imm5 )
7627{
7628 *laneNo = 0;
7629 *laneSzLg2 = 0;
7630 *laneCh = '?';
7631
7632 if (imm5 & 1) {
7633 *laneNo = (imm5 >> 1) & 15;
7634 *laneSzLg2 = 0;
7635 *laneCh = 'b';
7636 }
7637 else if (imm5 & 2) {
7638 *laneNo = (imm5 >> 2) & 7;
7639 *laneSzLg2 = 1;
7640 *laneCh = 'h';
7641 }
7642 else if (imm5 & 4) {
7643 *laneNo = (imm5 >> 3) & 3;
7644 *laneSzLg2 = 2;
7645 *laneCh = 's';
7646 }
7647 else if (imm5 & 8) {
7648 *laneNo = (imm5 >> 4) & 1;
7649 *laneSzLg2 = 3;
7650 *laneCh = 'd';
7651 }
7652 else {
7653 /* invalid */
7654 return IRTemp_INVALID;
7655 }
7656
7657 return math_DUP_VEC_ELEM(srcV, *laneSzLg2, *laneNo);
7658}
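
/* For example (illustrative): imm5 == 0b01010 has bit 0 clear and
   bit 1 set, so it selects .h[2]: laneNo == 2, laneSzLg2 == 1 and
   laneCh == 'h'. */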
7659
7660
7661/* Clone |imm| to every lane of a V128, with lane size log2 of |size|. */
7662static
7663IRTemp math_VEC_DUP_IMM ( UInt size, ULong imm )
7664{
7665 IRType ty = Ity_INVALID;
7666 IRTemp rcS = IRTemp_INVALID;
7667 switch (size) {
7668 case X01:
7669 vassert(imm <= 0xFFFFULL);
7670 ty = Ity_I16;
7671 rcS = newTemp(ty); assign(rcS, mkU16( (UShort)imm ));
7672 break;
7673 case X10:
7674 vassert(imm <= 0xFFFFFFFFULL);
7675 ty = Ity_I32;
7676 rcS = newTemp(ty); assign(rcS, mkU32( (UInt)imm ));
7677 break;
7678 case X11:
7679 ty = Ity_I64;
7680 rcS = newTemp(ty); assign(rcS, mkU64(imm)); break;
7681 default:
7682 vassert(0);
7683 }
7684 IRTemp rcV = math_DUP_TO_V128(rcS, ty);
7685 return rcV;
7686}
7687
7688
sewardj25523c42014-06-15 19:36:29 +00007689/* Let |new64| be a V128 in which only the lower 64 bits are interesting,
7690 and the upper can contain any value -- it is ignored. If |is2| is False,
7691 generate IR to put |new64| in the lower half of vector reg |dd| and zero
7692 the upper half. If |is2| is True, generate IR to put |new64| in the upper
7693 half of vector reg |dd| and leave the lower half unchanged. This
7694 simulates the behaviour of the "foo/foo2" instructions in which the
7695 destination is half the width of sources, for example addhn/addhn2.
7696*/
7697static
7698void putLO64andZUorPutHI64 ( Bool is2, UInt dd, IRTemp new64 )
7699{
7700 if (is2) {
7701 /* Get the old contents of Vdd, zero the upper half, and replace
7702 it with 'x'. */
sewardj8e91fd42014-07-11 12:05:47 +00007703 IRTemp t_zero_oldLO = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007704 assign(t_zero_oldLO, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
sewardj8e91fd42014-07-11 12:05:47 +00007705 IRTemp t_newHI_zero = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007706 assign(t_newHI_zero, binop(Iop_InterleaveLO64x2, mkexpr(new64),
7707 mkV128(0x0000)));
sewardj8e91fd42014-07-11 12:05:47 +00007708 IRTemp res = newTempV128();
sewardj25523c42014-06-15 19:36:29 +00007709 assign(res, binop(Iop_OrV128, mkexpr(t_zero_oldLO),
7710 mkexpr(t_newHI_zero)));
7711 putQReg128(dd, mkexpr(res));
7712 } else {
7713 /* This is simple. */
7714 putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(new64)));
7715 }
7716}
7717
7718
sewardj8e91fd42014-07-11 12:05:47 +00007719/* Compute vector SQABS at lane size |size| for |srcE|, returning
7720 the q result in |*qabs| and the normal result in |*nabs|. */
7721static
7722void math_SQABS ( /*OUT*/IRTemp* qabs, /*OUT*/IRTemp* nabs,
7723 IRExpr* srcE, UInt size )
7724{
7725 IRTemp src, mask, maskn, nsub, qsub;
7726 src = mask = maskn = nsub = qsub = IRTemp_INVALID;
7727 newTempsV128_7(&src, &mask, &maskn, &nsub, &qsub, nabs, qabs);
7728 assign(src, srcE);
7729 assign(mask, binop(mkVecCMPGTS(size), mkV128(0x0000), mkexpr(src)));
7730 assign(maskn, unop(Iop_NotV128, mkexpr(mask)));
7731 assign(nsub, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7732 assign(qsub, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7733 assign(*nabs, binop(Iop_OrV128,
7734 binop(Iop_AndV128, mkexpr(nsub), mkexpr(mask)),
7735 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7736 assign(*qabs, binop(Iop_OrV128,
7737 binop(Iop_AndV128, mkexpr(qsub), mkexpr(mask)),
7738 binop(Iop_AndV128, mkexpr(src), mkexpr(maskn))));
7739}
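
/* The q/n pair lets callers detect saturation: with 8-bit lanes an
   input of -128 gives a *nabs lane of -128 (plain negation wraps)
   but a *qabs lane of 127; any difference between the two is what
   sets FPSR.QC. */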
7740
7741
sewardj51d012a2014-07-21 09:19:50 +00007742/* Compute vector SQNEG at lane size |size| for |srcE|, returning
7743 the q result in |*qneg| and the normal result in |*nneg|. */
7744static
7745void math_SQNEG ( /*OUT*/IRTemp* qneg, /*OUT*/IRTemp* nneg,
7746 IRExpr* srcE, UInt size )
7747{
7748 IRTemp src = IRTemp_INVALID;
7749 newTempsV128_3(&src, nneg, qneg);
7750 assign(src, srcE);
7751 assign(*nneg, binop(mkVecSUB(size), mkV128(0x0000), mkexpr(src)));
7752 assign(*qneg, binop(mkVecQSUBS(size), mkV128(0x0000), mkexpr(src)));
7753}
7754
7755
sewardjecedd982014-08-11 14:02:47 +00007756/* Zero all except the least significant lane of |srcE|, where |size|
7757 indicates the lane size in the usual way. */
sewardj257e99f2014-08-03 12:45:19 +00007758static IRTemp math_ZERO_ALL_EXCEPT_LOWEST_LANE ( UInt size, IRExpr* srcE )
sewardj8e91fd42014-07-11 12:05:47 +00007759{
7760 vassert(size < 4);
7761 IRTemp t = newTempV128();
sewardj51d012a2014-07-21 09:19:50 +00007762 assign(t, unop(mkVecZEROHIxxOFV128(size), srcE));
sewardj8e91fd42014-07-11 12:05:47 +00007763 return t;
7764}
7765
7766
sewardj51d012a2014-07-21 09:19:50 +00007767/* Generate IR to compute vector widening MULL from either the lower
7768 (is2==False) or upper (is2==True) halves of vecN and vecM. The
7769 widening multiplies are unsigned when isU==True and signed when
7770 isU==False. |size| is the narrow lane size indication. Optionally,
7771 the product may be added to or subtracted from vecD, at the wide lane
7772 size. This happens when |mas| is 'a' (add) or 's' (sub). When |mas|
7773 is 'm' (only multiply) then the accumulate part does not happen, and
7774 |vecD| is expected to == IRTemp_INVALID.
7775
7776 Only size==0 (h_b_b), size==1 (s_h_h) and size==2 (d_s_s) variants
7777 are allowed. The result is returned in a new IRTemp, which is
7778 returned in *res. */
7779static
7780void math_MULL_ACC ( /*OUT*/IRTemp* res,
7781 Bool is2, Bool isU, UInt size, HChar mas,
7782 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7783{
7784 vassert(res && *res == IRTemp_INVALID);
7785 vassert(size <= 2);
7786 vassert(mas == 'm' || mas == 'a' || mas == 's');
7787 if (mas == 'm') vassert(vecD == IRTemp_INVALID);
7788 IROp mulOp = isU ? mkVecMULLU(size) : mkVecMULLS(size);
7789 IROp accOp = (mas == 'a') ? mkVecADD(size+1)
7790 : (mas == 's' ? mkVecSUB(size+1)
7791 : Iop_INVALID);
7792 IRTemp mul = math_BINARY_WIDENING_V128(is2, mulOp,
7793 mkexpr(vecN), mkexpr(vecM));
7794 *res = newTempV128();
7795 assign(*res, mas == 'm' ? mkexpr(mul)
7796 : binop(accOp, mkexpr(vecD), mkexpr(mul)));
7797}
7798
7799
7800/* Same as math_MULL_ACC, except the multiply is signed widening,
7801 the multiplied value is then doubled, before being added to or
7802 subtracted from the accumulated value. And everything is
7803 saturated. In all cases, saturation residuals are returned
7804 via (sat1q, sat1n), and in the accumulate cases,
7805 via (sat2q, sat2n) too. All results are returned in new temporaries.
7806 In the no-accumulate case, *sat2q and *sat2n are never instantiated,
7807 so the caller can tell this has happened. */
7808static
7809void math_SQDMULL_ACC ( /*OUT*/IRTemp* res,
7810 /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
7811 /*OUT*/IRTemp* sat2q, /*OUT*/IRTemp* sat2n,
7812 Bool is2, UInt size, HChar mas,
7813 IRTemp vecN, IRTemp vecM, IRTemp vecD )
7814{
7815 vassert(size <= 2);
7816 vassert(mas == 'm' || mas == 'a' || mas == 's');
7817 /* Compute
7818 sat1q = vecN.D[is2] *sq vecM.d[is2] *q 2
7819 sat1n = vecN.D[is2] *s vecM.d[is2] * 2
7820 IOW take either the low or high halves of vecN and vecM, signed widen,
7821 multiply, double that, and signedly saturate. Also compute the same
7822 but without saturation.
7823 */
7824 vassert(sat2q && *sat2q == IRTemp_INVALID);
7825 vassert(sat2n && *sat2n == IRTemp_INVALID);
7826 newTempsV128_3(sat1q, sat1n, res);
7827 IRTemp tq = math_BINARY_WIDENING_V128(is2, mkVecQDMULLS(size),
7828 mkexpr(vecN), mkexpr(vecM));
7829 IRTemp tn = math_BINARY_WIDENING_V128(is2, mkVecMULLS(size),
7830 mkexpr(vecN), mkexpr(vecM));
7831 assign(*sat1q, mkexpr(tq));
7832 assign(*sat1n, binop(mkVecADD(size+1), mkexpr(tn), mkexpr(tn)));
7833
7834 /* If there is no accumulation, the final result is sat1q,
7835 and there's no assignment to sat2q or sat2n. */
7836 if (mas == 'm') {
7837 assign(*res, mkexpr(*sat1q));
7838 return;
7839 }
7840
7841 /* Compute
7842 sat2q = vecD +sq/-sq sat1q
7843 sat2n = vecD +/- sat1n
7844 result = sat2q
7845 */
7846 newTempsV128_2(sat2q, sat2n);
7847 assign(*sat2q, binop(mas == 'a' ? mkVecQADDS(size+1) : mkVecQSUBS(size+1),
7848 mkexpr(vecD), mkexpr(*sat1q)));
7849 assign(*sat2n, binop(mas == 'a' ? mkVecADD(size+1) : mkVecSUB(size+1),
7850 mkexpr(vecD), mkexpr(*sat1n)));
7851 assign(*res, mkexpr(*sat2q));
7852}


/* Generate IR for widening signed vector multiplies.  The operands
   have their lane width signedly widened, and they are then multiplied
   at the wider width, returning results in two new IRTemps. */
static
void math_MULLS ( /*OUT*/IRTemp* resHI, /*OUT*/IRTemp* resLO,
                  UInt sizeNarrow, IRTemp argL, IRTemp argR )
{
   vassert(sizeNarrow <= 2);
   newTempsV128_2(resHI, resLO);
   IRTemp argLhi = newTemp(Ity_I64);
   IRTemp argLlo = newTemp(Ity_I64);
   IRTemp argRhi = newTemp(Ity_I64);
   IRTemp argRlo = newTemp(Ity_I64);
   assign(argLhi, unop(Iop_V128HIto64, mkexpr(argL)));
   assign(argLlo, unop(Iop_V128to64, mkexpr(argL)));
   assign(argRhi, unop(Iop_V128HIto64, mkexpr(argR)));
   assign(argRlo, unop(Iop_V128to64, mkexpr(argR)));
   IROp opMulls = mkVecMULLS(sizeNarrow);
   assign(*resHI, binop(opMulls, mkexpr(argLhi), mkexpr(argRhi)));
   assign(*resLO, binop(opMulls, mkexpr(argLlo), mkexpr(argRlo)));
}
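
/* Illustration (added comment, not in the original source): for
   sizeNarrow == X01, each 64-bit half of argL/argR holds four 16-bit
   lanes, so *resHI and *resLO each end up holding the four 32-bit
   products of the corresponding lane pairs. */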


/* Generate IR for SQDMULH and SQRDMULH: signedly wideningly multiply,
   double that, possibly add a rounding constant (R variants), and take
   the high half. */
static
void math_SQDMULH ( /*OUT*/IRTemp* res,
                    /*OUT*/IRTemp* sat1q, /*OUT*/IRTemp* sat1n,
                    Bool isR, UInt size, IRTemp vN, IRTemp vM )
{
   vassert(size == X01 || size == X10); /* s or h only */

   newTempsV128_3(res, sat1q, sat1n);

   IRTemp mullsHI = IRTemp_INVALID, mullsLO = IRTemp_INVALID;
   math_MULLS(&mullsHI, &mullsLO, size, vN, vM);

   IROp addWide = mkVecADD(size+1);

   if (isR) {
      assign(*sat1q, binop(mkVecQRDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      Int rcShift = size == X01 ? 15 : 31;
      IRTemp roundConst = math_VEC_DUP_IMM(size+1, 1ULL << rcShift);
      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                         mkexpr(roundConst)),
                   binop(addWide,
                         binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO)),
                         mkexpr(roundConst))));
   } else {
      assign(*sat1q, binop(mkVecQDMULHIS(size), mkexpr(vN), mkexpr(vM)));

      assign(*sat1n,
             binop(mkVecCATODDLANES(size),
                   binop(addWide, mkexpr(mullsHI), mkexpr(mullsHI)),
                   binop(addWide, mkexpr(mullsLO), mkexpr(mullsLO))));
   }

   assign(*res, mkexpr(*sat1q));
}
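
/* Illustration (added comment, not in the original source): for 'h'
   lanes the rounded variant computes (2*a*b + 0x8000) >> 16 per lane;
   the CatOddLanes step picks the odd 16-bit lanes of the widened
   32-bit sums, i.e. their high halves, which is exactly that >> 16. */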


/* Generate IR for SQSHL, UQSHL, SQSHLU by imm.  Put the result in
   a new temp in *res, and the Q difference pair in new temps in
   *qDiff1 and *qDiff2 respectively.  |nm| denotes which of the
   three operations it is. */
static
void math_QSHL_IMM ( /*OUT*/IRTemp* res,
                     /*OUT*/IRTemp* qDiff1, /*OUT*/IRTemp* qDiff2,
                     IRTemp src, UInt size, UInt shift, const HChar* nm )
{
   vassert(size <= 3);
   UInt laneBits = 8 << size;
   vassert(shift < laneBits);
   newTempsV128_3(res, qDiff1, qDiff2);
   IRTemp z128 = newTempV128();
   assign(z128, mkV128(0x0000));

   /* UQSHL */
   if (vex_streq(nm, "uqshl")) {
      IROp qop = mkVecQSHLNSATUU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   /* SQSHL */
   if (vex_streq(nm, "sqshl")) {
      IROp qop = mkVecQSHLNSATSS(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* No shift means no saturation. */
         assign(*qDiff1, mkexpr(z128));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            different from the top bit of the original value. */
         UInt rshift = laneBits - 1 - shift;
         vassert(rshift >= 0 && rshift < laneBits-1);
         /* qDiff1 is the shifted out bits, and the top bit of the original
            value, preceded by zeroes. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         /* qDiff2 is the top bit of the original value, cloned the
            correct number of times. */
         assign(*qDiff2, binop(mkVecSHRN(size),
                               binop(mkVecSARN(size), mkexpr(src),
                                                      mkU8(laneBits-1)),
                               mkU8(rshift)));
         /* This also succeeds in comparing the top bit of the original
            value to itself, which is a bit stupid, but not wrong. */
      }
      return;
   }

   /* SQSHLU */
   if (vex_streq(nm, "sqshlu")) {
      IROp qop = mkVecQSHLNSATSU(size);
      assign(*res, binop(qop, mkexpr(src), mkU8(shift)));
      if (shift == 0) {
         /* If there's no shift, saturation depends on the top bit
            of the source. */
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(laneBits-1)));
         assign(*qDiff2, mkexpr(z128));
      } else {
         /* Saturation has occurred if any of the shifted-out bits are
            nonzero.  We get the shifted-out bits by right-shifting the
            original value. */
         UInt rshift = laneBits - shift;
         vassert(rshift >= 1 && rshift < laneBits);
         assign(*qDiff1, binop(mkVecSHRN(size), mkexpr(src), mkU8(rshift)));
         assign(*qDiff2, mkexpr(z128));
      }
      return;
   }

   vassert(0);
}
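
/* Illustration (added comment, not in the original source): UQSHL on
   8-bit lanes with shift 3 and source lane 0x25: the wrapped result
   would be 0x28, and qDiff1 = 0x25 >> 5 = 0x01.  Since qDiff1 differs
   from qDiff2 (= 0), the shifted-out bits were nonzero, the result
   lane saturates to 0xFF, and QC gets set. */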


/* Generate IR to do SRHADD and URHADD. */
static
IRTemp math_RHADD ( UInt size, Bool isU, IRTemp aa, IRTemp bb )
{
   /* Generate this:
      (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1)
   */
   vassert(size <= 3);
   IROp opSHR = isU ? mkVecSHRN(size) : mkVecSARN(size);
   IROp opADD = mkVecADD(size);
   /* The only tricky bit is to generate the correct vector 1 constant. */
   const ULong ones64[4]
      = { 0x0101010101010101ULL, 0x0001000100010001ULL,
          0x0000000100000001ULL, 0x0000000000000001ULL };
   IRTemp imm64 = newTemp(Ity_I64);
   assign(imm64, mkU64(ones64[size]));
   IRTemp vecOne = newTempV128();
   assign(vecOne, binop(Iop_64HLtoV128, mkexpr(imm64), mkexpr(imm64)));
   IRTemp scaOne = newTemp(Ity_I8);
   assign(scaOne, mkU8(1));
   IRTemp res = newTempV128();
   assign(res,
          binop(opADD,
                binop(opSHR, mkexpr(aa), mkexpr(scaOne)),
                binop(opADD,
                      binop(opSHR, mkexpr(bb), mkexpr(scaOne)),
                      binop(opSHR,
                            binop(opADD,
                                  binop(opADD,
                                        binop(Iop_AndV128, mkexpr(aa),
                                                           mkexpr(vecOne)),
                                        binop(Iop_AndV128, mkexpr(bb),
                                                           mkexpr(vecOne))
                                  ),
                                  mkexpr(vecOne)
                            ),
                            mkexpr(scaOne)
                      )
                )
          )
   );
   return res;
}
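
/* Illustration (added comment, not in the original source): for
   unsigned bytes A=254, B=255: (254>>1) + (255>>1) + ((0+1+1)>>1)
   = 127 + 127 + 1 = 255, matching the rounded halving add
   (254+255+1) >> 1 without needing a 9-bit intermediate. */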


/* QCFLAG tracks the SIMD sticky saturation status.  Update the status
   thusly: if, after application of |opZHI| to both |qres| and |nres|,
   they have the same value, leave QCFLAG unchanged.  Otherwise, set it
   (implicitly) to 1.  |opZHI| may only be one of the Iop_ZeroHIxxofV128
   operators, or Iop_INVALID, in which case |qres| and |nres| are used
   unmodified.  The presence of |opZHI| means this function can be used
   to generate QCFLAG update code for both scalar and vector SIMD
   operations.
*/
static
void updateQCFLAGwithDifferenceZHI ( IRTemp qres, IRTemp nres, IROp opZHI )
{
   IRTemp diff      = newTempV128();
   IRTemp oldQCFLAG = newTempV128();
   IRTemp newQCFLAG = newTempV128();
   if (opZHI == Iop_INVALID) {
      assign(diff, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres)));
   } else {
      vassert(opZHI == Iop_ZeroHI64ofV128
              || opZHI == Iop_ZeroHI96ofV128 || opZHI == Iop_ZeroHI112ofV128);
      assign(diff, unop(opZHI, binop(Iop_XorV128, mkexpr(qres), mkexpr(nres))));
   }
   assign(oldQCFLAG, IRExpr_Get(OFFB_QCFLAG, Ity_V128));
   assign(newQCFLAG, binop(Iop_OrV128, mkexpr(oldQCFLAG), mkexpr(diff)));
   stmt(IRStmt_Put(OFFB_QCFLAG, mkexpr(newQCFLAG)));
}
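
/* (Added note, not in the original source: the XOR yields a nonzero
   vector exactly when qres and nres differ under opZHI, and OR-ing
   that into QCFLAG makes the flag sticky, modelling the cumulative
   FPSR.QC bit.) */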


/* A variant of updateQCFLAGwithDifferenceZHI in which |qres| and |nres|
   are used unmodified, hence suitable for QCFLAG updates for whole-vector
   operations. */
static
void updateQCFLAGwithDifference ( IRTemp qres, IRTemp nres )
{
   updateQCFLAGwithDifferenceZHI(qres, nres, Iop_INVALID);
}


/* Generate IR to rearrange two vector values in a way which is useful
   for doing S/D add-pair etc operations.  There are 3 cases:

   2d:  [m1 m0] [n1 n0]  -->  [m1 n1] [m0 n0]

   4s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [m3 m1 n3 n1] [m2 m0 n2 n0]

   2s:  [m3 m2 m1 m0] [n3 n2 n1 n0]  -->  [0 0 m1 n1] [0 0 m0 n0]

   The cases are distinguished as follows:
   isD == True,  bitQ == 1  =>  2d
   isD == False, bitQ == 1  =>  4s
   isD == False, bitQ == 0  =>  2s
*/
static
void math_REARRANGE_FOR_FLOATING_PAIRWISE (
        /*OUT*/IRTemp* rearrL, /*OUT*/IRTemp* rearrR,
        IRTemp vecM, IRTemp vecN, Bool isD, UInt bitQ
     )
{
   vassert(rearrL && *rearrL == IRTemp_INVALID);
   vassert(rearrR && *rearrR == IRTemp_INVALID);
   *rearrL = newTempV128();
   *rearrR = newTempV128();
   if (isD) {
      // 2d case
      vassert(bitQ == 1);
      assign(*rearrL, binop(Iop_InterleaveHI64x2, mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_InterleaveLO64x2, mkexpr(vecM), mkexpr(vecN)));
   }
   else if (!isD && bitQ == 1) {
      // 4s case
      assign(*rearrL, binop(Iop_CatOddLanes32x4,  mkexpr(vecM), mkexpr(vecN)));
      assign(*rearrR, binop(Iop_CatEvenLanes32x4, mkexpr(vecM), mkexpr(vecN)));
   } else {
      // 2s case
      vassert(!isD && bitQ == 0);
      IRTemp m1n1m0n0 = newTempV128();
      IRTemp m0n0m1n1 = newTempV128();
      assign(m1n1m0n0, binop(Iop_InterleaveLO32x4,
                             mkexpr(vecM), mkexpr(vecN)));
      assign(m0n0m1n1, triop(Iop_SliceV128,
                             mkexpr(m1n1m0n0), mkexpr(m1n1m0n0), mkU8(8)));
      assign(*rearrL, unop(Iop_ZeroHI64ofV128, mkexpr(m1n1m0n0)));
      assign(*rearrR, unop(Iop_ZeroHI64ofV128, mkexpr(m0n0m1n1)));
   }
}


/* Returns 2.0 ^ (-n) for n in 1 .. 64 */
static Double two_to_the_minus ( Int n )
{
   if (n == 1) return 0.5;
   vassert(n >= 2 && n <= 64);
   Int half = n / 2;
   return two_to_the_minus(half) * two_to_the_minus(n - half);
}
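
/* Illustration (added comment, not in the original source):
   two_to_the_minus(5) recurses as two_to_the_minus(2) *
   two_to_the_minus(3) = 0.25 * 0.125 = 0.03125; every intermediate
   is an exact power of two, so no rounding error accumulates. */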


/*------------------------------------------------------------*/
/*--- SIMD and FP instructions                             ---*/
/*------------------------------------------------------------*/

static
Bool dis_AdvSIMD_EXT(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14   10 9 4
      0 q 101110 op2 0  m  0  imm4 0  n d
      Decode fields: op2
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(1,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(10,10) != 0) {
      return False;
   }
   UInt bitQ = INSN(30,30);
   UInt op2  = INSN(23,22);
   UInt mm   = INSN(20,16);
   UInt imm4 = INSN(14,11);
   UInt nn   = INSN(9,5);
   UInt dd   = INSN(4,0);

   if (op2 == BITS2(0,0)) {
      /* -------- 00: EXT 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp sHi = newTempV128();
      IRTemp sLo = newTempV128();
      IRTemp res = newTempV128();
      assign(sHi, getQReg128(mm));
      assign(sLo, getQReg128(nn));
      if (bitQ == 1) {
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 15);
            assign(res, triop(Iop_SliceV128,
                              mkexpr(sHi), mkexpr(sLo), mkU8(imm4)));
         }
         putQReg128(dd, mkexpr(res));
         DIP("ext v%u.16b, v%u.16b, v%u.16b, #%u\n", dd, nn, mm, imm4);
      } else {
         if (imm4 >= 8) return False;
         if (imm4 == 0) {
            assign(res, mkexpr(sLo));
         } else {
            vassert(imm4 >= 1 && imm4 <= 7);
            IRTemp hi64lo64 = newTempV128();
            assign(hi64lo64, binop(Iop_InterleaveLO64x2,
                                   mkexpr(sHi), mkexpr(sLo)));
            assign(res, triop(Iop_SliceV128,
                              mkexpr(hi64lo64), mkexpr(hi64lo64), mkU8(imm4)));
         }
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
         DIP("ext v%u.8b, v%u.8b, v%u.8b, #%u\n", dd, nn, mm, imm4);
      }
      return True;
   }
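
   /* (Added note, not in the original source: Iop_SliceV128 hi:lo #n
      yields bytes n .. n+15 of the 32-byte concatenation hi:lo, which
      is exactly EXT's byte-window extraction.) */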

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_TBL_TBX(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23  21 20 15 14  12 11 9 4
      0 q 001110 op2 0  m  0  len op 00 n d
      Decode fields: op2,len,op
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0
       || INSN(15,15) != 0
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt op2   = INSN(23,22);
   UInt mm    = INSN(20,16);
   UInt len   = INSN(14,13);
   UInt bitOP = INSN(12,12);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (op2 == X00) {
      /* -------- 00,xx,0 TBL, xx register table -------- */
      /* -------- 00,xx,1 TBX, xx register table -------- */
      /* 31  28        20 15 14  12  9 4
         0q0 01110 000 m  0  len 000 n d  TBL Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         0q0 01110 000 m  0  len 100 n d  TBX Vd.Ta, {Vn .. V(n+len)%32}, Vm.Ta
         where Ta = 16b(q=1) or 8b(q=0)
      */
      Bool isTBX = bitOP == 1;
      /* The out-of-range values to use. */
      IRTemp oor_values = newTempV128();
      assign(oor_values, isTBX ? getQReg128(dd) : mkV128(0));
      /* src value */
      IRTemp src = newTempV128();
      assign(src, getQReg128(mm));
      /* The table values */
      IRTemp tab[4];
      UInt   i;
      for (i = 0; i <= len; i++) {
         vassert(i < 4);
         tab[i] = newTempV128();
         assign(tab[i], getQReg128((nn + i) % 32));
      }
      IRTemp res = math_TBL_TBX(tab, len, src, oor_values);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* Ta = bitQ == 1 ? "16b" : "8b";
      const HChar* nm = isTBX ? "tbx" : "tbl";
      DIP("%s %s.%s, {v%d.16b .. v%d.16b}, %s.%s\n",
          nm, nameQReg128(dd), Ta, nn, (nn + len) % 32, nameQReg128(mm), Ta);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_ZIP_UZP_TRN(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31  29     23   21 20 15 14     11 9 4
      0 q 001110 size 0  m  0  opcode 10 n d
      Decode fields: opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(29,24) != BITS6(0,0,1,1,1,0)
       || INSN(21,21) != 0 || INSN(15,15) != 0 || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(14,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS3(0,0,1) || opcode == BITS3(1,0,1)) {
      /* -------- 001 UZP1 std7_std7_std7 -------- */
      /* -------- 101 UZP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUZP1 = opcode == BITS3(0,0,1);
      IROp op     = isUZP1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0) {
         assign(preL, binop(Iop_InterleaveLO64x2, getQReg128(mm),
                                                  getQReg128(nn)));
         assign(preR, mkexpr(preL));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isUZP1 ? "uzp1" : "uzp2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,0) || opcode == BITS3(1,1,0)) {
      /* -------- 010 TRN1 std7_std7_std7 -------- */
      /* -------- 110 TRN2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isTRN1 = opcode == BITS3(0,1,0);
      IROp op1    = isTRN1 ? mkVecCATEVENLANES(size)
                           : mkVecCATODDLANES(size);
      IROp op2    = mkVecINTERLEAVEHI(size);
      IRTemp srcM = newTempV128();
      IRTemp srcN = newTempV128();
      IRTemp res  = newTempV128();
      assign(srcM, getQReg128(mm));
      assign(srcN, getQReg128(nn));
      assign(res, binop(op2, binop(op1, mkexpr(srcM), mkexpr(srcM)),
                             binop(op1, mkexpr(srcN), mkexpr(srcN))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isTRN1 ? "trn1" : "trn2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS3(0,1,1) || opcode == BITS3(1,1,1)) {
      /* -------- 011 ZIP1 std7_std7_std7 -------- */
      /* -------- 111 ZIP2 std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isZIP1 = opcode == BITS3(0,1,1);
      IROp op     = isZIP1 ? mkVecINTERLEAVELO(size)
                           : mkVecINTERLEAVEHI(size);
      IRTemp preL = newTempV128();
      IRTemp preR = newTempV128();
      IRTemp res  = newTempV128();
      if (bitQ == 0 && !isZIP1) {
         IRTemp z128 = newTempV128();
         assign(z128, mkV128(0x0000));
         // preL = Vm shifted left 32 bits
         // preR = Vn shifted left 32 bits
         assign(preL, triop(Iop_SliceV128,
                            getQReg128(mm), mkexpr(z128), mkU8(12)));
         assign(preR, triop(Iop_SliceV128,
                            getQReg128(nn), mkexpr(z128), mkU8(12)));
      } else {
         assign(preL, getQReg128(mm));
         assign(preR, getQReg128(nn));
      }
      assign(res, binop(op, mkexpr(preL), mkexpr(preR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isZIP1 ? "zip1" : "zip2";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_across_lanes(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28    23   21    16     11 9 4
      0 q u 01110 size 11000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0) || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011 SADDLV -------- */
      /* -------- 1,xx,00011 UADDLV -------- */
      /* size is the narrow size */
      if (size == X11 || (size == X10 && bitQ == 0)) return False;
      Bool isU = bitU == 1;
      IRTemp src = newTempV128();
      assign(src, getQReg128(nn));
      /* The basic plan is to widen the lower half, and if Q = 1,
         the upper half too.  Add them together (if Q = 1), and in
         either case fold with add at twice the lane width.
      */
      IRExpr* widened
         = mkexpr(math_WIDEN_LO_OR_HI_LANES(
                     isU, False/*!fromUpperHalf*/, size, mkexpr(src)));
      if (bitQ == 1) {
         widened
            = binop(mkVecADD(size+1),
                    widened,
                    mkexpr(math_WIDEN_LO_OR_HI_LANES(
                              isU, True/*fromUpperHalf*/, size, mkexpr(src)))
              );
      }
      /* Now fold. */
      IRTemp tWi = newTempV128();
      assign(tWi, widened);
      IRTemp res = math_FOLDV(tWi, mkVecADD(size+1));
      putQReg128(dd, mkexpr(res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar  ch  = "bhsd"[size+1];
      DIP("%s %s.%c, %s.%s\n", isU ? "uaddlv" : "saddlv",
          nameQReg128(dd), ch, nameQReg128(nn), arr);
      return True;
   }

   UInt ix = 0;
   /**/ if (opcode == BITS5(0,1,0,1,0)) { ix = bitU == 0 ? 1 : 2; }
   else if (opcode == BITS5(1,1,0,1,0)) { ix = bitU == 0 ? 3 : 4; }
   else if (opcode == BITS5(1,1,0,1,1) && bitU == 0) { ix = 5; }
   /**/
   if (ix != 0) {
      /* -------- 0,xx,01010: SMAXV -------- (1) */
      /* -------- 1,xx,01010: UMAXV -------- (2) */
      /* -------- 0,xx,11010: SMINV -------- (3) */
      /* -------- 1,xx,11010: UMINV -------- (4) */
      /* -------- 0,xx,11011: ADDV  -------- (5) */
      vassert(ix >= 1 && ix <= 5);
      if (size == X11) return False; // 1d,2d cases not allowed
      if (size == X10 && bitQ == 0) return False; // 2s case not allowed
      const IROp opMAXS[3]
         = { Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4 };
      const IROp opMAXU[3]
         = { Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4 };
      const IROp opMINS[3]
         = { Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4 };
      const IROp opMINU[3]
         = { Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4 };
      const IROp opADD[3]
         = { Iop_Add8x16, Iop_Add16x8, Iop_Add32x4 };
      vassert(size < 3);
      IROp op = Iop_INVALID;
      const HChar* nm = NULL;
      switch (ix) {
         case 1: op = opMAXS[size]; nm = "smaxv"; break;
         case 2: op = opMAXU[size]; nm = "umaxv"; break;
         case 3: op = opMINS[size]; nm = "sminv"; break;
         case 4: op = opMINU[size]; nm = "uminv"; break;
         case 5: op = opADD[size];  nm = "addv";  break;
         default: vassert(0);
      }
      vassert(op != Iop_INVALID && nm != NULL);
      IRTemp tN1 = newTempV128();
      assign(tN1, getQReg128(nn));
      /* If Q == 0, we're just folding lanes in the lower half of
         the value.  In which case, copy the lower half of the
         source into the upper half, so we can then treat it the
         same as the full width case.  Except for the addition case,
         in which we have to zero out the upper half. */
      IRTemp tN2 = newTempV128();
      assign(tN2, bitQ == 0
                     ? (ix == 5 ? unop(Iop_ZeroHI64ofV128, mkexpr(tN1))
                                : mk_CatEvenLanes64x2(tN1,tN1))
                     : mkexpr(tN1));
      IRTemp res = math_FOLDV(tN2, op);
      if (res == IRTemp_INVALID)
         return False; /* means math_FOLDV
                          doesn't handle this case yet */
      putQReg128(dd, mkexpr(res));
      const IRType tys[3] = { Ity_I8, Ity_I16, Ity_I32 };
      IRType laneTy = tys[size];
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s, %s.%s\n", nm,
          nameQRegLO(dd, laneTy), nameQReg128(nn), arr);
      return True;
   }

   if ((size == X00 || size == X10)
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 0,00,01100: FMAXNMV s_4s -------- */
      /* -------- 0,10,01100: FMINNMV s_4s -------- */
      /* -------- 1,00,01111: FMAXV   s_4s -------- */
      /* -------- 1,10,01111: FMINV   s_4s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      if (bitQ == 0) return False; // Only 4s is allowed
      Bool   isMIN = (size & 2) == 2;
      Bool   isNM  = opcode == BITS5(0,1,1,0,0);
      IROp   opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(2);
      IRTemp src   = newTempV128();
      assign(src, getQReg128(nn));
      IRTemp res = math_FOLDV(src, opMXX);
      putQReg128(dd, mkexpr(res));
      DIP("%s%sv s%u, v%u.4s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", dd, nn);
      return True;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31     28       20   15 14   10 9 4
      0 q op 01110000 imm5 0  imm4 1  n d
      Decode fields: q,op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,21) != BITS8(0,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ  = INSN(30,30);
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   /* -------- x,0,0000: DUP (element, vector) -------- */
   /* 31  28       20   15     9 4
      0q0 01110000 imm5 000001 n d  DUP Vd.T, Vn.Ts[index]
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      UInt   laneNo    = 0;
      UInt   laneSzLg2 = 0;
      HChar  laneCh    = '?';
      IRTemp res = handle_DUP_VEC_ELEM(&laneNo, &laneSzLg2, &laneCh,
                                       getQReg128(nn), imm5);
      if (res == IRTemp_INVALID)
         return False;
      if (bitQ == 0 && laneSzLg2 == X11)
         return False; /* .1d case */
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arT = nameArr_Q_SZ(bitQ, laneSzLg2);
      DIP("dup %s.%s, %s.%c[%u]\n",
          nameQReg128(dd), arT, nameQReg128(nn), laneCh, laneNo);
      return True;
   }

   /* -------- x,0,0001: DUP (general, vector) -------- */
   /* 31  28       20   15       9 4
      0q0 01110000 imm5 0 0001 1 n d  DUP Vd.T, Rn
      Q=0 writes 64, Q=1 writes 128
      imm5: xxxx1  8B(q=0)      or 16b(q=1),  R=W
            xxx10  4H(q=0)      or 8H(q=1),   R=W
            xx100  2S(q=0)      or 4S(q=1),   R=W
            x1000  Invalid(q=0) or 2D(q=1),   R=X
            x0000  Invalid(q=0) or Invalid(q=1)
      Require op=0, imm4=0001
   */
   if (bitOP == 0 && imm4 == BITS4(0,0,0,1)) {
      Bool   isQ = bitQ == 1;
      IRTemp w0  = newTemp(Ity_I64);
      const HChar* arT = "??";
      IRType laneTy = Ity_INVALID;
      if (imm5 & 1) {
         arT    = isQ ? "16b" : "8b";
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, unop(Iop_64to8, getIReg64orZR(nn))));
      }
      else if (imm5 & 2) {
         arT    = isQ ? "8h" : "4h";
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, unop(Iop_64to16, getIReg64orZR(nn))));
      }
      else if (imm5 & 4) {
         arT    = isQ ? "4s" : "2s";
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, unop(Iop_64to32, getIReg64orZR(nn))));
      }
      else if ((imm5 & 8) && isQ) {
         arT    = "2d";
         laneTy = Ity_I64;
         assign(w0, getIReg64orZR(nn));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         IRTemp w1 = math_DUP_TO_64(w0, laneTy);
         putQReg128(dd, binop(Iop_64HLtoV128,
                              isQ ? mkexpr(w1) : mkU64(0), mkexpr(w1)));
         DIP("dup %s.%s, %s\n",
             nameQReg128(dd), arT, nameIRegOrZR(laneTy == Ity_I64, nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,0,0011: INS (general) -------- */
   /* 31  28       20   15     9 4
      010 01110000 imm5 000111 n d  INS Vd.Ts[ix], Rn
      where Ts,ix = case imm5 of xxxx1 -> B, xxxx
                                 xxx10 -> H, xxx
                                 xx100 -> S, xx
                                 x1000 -> D, x
   */
   if (bitQ == 1 && bitOP == 0 && imm4 == BITS4(0,0,1,1)) {
      HChar   ts     = '?';
      UInt    laneNo = 16;
      IRExpr* src    = NULL;
      if (imm5 & 1) {
         src    = unop(Iop_64to8, getIReg64orZR(nn));
         laneNo = (imm5 >> 1) & 15;
         ts     = 'b';
      }
      else if (imm5 & 2) {
         src    = unop(Iop_64to16, getIReg64orZR(nn));
         laneNo = (imm5 >> 2) & 7;
         ts     = 'h';
      }
      else if (imm5 & 4) {
         src    = unop(Iop_64to32, getIReg64orZR(nn));
         laneNo = (imm5 >> 3) & 3;
         ts     = 's';
      }
      else if (imm5 & 8) {
         src    = getIReg64orZR(nn);
         laneNo = (imm5 >> 4) & 1;
         ts     = 'd';
      }
      /* */
      if (src) {
         vassert(laneNo < 16);
         putQRegLane(dd, laneNo, src);
         DIP("ins %s.%c[%u], %s\n",
             nameQReg128(dd), ts, laneNo, nameIReg64orZR(nn));
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- x,0,0101: SMOV -------- */
   /* -------- x,0,0111: UMOV -------- */
   /* 31  28        20   15     9 4
      0q0 01110 000 imm5 001111 n d  UMOV Xd/Wd, Vn.Ts[index]
      0q0 01110 000 imm5 001011 n d  SMOV Xd/Wd, Vn.Ts[index]
      dest is Xd when q==1, Wd when q==0
      UMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, 8Uto64
                          1:xxxx1 -> invalid
                          0:xxx10 -> H, xxx,  16Uto64
                          1:xxx10 -> invalid
                          0:xx100 -> S, xx,   32Uto64
                          1:xx100 -> invalid
                          1:x1000 -> D, x,    copy64
                          other   -> invalid
      SMOV:
         Ts,index,ops = case q:imm5 of
                          0:xxxx1 -> B, xxxx, (32Uto64 . 8Sto32)
                          1:xxxx1 -> B, xxxx, 8Sto64
                          0:xxx10 -> H, xxx,  (32Uto64 . 16Sto32)
                          1:xxx10 -> H, xxx,  16Sto64
                          0:xx100 -> invalid
                          1:xx100 -> S, xx,   32Sto64
                          1:x1000 -> invalid
                          other   -> invalid
   */
   if (bitOP == 0 && (imm4 == BITS4(0,1,0,1) || imm4 == BITS4(0,1,1,1))) {
      Bool isU = (imm4 & 2) == 2;
      const HChar* arTs = "??";
      UInt    laneNo = 16; /* invalid */
      // Setting 'res' to non-NULL determines valid/invalid
      IRExpr* res    = NULL;
      if (!bitQ && (imm5 & 1)) { // 0:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? unop(Iop_8Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_8Sto32, lane));
         arTs = "b";
      }
      else if (bitQ && (imm5 & 1)) { // 1:xxxx1
         laneNo = (imm5 >> 1) & 15;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I8);
         res = isU ? NULL
                   : unop(Iop_8Sto64, lane);
         arTs = "b";
      }
      else if (!bitQ && (imm5 & 2)) { // 0:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? unop(Iop_16Uto64, lane)
                   : unop(Iop_32Uto64, unop(Iop_16Sto32, lane));
         arTs = "h";
      }
      else if (bitQ && (imm5 & 2)) { // 1:xxx10
         laneNo = (imm5 >> 2) & 7;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I16);
         res = isU ? NULL
                   : unop(Iop_16Sto64, lane);
         arTs = "h";
      }
      else if (!bitQ && (imm5 & 4)) { // 0:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? unop(Iop_32Uto64, lane)
                   : NULL;
         arTs = "s";
      }
      else if (bitQ && (imm5 & 4)) { // 1:xx100
         laneNo = (imm5 >> 3) & 3;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I32);
         res = isU ? NULL
                   : unop(Iop_32Sto64, lane);
         arTs = "s";
      }
      else if (bitQ && (imm5 & 8)) { // 1:x1000
         laneNo = (imm5 >> 4) & 1;
         IRExpr* lane = getQRegLane(nn, laneNo, Ity_I64);
         res = isU ? lane
                   : NULL;
         arTs = "d";
      }
      /* */
      if (res) {
         vassert(laneNo < 16);
         putIReg64orZR(dd, res);
         DIP("%cmov %s, %s.%s[%u]\n", isU ? 'u' : 's',
             nameIRegOrZR(bitQ == 1, dd),
             nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* invalid */
      return False;
   }

   /* -------- 1,1,xxxx: INS (element) -------- */
   /* 31  28       20   14     9 4
      011 01110000 imm5 0 imm4 n d  INS Vd.Ts[ix1], Vn.Ts[ix2]
      where Ts,ix1,ix2
         = case imm5 of xxxx1 -> B, xxxx, imm4[3:0]
                        xxx10 -> H, xxx,  imm4[3:1]
                        xx100 -> S, xx,   imm4[3:2]
                        x1000 -> D, x,    imm4[3:3]
   */
   if (bitQ == 1 && bitOP == 1) {
      HChar  ts  = '?';
      IRType ity = Ity_INVALID;
      UInt   ix1 = 16;
      UInt   ix2 = 16;
      if (imm5 & 1) {
         ts  = 'b';
         ity = Ity_I8;
         ix1 = (imm5 >> 1) & 15;
         ix2 = (imm4 >> 0) & 15;
      }
      else if (imm5 & 2) {
         ts  = 'h';
         ity = Ity_I16;
         ix1 = (imm5 >> 2) & 7;
         ix2 = (imm4 >> 1) & 7;
      }
      else if (imm5 & 4) {
         ts  = 's';
         ity = Ity_I32;
         ix1 = (imm5 >> 3) & 3;
         ix2 = (imm4 >> 2) & 3;
      }
      else if (imm5 & 8) {
         ts  = 'd';
         ity = Ity_I64;
         ix1 = (imm5 >> 4) & 1;
         ix2 = (imm4 >> 3) & 1;
      }
      /* */
      if (ity != Ity_INVALID) {
         vassert(ix1 < 16);
         vassert(ix2 < 16);
         putQRegLane(dd, ix1, getQRegLane(nn, ix2, ity));
         DIP("ins %s.%c[%u], %s.%c[%u]\n",
             nameQReg128(dd), ts, ix1, nameQReg128(nn), ts, ix2);
         return True;
      }
      /* invalid */
      return False;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_modified_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28          18  15    11 9     4
      0q op 01111 00000 abc cmode 01 defgh d
      Decode fields: q,op,cmode
      Bit 11 is really "o2", but it is always zero.
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,19) != BITS10(0,1,1,1,1,0,0,0,0,0)
       || INSN(11,10) != BITS2(0,1)) {
      return False;
   }
   UInt bitQ     = INSN(30,30);
   UInt bitOP    = INSN(29,29);
   UInt cmode    = INSN(15,12);
   UInt abcdefgh = (INSN(18,16) << 5) | INSN(9,5);
   UInt dd       = INSN(4,0);

   ULong imm64lo  = 0;
   UInt  op_cmode = (bitOP << 4) | cmode;
   Bool  ok       = False;
   Bool  isORR    = False;
   Bool  isBIC    = False;
   Bool  isMOV    = False;
   Bool  isMVN    = False;
   Bool  isFMOV   = False;
   switch (op_cmode) {
      /* -------- x,0,0000 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0010 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0100 MOVI 32-bit shifted imm -------- */
      /* -------- x,0,0110 MOVI 32-bit shifted imm -------- */
      case BITS5(0,0,0,0,0): case BITS5(0,0,0,1,0):
      case BITS5(0,0,1,0,0): case BITS5(0,0,1,1,0): // 0:0xx0
         ok = True; isMOV = True; break;

      /* -------- x,0,0001 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0011 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0101 ORR (vector, immediate) 32-bit -------- */
      /* -------- x,0,0111 ORR (vector, immediate) 32-bit -------- */
      case BITS5(0,0,0,0,1): case BITS5(0,0,0,1,1):
      case BITS5(0,0,1,0,1): case BITS5(0,0,1,1,1): // 0:0xx1
         ok = True; isORR = True; break;

      /* -------- x,0,1000 MOVI 16-bit shifted imm -------- */
      /* -------- x,0,1010 MOVI 16-bit shifted imm -------- */
      case BITS5(0,1,0,0,0): case BITS5(0,1,0,1,0): // 0:10x0
         ok = True; isMOV = True; break;

      /* -------- x,0,1001 ORR (vector, immediate) 16-bit -------- */
      /* -------- x,0,1011 ORR (vector, immediate) 16-bit -------- */
      case BITS5(0,1,0,0,1): case BITS5(0,1,0,1,1): // 0:10x1
         ok = True; isORR = True; break;

      /* -------- x,0,1100 MOVI 32-bit shifting ones -------- */
      /* -------- x,0,1101 MOVI 32-bit shifting ones -------- */
      case BITS5(0,1,1,0,0): case BITS5(0,1,1,0,1): // 0:110x
         ok = True; isMOV = True; break;

      /* -------- x,0,1110 MOVI 8-bit -------- */
      case BITS5(0,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- x,0,1111 FMOV (vector, immediate, F32) -------- */
      case BITS5(0,1,1,1,1): // 0:1111
         ok = True; isFMOV = True; break;

      /* -------- x,1,0000 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0010 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0100 MVNI 32-bit shifted imm -------- */
      /* -------- x,1,0110 MVNI 32-bit shifted imm -------- */
      case BITS5(1,0,0,0,0): case BITS5(1,0,0,1,0):
      case BITS5(1,0,1,0,0): case BITS5(1,0,1,1,0): // 1:0xx0
         ok = True; isMVN = True; break;

      /* -------- x,1,0001 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0011 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0101 BIC (vector, immediate) 32-bit -------- */
      /* -------- x,1,0111 BIC (vector, immediate) 32-bit -------- */
      case BITS5(1,0,0,0,1): case BITS5(1,0,0,1,1):
      case BITS5(1,0,1,0,1): case BITS5(1,0,1,1,1): // 1:0xx1
         ok = True; isBIC = True; break;

      /* -------- x,1,1000 MVNI 16-bit shifted imm -------- */
      /* -------- x,1,1010 MVNI 16-bit shifted imm -------- */
      case BITS5(1,1,0,0,0): case BITS5(1,1,0,1,0): // 1:10x0
         ok = True; isMVN = True; break;

      /* -------- x,1,1001 BIC (vector, immediate) 16-bit -------- */
      /* -------- x,1,1011 BIC (vector, immediate) 16-bit -------- */
      case BITS5(1,1,0,0,1): case BITS5(1,1,0,1,1): // 1:10x1
         ok = True; isBIC = True; break;

      /* -------- x,1,1100 MVNI 32-bit shifting ones -------- */
      /* -------- x,1,1101 MVNI 32-bit shifting ones -------- */
      case BITS5(1,1,1,0,0): case BITS5(1,1,1,0,1): // 1:110x
         ok = True; isMVN = True; break;

      /* -------- 0,1,1110 MOVI 64-bit scalar -------- */
      /* -------- 1,1,1110 MOVI 64-bit vector -------- */
      case BITS5(1,1,1,1,0):
         ok = True; isMOV = True; break;

      /* -------- 1,1,1111 FMOV (vector, immediate, F64) -------- */
      case BITS5(1,1,1,1,1): // 1:1111
         ok = bitQ == 1; isFMOV = True; break;

      default:
         break;
   }
   if (ok) {
      vassert(1 == (isMOV ? 1 : 0) + (isMVN ? 1 : 0)
                   + (isORR ? 1 : 0) + (isBIC ? 1 : 0) + (isFMOV ? 1 : 0));
      ok = AdvSIMDExpandImm(&imm64lo, bitOP, cmode, abcdefgh);
   }
   if (ok) {
      if (isORR || isBIC) {
         ULong inv
            = isORR ? 0ULL : ~0ULL;
         IRExpr* immV128
            = binop(Iop_64HLtoV128, mkU64(inv ^ imm64lo), mkU64(inv ^ imm64lo));
         IRExpr* res
            = binop(isORR ? Iop_OrV128 : Iop_AndV128, getQReg128(dd), immV128);
         const HChar* nm = isORR ? "orr" : "bic";
         if (bitQ == 0) {
            putQReg128(dd, unop(Iop_ZeroHI64ofV128, res));
            DIP("%s %s.1d, %016llx\n", nm, nameQReg128(dd), imm64lo);
         } else {
            putQReg128(dd, res);
            DIP("%s %s.2d, #0x%016llx'%016llx\n", nm,
                nameQReg128(dd), imm64lo, imm64lo);
         }
      }
      else if (isMOV || isMVN || isFMOV) {
         if (isMVN) imm64lo = ~imm64lo;
         ULong   imm64hi = bitQ == 0 ? 0 : imm64lo;
         IRExpr* immV128 = binop(Iop_64HLtoV128, mkU64(imm64hi),
                                                 mkU64(imm64lo));
         putQReg128(dd, immV128);
         DIP("mov %s, #0x%016llx'%016llx\n", nameQReg128(dd), imm64hi, imm64lo);
      }
      return True;
   }
   /* else fall through */

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_copy(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28       20   15 14   10 9 4
      01 op 11110000 imm5 0  imm4 1  n d
      Decode fields: op,imm4
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,21) != BITS8(1,1,1,1,0,0,0,0)
       || INSN(15,15) != 0 || INSN(10,10) != 1) {
      return False;
   }
   UInt bitOP = INSN(29,29);
   UInt imm5  = INSN(20,16);
   UInt imm4  = INSN(14,11);
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   if (bitOP == 0 && imm4 == BITS4(0,0,0,0)) {
      /* -------- 0,0000 DUP (element, scalar) -------- */
      IRTemp w0 = newTemp(Ity_I64);
      const HChar* arTs = "??";
      IRType laneTy = Ity_INVALID;
      UInt   laneNo = 16; /* invalid */
      if (imm5 & 1) {
         arTs   = "b";
         laneNo = (imm5 >> 1) & 15;
         laneTy = Ity_I8;
         assign(w0, unop(Iop_8Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 2) {
         arTs   = "h";
         laneNo = (imm5 >> 2) & 7;
         laneTy = Ity_I16;
         assign(w0, unop(Iop_16Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 4) {
         arTs   = "s";
         laneNo = (imm5 >> 3) & 3;
         laneTy = Ity_I32;
         assign(w0, unop(Iop_32Uto64, getQRegLane(nn, laneNo, laneTy)));
      }
      else if (imm5 & 8) {
         arTs   = "d";
         laneNo = (imm5 >> 4) & 1;
         laneTy = Ity_I64;
         assign(w0, getQRegLane(nn, laneNo, laneTy));
      }
      else {
         /* invalid; leave laneTy unchanged. */
      }
      /* */
      if (laneTy != Ity_INVALID) {
         vassert(laneNo < 16);
         putQReg128(dd, binop(Iop_64HLtoV128, mkU64(0), mkexpr(w0)));
         DIP("dup %s, %s.%s[%u]\n",
             nameQRegLO(dd, laneTy), nameQReg128(nn), arTs, laneNo);
         return True;
      }
      /* else fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_pairwise(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28    23 21    16     11 9 4
      01 u 11110 sz 11000 opcode 10 n d
      Decode fields: u,sz,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,1,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt sz     = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (bitU == 0 && sz == X11 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,11,11011 ADDP d_2d -------- */
      IRTemp xy = newTempV128();
      IRTemp xx = newTempV128();
      assign(xy, getQReg128(nn));
      assign(xx, binop(Iop_InterleaveHI64x2, mkexpr(xy), mkexpr(xy)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Add64x2, mkexpr(xy), mkexpr(xx))));
      DIP("addp d%u, %s.2d\n", dd, nameQReg128(nn));
      return True;
   }

   if (bitU == 1 && sz <= X01 && opcode == BITS5(0,1,1,0,1)) {
      /* -------- 1,00,01101 FADDP s_2s -------- */
      /* -------- 1,01,01101 FADDP d_2d -------- */
      Bool isD   = sz == X01;
      IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp opADD = mkVecADDF(isD ? 3 : 2);
      IRTemp src  = newTempV128();
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          triop(opADD, mkexpr(mk_get_IR_rounding_mode()),
                                       mkexpr(argL), mkexpr(argR))));
      DIP(isD ? "faddp d%u, v%u.2d\n" : "faddp s%u, v%u.2s\n", dd, nn);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,1,1))) {
      /* -------- 1,0x,01100 FMAXNMP d_2d, s_2s -------- */
      /* -------- 1,1x,01100 FMINNMP d_2d, s_2s -------- */
      /* -------- 1,0x,01111 FMAXP   d_2d, s_2s -------- */
      /* -------- 1,1x,01111 FMINP   d_2d, s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD   = (sz & 1) == 1;
      Bool isMIN = (sz & 2) == 2;
      Bool isNM  = opcode == BITS5(0,1,1,0,0);
      IROp opZHI = mkVecZEROHIxxOFV128(isD ? 3 : 2);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp src  = newTempV128();
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(src, getQReg128(nn));
      assign(argL, unop(opZHI, mkexpr(src)));
      assign(argR, unop(opZHI, triop(Iop_SliceV128, mkexpr(src), mkexpr(src),
                                                    mkU8(isD ? 8 : 4))));
      putQReg128(dd, unop(opZHI,
                          binop(opMXX, mkexpr(argL), mkexpr(argR))));
      HChar c = isD ? 'd' : 's';
      DIP("%s%sp %c%u, v%u.2%c\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "", c, dd, nn, c);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_shift_by_imm(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31   28     22   18   15     10 9 4
      01 u 111110 immh immb opcode 1  n d
      Decode fields: u,immh,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,23) != BITS6(1,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   UInt immhb  = (immh << 3) | immb;

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0))) {
      /* -------- 0,1xxx,00000 SSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00000 USHR d_d_#imm -------- */
      /* -------- 0,1xxx,00010 SSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00010 USRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op  = isU ? Iop_ShrN64x2 : Iop_SarN64x2;
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (sh == 64 && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (sh == 64) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(sh - nudge)));
      }
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if ((immh & 8) == 8
       && (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0))) {
      /* -------- 0,1xxx,00100 SRSHR d_d_#imm -------- */
      /* -------- 1,1xxx,00100 URSHR d_d_#imm -------- */
      /* -------- 0,1xxx,00110 SRSRA d_d_#imm -------- */
      /* -------- 1,1xxx,00110 URSRA d_d_#imm -------- */
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      UInt sh    = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      IROp    op   = isU ? Iop_Rsh64Ux2 : Iop_Rsh64Sx2;
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-sh)));
      IRExpr* amt = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(Iop_Add64x2, getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s d%u, d%u, #%u\n", nm, dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,1xxx,01000 SRI d_d_#imm -------- */
      UInt sh = 128 - immhb;
      vassert(sh >= 1 && sh <= 64);
      if (sh == 64) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(dd)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (ULong)(((Long)0x8000000000000000ULL) >> (sh-1));
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShrN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sri d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }
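
   /* (Added note, not in the original source: for SRI, nmask is an
      arithmetic right shift of 0x8000000000000000, so it keeps the top
      sh bits of the destination; e.g. sh == 8 gives nmask ==
      0xFF00000000000000, and the right-shifted bits of nn fill the
      remainder.  SLI below uses the complementary low-bits mask.) */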

   if (bitU == 0 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,1xxx,01010 SHL d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      putQReg128(dd,
                 unop(Iop_ZeroHI64ofV128,
                      sh == 0 ? getQReg128(nn)
                              : binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
      DIP("shl d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (bitU == 1 && (immh & 8) == 8 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 1,1xxx,01010 SLI d_d_#imm -------- */
      UInt sh = immhb - 64;
      vassert(sh >= 0 && sh < 64);
      if (sh == 0) {
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, getQReg128(nn)));
      } else {
         /* sh is in range 1 .. 63 */
         ULong   nmask  = (1ULL << sh) - 1;
         IRExpr* nmaskV = binop(Iop_64HLtoV128, mkU64(nmask), mkU64(nmask));
         IRTemp  res    = newTempV128();
         assign(res, binop(Iop_OrV128,
                           binop(Iop_AndV128, getQReg128(dd), nmaskV),
                           binop(Iop_ShlN64x2, getQReg128(nn), mkU8(sh))));
         putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      }
      DIP("sli d%u, d%u, #%u\n", dd, nn, sh);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0)
       || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
      /* -------- 0,01110  SQSHL  #imm -------- */
      /* -------- 1,01110  UQSHL  #imm -------- */
      /* -------- 1,01100  SQSHLU #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
      UInt lanebits = 8 << size;
      shift = lanebits - shift;
      vassert(shift >= 0 && shift < lanebits);
      const HChar* nm = NULL;
      /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
      else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
      else vassert(0);
      IRTemp qDiff1 = IRTemp_INVALID;
      IRTemp qDiff2 = IRTemp_INVALID;
      IRTemp res    = IRTemp_INVALID;
      IRTemp src    = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn));
      /* This relies on the fact that the zeroed out lanes generate zeroed
         result lanes and don't saturate, so there's no point in trimming
         the resulting res, qDiff1 or qDiff2 values. */
      math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
      putQReg128(dd, mkexpr(res));
      updateQCFLAGwithDifference(qDiff1, qDiff2);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, #%u\n", nm, arr, dd, arr, nn, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010   SQSHRN #imm -------- */
      /* -------- 1,10010   UQSHRN #imm -------- */
      /* -------- 0,10011  SQRSHRN #imm -------- */
      /* -------- 1,10011  UQRSHRN #imm -------- */
      /* -------- 1,10000  SQSHRUN #imm -------- */
      /* -------- 1,10001 SQRSHRUN #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(size >= X00 && size <= X10);
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn"; op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn"; op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size+1, getQReg128(nn));
      IRTemp pair   = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putQReg128(dd, mkexpr(res64in128));
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u, #%u\n", nm, arrNarrow, dd, arrWide, nn, shift);
      return True;
   }
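
   /* (Added note, not in the original source: the QANDq...NARROW ops
      used above deliver a pair in one V128 -- the narrowed result in
      the lower 64 bits and the saturation residue in the upper 64 --
      so duplicating the upper half and comparing it against zero is
      what drives the QC update.) */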
9342
sewardjdf1628c2014-06-10 22:52:05 +00009343# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
9344 return False;
9345# undef INSN
9346}
9347
sewardjfc83d2c2014-06-12 10:15:46 +00009348
sewardjdf1628c2014-06-10 22:52:05 +00009349static
9350Bool dis_AdvSIMD_scalar_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
9351{
sewardj54ffa1d2014-07-22 09:27:49 +00009352 /* 31 29 28 23 21 20 15 11 9 4
9353 01 U 11110 size 1 m opcode 00 n d
9354 Decode fields: u,opcode
9355 */
sewardjdf1628c2014-06-10 22:52:05 +00009356# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +00009357 if (INSN(31,30) != BITS2(0,1)
9358 || INSN(28,24) != BITS5(1,1,1,1,0)
9359 || INSN(21,21) != 1
9360 || INSN(11,10) != BITS2(0,0)) {
9361 return False;
9362 }
9363 UInt bitU = INSN(29,29);
9364 UInt size = INSN(23,22);
9365 UInt mm = INSN(20,16);
9366 UInt opcode = INSN(15,12);
9367 UInt nn = INSN(9,5);
9368 UInt dd = INSN(4,0);
9369 vassert(size < 4);
9370
9371 if (bitU == 0
9372 && (opcode == BITS4(1,1,0,1)
9373 || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
9374 /* -------- 0,1101 SQDMULL -------- */ // 0 (ks)
9375 /* -------- 0,1001 SQDMLAL -------- */ // 1
9376 /* -------- 0,1011 SQDMLSL -------- */ // 2
9377 /* Widens, and size refers to the narrowed lanes. */
9378 UInt ks = 3;
9379 switch (opcode) {
9380 case BITS4(1,1,0,1): ks = 0; break;
9381 case BITS4(1,0,0,1): ks = 1; break;
9382 case BITS4(1,0,1,1): ks = 2; break;
9383 default: vassert(0);
9384 }
9385 vassert(ks >= 0 && ks <= 2);
9386 if (size == X00 || size == X11) return False;
9387 vassert(size <= 2);
9388 IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
9389 vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
9390 newTempsV128_3(&vecN, &vecM, &vecD);
9391 assign(vecN, getQReg128(nn));
9392 assign(vecM, getQReg128(mm));
9393 assign(vecD, getQReg128(dd));
9394 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
9395 False/*!is2*/, size, "mas"[ks],
9396 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
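      /* sat1q/sat1n record saturation from the doubling multiply itself;
         sat2q/sat2n are produced only for the accumulating variants
         (SQDMLAL/SQDMLSL), where the subsequent add or subtract can
         saturate a second time.  Any difference within a pair causes
         QCFLAG to be set below. */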
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, %c%d\n",
          nm, arrWide, dd, arrNarrow, nn, arrNarrow, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21 20 15     10 9 4
      01 U  11110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std4_std4_std4 -------- */
      /* -------- 1,xx,00001 UQADD std4_std4_std4 -------- */
      /* -------- 0,xx,00101 SQSUB std4_std4_std4 -------- */
      /* -------- 1,xx,00101 UQSUB std4_std4_std4 -------- */
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(qop, mkexpr(argL), mkexpr(argR)))));
      assign(nres, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                             size, binop(nop, mkexpr(argL), mkexpr(argR)))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }
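
   /* Example of the QCFLAG scheme above: for the B-sized variant,
      SQADD 0x7F, 0x01 produces the saturated result 0x7F, whereas the
      plain ADD produces 0x80.  The two differ, so QCFLAG gets set.  When
      no saturation occurs the two results agree and QCFLAG is left
      unchanged. */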

   if (size == X11 && opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,11,00110 CMGT d_d_d -------- */ // >s
      /* -------- 1,11,00110 CMHI d_d_d -------- */ // >u
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                  : binop(Iop_CmpGT64Ux2, argL, argR));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGT ? "cmgt" : "cmhi",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,11,00111 CMGE d_d_d -------- */ // >=s
      /* -------- 1,11,00111 CMHS d_d_d -------- */ // >=u
      Bool    isGE = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL))
                  : unop(Iop_NotV128, binop(Iop_CmpGT64Ux2, argR, argL)));
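      /* There is no 64x2 >= comparison at the IR level, so a >= b is
         computed as NOT(b > a). */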
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isGE ? "cmge" : "cmhs",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && (opcode == BITS5(0,1,0,0,0)
                       || opcode == BITS5(0,1,0,1,0))) {
      /* -------- 0,xx,01000 SSHL  d_d_d -------- */
      /* -------- 0,xx,01010 SRSHL d_d_d -------- */
      /* -------- 1,xx,01000 USHL  d_d_d -------- */
      /* -------- 1,xx,01010 URSHL d_d_d -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size)  : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      const HChar* nm = isR ? (isU ? "urshl" : "srshl")
                            : (isU ? "ushl"  : "sshl");
      DIP("%s %s, %s, %s\n", nm,
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std4_std4_std4 -------- */
      /* -------- 0,xx,01011 SQRSHL std4_std4_std4 -------- */
      /* -------- 1,xx,01001 UQSHL  std4_std4_std4 -------- */
      /* -------- 1,xx,01011 UQRSHL std4_std4_std4 -------- */
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size)  : mkVecQANDSQSH(size));
      /* This is a bit tricky.  Since we're only interested in the lowest
         lane of the result, we zero out all the rest in the operands, so
         as to ensure that other lanes don't pollute the returned Q value.
         This works because it means, for the lanes we don't care about, we
         are shifting zero by zero, which can never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(
         res256,
         binop(op,
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(nn))),
               mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, getQReg128(mm)))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl"  : "sqshl");
      const HChar  arr = "bhsd"[size];
      DIP("%s %c%u, %c%u, %c%u\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,11,10000 ADD d_d_d -------- */
      /* -------- 1,11,10000 SUB d_d_d -------- */
      Bool   isSUB = bitU == 1;
      IRTemp res   = newTemp(Ity_I64);
      assign(res, binop(isSUB ? Iop_Sub64 : Iop_Add64,
                        getQRegLane(nn, 0, Ity_I64),
                        getQRegLane(mm, 0, Ity_I64)));
      putQRegLane(dd, 0, mkexpr(res));
      putQRegLane(dd, 1, mkU64(0));
      DIP("%s %s, %s, %s\n", isSUB ? "sub" : "add",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (size == X11 && opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,11,10001 CMTST d_d_d -------- */ // &, != 0
      /* -------- 1,11,10001 CMEQ  d_d_d -------- */ // ==
      Bool    isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                  : unop(Iop_NotV128, binop(Iop_CmpEQ64x2,
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("%s %s, %s, %s\n", isEQ ? "cmeq" : "cmtst",
          nameQRegLO(dd, Ity_I64),
          nameQRegLO(nn, Ity_I64), nameQRegLO(mm, Ity_I64));
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(res))));
      updateQCFLAGwithDifference(
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1q)),
         math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(sat1n)));
      const HChar  arr = "bhsd"[size];
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %c%d, %c%d, %c%d\n", nm, arr, dd, arr, nn, arr, mm);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD d_d_d, s_s_s -------- */
      IRType ity = size == X11 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, unop(mkABSF(ity),
                       triop(mkSUBF(ity),
                             mkexpr(mk_get_IR_rounding_mode()),
                             getQRegLO(nn,ity), getQRegLO(mm,ity))));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fabd %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX d_d_d, s_s_s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      IRType ity = size == X01 ? Ity_F64 : Ity_F32;
      IRTemp res = newTemp(ity);
      assign(res, triop(mkMULF(ity),
                        mkexpr(mk_get_IR_rounding_mode()),
                        getQRegLO(nn,ity), getQRegLO(mm,ity)));
      putQReg128(dd, mkV128(0x0000));
      putQRegLO(dd, mkexpr(res));
      DIP("fmulx %s, %s, %s\n",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ d_d_d, s_s_s -------- */
      /* -------- 1,0x,11100 FCMGE d_d_d, s_s_s -------- */
      Bool   isD   = size == X01;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGE  = bitU == 1;
      IROp   opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                          : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp res   = newTempV128();
      assign(res, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                       : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGE ? "fcmge" : "fcmeq",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT d_d_d, s_s_s -------- */
      Bool   isD   = size == X11;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IROp   opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", "fcmgt",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE d_d_d, s_s_s -------- */
      /* -------- 1,1x,11101 FACGT d_d_d, s_s_s -------- */
      Bool   isD   = (size & 1) == 1;
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      Bool   isGT  = (size & 2) == 2;
      IROp   opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                          : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp   opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp res   = newTempV128();
      assign(res, binop(opCMP, unop(opABS, getQReg128(mm)),
                               unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      DIP("%s %s, %s, %s\n", isGT ? "facgt" : "facge",
          nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  d_d_d, s_s_s -------- */
      /* -------- 0,1x,11111: FRSQRTS d_d_d, s_s_s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      IROp op     = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                           : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));
      HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %c%u\n", isSQRT ? "frsqrts" : "frecps",
          c, dd, c, nn, c, mm);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_scalar_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 29 28    23   21    16     11 9 4
      01 U  11110 size 10000 opcode 10 n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,30) != BITS2(0,1)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std4_std4 -------- */
      /* -------- 1,xx,00011: USQADD std4_std4 -------- */
      /* These are a bit tricky (to say the least).  See comments on
         the vector variants (in dis_AdvSIMD_two_reg_misc) below for
         details. */
      Bool isUSQADD = bitU == 1;
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(qop, mkexpr(argL), mkexpr(argR)));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, binop(nop, mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isUSQADD ? "usqadd" : "suqadd", arr, dd, arr, nn);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std4_std4 -------- */
      /* -------- 1,xx,00111 SQNEG std4_std4 -------- */
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(qresFW));
      IRTemp nres = math_ZERO_ALL_EXCEPT_LOWEST_LANE(size, mkexpr(nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar arr = "bhsd"[size];
      DIP("%s %c%u, %c%u\n", isNEG ? "sqneg" : "sqabs", arr, dd, arr, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,11,01000: CMGT d_d_#0 -------- */ // >s 0
      /* -------- 1,11,01000: CMGE d_d_#0 -------- */ // >=s 0
      Bool    isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isGT ? binop(Iop_CmpGT64Sx2, argL, argR)
                       : unop(Iop_NotV128, binop(Iop_CmpGT64Sx2, argR, argL)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isGT ? "gt" : "ge", dd, nn);
      return True;
   }

   if (size == X11 && opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,11,01001: CMEQ d_d_#0 -------- */ // == 0
      /* -------- 1,11,01001: CMLE d_d_#0 -------- */ // <=s 0
      Bool    isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(Iop_CmpEQ64x2, argL, argR)
                       : unop(Iop_NotV128,
                              binop(Iop_CmpGT64Sx2, argL, argR)));
      putQReg128(dd, unop(Iop_ZeroHI64ofV128, mkexpr(res)));
      DIP("cm%s d%u, d%u, #0\n", isEQ ? "eq" : "le", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,11,01010: CMLT d_d_#0 -------- */ // <s 0
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_CmpGT64Sx2, mkV128(0x0000),
                                                getQReg128(nn))));
      DIP("cm%s d%u, d%u, #0\n", "lt", dd, nn);
      return True;
   }

   if (bitU == 0 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,11,01011 ABS d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          unop(Iop_Abs64x2, getQReg128(nn))));
      DIP("abs d%u, d%u\n", dd, nn);
      return True;
   }

   if (bitU == 1 && size == X11 && opcode == BITS5(0,1,0,1,1)) {
      /* -------- 1,11,01011 NEG d_d -------- */
      putQReg128(dd, unop(Iop_ZeroHI64ofV128,
                          binop(Iop_Sub64x2, mkV128(0x0000), getQReg128(nn))));
      DIP("neg d%u, d%u\n", dd, nn);
      return True;
   }

   UInt ix = 0; /*INVALID*/
   if (size >= X10) {
      switch (opcode) {
         case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
         case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
         case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
         default: break;
      }
   }
   if (ix > 0) {
      /* -------- 0,1x,01100 FCMGT d_d_#0.0, s_s_#0.0 (ix 1) -------- */
      /* -------- 0,1x,01101 FCMEQ d_d_#0.0, s_s_#0.0 (ix 2) -------- */
      /* -------- 0,1x,01110 FCMLT d_d_#0.0, s_s_#0.0 (ix 3) -------- */
      /* -------- 1,1x,01100 FCMGE d_d_#0.0, s_s_#0.0 (ix 4) -------- */
      /* -------- 1,1x,01101 FCMLE d_d_#0.0, s_s_#0.0 (ix 5) -------- */
      Bool   isD     = size == X11;
      IRType ity     = isD ? Ity_F64 : Ity_F32;
      IROp   opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
      IROp   opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
      IROp   opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IROp   opCmp   = Iop_INVALID;
      Bool   swap    = False;
      const HChar* nm = "??";
      switch (ix) {
         case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
         case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
         case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
         case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
         case 5: nm = "fcmle"; opCmp = opCmpLE; break;
         default: vassert(0);
      }
      IRExpr* zero = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
                       : binop(opCmp, getQReg128(nn), zero));
      putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
                                                             mkexpr(res))));

      DIP("%s %s, %s, #0.0\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)
       || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
      /* -------- 0,xx,10100: SQXTN -------- */
      /* -------- 1,xx,10100: UQXTN -------- */
      /* -------- 1,xx,10010: SQXTUN -------- */
      if (size == X11) return False;
      vassert(size < 3);
      IROp opN     = Iop_INVALID;
      Bool zWiden  = True;
      const HChar* nm = "??";
      /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
      }
      else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
         opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
      }
      else vassert(0);
      IRTemp src  = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size+1, getQReg128(nn));
      IRTemp resN = math_ZERO_ALL_EXCEPT_LOWEST_LANE(
                       size, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
      putQReg128(dd, mkexpr(resN));
      /* This widens zero lanes to zero, and compares it against zero, so all
         of the non-participating lanes make no contribution to the
         Q flag state. */
      IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
                                              size, mkexpr(resN));
      updateQCFLAGwithDifference(src, resW);
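      /* Example (sqxtn, H -> B): a source lane holding 0x0123 narrows to
         the saturated value 0x7F; widening that back gives 0x007F, which
         differs from 0x0123, so QCFLAG gets set.  An in-range value such
         as 0x0042 round-trips unchanged and leaves QCFLAG alone. */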
      const HChar arrNarrow = "bhsd"[size];
      const HChar arrWide   = "bhsd"[size+1];
      DIP("%s %c%u, %c%u\n", nm, arrNarrow, dd, arrWide, nn);
      return True;
   }

   ix = 0; /*INVALID*/
   switch (opcode) {
      case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
      case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
      case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
      default: break;
   }
   if (ix > 0) {
      /* -------- 0,0x,11010 FCVTNS d_d, s_s (ix 1) -------- */
      /* -------- 0,0x,11011 FCVTMS d_d, s_s (ix 2) -------- */
      /* -------- 0,0x,11100 FCVTAS d_d, s_s (ix 3) -------- */
      /* -------- 0,1x,11010 FCVTPS d_d, s_s (ix 4) -------- */
      /* -------- 0,1x,11011 FCVTZS d_d, s_s (ix 5) -------- */
      /* -------- 1,0x,11010 FCVTNU d_d, s_s (ix 1) -------- */
      /* -------- 1,0x,11011 FCVTMU d_d, s_s (ix 2) -------- */
      /* -------- 1,0x,11100 FCVTAU d_d, s_s (ix 3) -------- */
      /* -------- 1,1x,11010 FCVTPU d_d, s_s (ix 4) -------- */
      /* -------- 1,1x,11011 FCVTZU d_d, s_s (ix 5) -------- */
      Bool   isD = (size & 1) == 1;
      IRType tyF = isD ? Ity_F64 : Ity_F32;
      IRType tyI = isD ? Ity_I64 : Ity_I32;
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      switch (ix) {
         case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
         case 2: ch = 'm'; irrm = Irrm_NegINF;  break;
         case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
         case 4: ch = 'p'; irrm = Irrm_PosINF;  break;
         case 5: ch = 'z'; irrm = Irrm_ZERO;    break;
         default: vassert(0);
      }
9952 if (bitU == 1) {
sewardjbc0b7222015-03-30 18:49:38 +00009953 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
sewardj400d6b92015-03-30 09:01:51 +00009954 } else {
sewardjbc0b7222015-03-30 18:49:38 +00009955 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
sewardj400d6b92015-03-30 09:01:51 +00009956 }
9957 IRTemp src = newTemp(tyF);
9958 IRTemp res = newTemp(tyI);
9959 assign(src, getQRegLane(nn, 0, tyF));
9960 assign(res, binop(cvt, mkU32(irrm), mkexpr(src)));
9961 putQRegLane(dd, 0, mkexpr(res)); /* bits 31-0 or 63-0 */
sewardjbc0b7222015-03-30 18:49:38 +00009962 if (!isD) {
sewardj400d6b92015-03-30 09:01:51 +00009963 putQRegLane(dd, 1, mkU32(0)); /* bits 63-32 */
9964 }
9965 putQRegLane(dd, 1, mkU64(0)); /* bits 127-64 */
sewardjbc0b7222015-03-30 18:49:38 +00009966 HChar sOrD = isD ? 'd' : 's';
sewardj400d6b92015-03-30 09:01:51 +00009967 DIP("fcvt%c%c %c%u, %c%u\n", ch, bitU == 1 ? 'u' : 's',
9968 sOrD, dd, sOrD, nn);
9969 return True;
9970 }
9971
sewardj89cefe42015-02-24 12:21:01 +00009972 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
9973 /* -------- 0,1x,11101: FRECPE d_d, s_s -------- */
9974 /* -------- 1,1x,11101: FRSQRTE d_d, s_s -------- */
9975 Bool isSQRT = bitU == 1;
9976 Bool isD = (size & 1) == 1;
9977 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
9978 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
9979 IRTemp resV = newTempV128();
9980 assign(resV, unop(op, getQReg128(nn)));
9981 putQReg128(dd, mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? X11 : X10,
9982 mkexpr(resV))));
9983 HChar c = isD ? 'd' : 's';
9984 DIP("%s %c%u, %c%u\n", isSQRT ? "frsqrte" : "frecpe", c, dd, c, nn);
9985 return True;
9986 }
9987
9988 if (bitU == 0 && size >= X10 && opcode == BITS5(1,1,1,1,1)) {
9989 /* -------- 0,1x,11111: FRECPX d_d, s_s -------- */
9990 Bool isD = (size & 1) == 1;
9991 IRType ty = isD ? Ity_F64 : Ity_F32;
9992 IROp op = isD ? Iop_RecpExpF64 : Iop_RecpExpF32;
9993 IRTemp res = newTemp(ty);
9994 IRTemp rm = mk_get_IR_rounding_mode();
9995 assign(res, binop(op, mkexpr(rm), getQRegLane(nn, 0, ty)));
9996 putQReg128(dd, mkV128(0x0000));
9997 putQRegLane(dd, 0, mkexpr(res));
9998 HChar c = isD ? 'd' : 's';
9999 DIP("%s %c%u, %c%u\n", "frecpx", c, dd, c, nn);
10000 return True;
10001 }
10002
sewardjdf1628c2014-06-10 22:52:05 +000010003 return False;
10004# undef INSN
10005}
10006
sewardjfc83d2c2014-06-12 10:15:46 +000010007
sewardjdf1628c2014-06-10 22:52:05 +000010008static
10009Bool dis_AdvSIMD_scalar_x_indexed_element(/*MB_OUT*/DisResult* dres, UInt insn)
10010{
sewardj54ffa1d2014-07-22 09:27:49 +000010011 /* 31 28 23 21 20 19 15 11 9 4
10012 01 U 11111 size L M m opcode H 0 n d
10013 Decode fields are: u,size,opcode
10014 M is really part of the mm register number. Individual
10015 cases need to inspect L and H though.
10016 */
sewardjdf1628c2014-06-10 22:52:05 +000010017# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj54ffa1d2014-07-22 09:27:49 +000010018 if (INSN(31,30) != BITS2(0,1)
10019 || INSN(28,24) != BITS5(1,1,1,1,1) || INSN(10,10) !=0) {
10020 return False;
10021 }
10022 UInt bitU = INSN(29,29);
10023 UInt size = INSN(23,22);
10024 UInt bitL = INSN(21,21);
10025 UInt bitM = INSN(20,20);
10026 UInt mmLO4 = INSN(19,16);
10027 UInt opcode = INSN(15,12);
10028 UInt bitH = INSN(11,11);
10029 UInt nn = INSN(9,5);
10030 UInt dd = INSN(4,0);
10031 vassert(size < 4);
10032 vassert(bitH < 2 && bitM < 2 && bitL < 2);
10033
sewardjee3db332015-02-08 18:24:38 +000010034 if (bitU == 0 && size >= X10
10035 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
10036 /* -------- 0,1x,0001 FMLA d_d_d[], s_s_s[] -------- */
10037 /* -------- 0,1x,0101 FMLS d_d_d[], s_s_s[] -------- */
10038 Bool isD = (size & 1) == 1;
10039 Bool isSUB = opcode == BITS4(0,1,0,1);
10040 UInt index;
10041 if (!isD) index = (bitH << 1) | bitL;
10042 else if (isD && bitL == 0) index = bitH;
10043 else return False; // sz:L == x11 => unallocated encoding
10044 vassert(index < (isD ? 2 : 4));
10045 IRType ity = isD ? Ity_F64 : Ity_F32;
10046 IRTemp elem = newTemp(ity);
10047 UInt mm = (bitM << 4) | mmLO4;
10048 assign(elem, getQRegLane(mm, index, ity));
10049 IRTemp dupd = math_DUP_TO_V128(elem, ity);
10050 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
10051 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
10052 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
10053 IRTemp rm = mk_get_IR_rounding_mode();
10054 IRTemp t1 = newTempV128();
10055 IRTemp t2 = newTempV128();
10056 // FIXME: double rounding; use FMA primops instead
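      // (Presumably by computing each lane with the fused Iop_MAddF32/
      // Iop_MAddF64 ops, negating the product for the FMLS case, so that
      // the multiply and add round only once.)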
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t2))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (size >= X10 && opcode == BITS4(1,0,0,1)) {
      /* -------- 0,1x,1001 FMUL  d_d_d[], s_s_s[] -------- */
      /* -------- 1,1x,1001 FMULX d_d_d[], s_s_s[] -------- */
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 1;
      UInt index;
      if      (!isD)             index = (bitH << 1) | bitL;
      else if (isD && bitL == 0) index = bitH;
      else return False; // sz:L == x11 => unallocated encoding
      vassert(index < (isD ? 2 : 4));
      IRType ity   = isD ? Ity_F64 : Ity_F32;
      IRTemp elem  = newTemp(ity);
      UInt   mm    = (bitM << 4) | mmLO4;
      assign(elem, getQRegLane(mm, index, ity));
      IRTemp dupd  = math_DUP_TO_V128(elem, ity);
      IROp   opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm    = mk_get_IR_rounding_mode();
      IRTemp t1    = newTempV128();
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
      putQReg128(dd,
                 mkexpr(math_ZERO_ALL_EXCEPT_LOWEST_LANE(isD ? 3 : 2,
                                                         mkexpr(t1))));
      const HChar c = isD ? 'd' : 's';
      DIP("%s %c%u, %c%u, %s.%c[%u]\n", isMULX ? "fmulx" : "fmul",
          c, dd, c, nn, nameQReg128(mm), c, index);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,0,1,1)
           || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
      /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
      /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
      /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,0,1,1): ks = 0; break;
         case BITS4(0,0,1,1): ks = 1; break;
         case BITS4(0,1,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // h_b_b[] case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q_d_d[] case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_2(&vecN, &vecD);
      assign(vecN, getQReg128(nn));
      IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       False/*!is2*/, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      IROp opZHI = mkVecZEROHIxxOFV128(size+1);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifferenceZHI(sat2q, sat2n, opZHI);
      }
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      const HChar  arrNarrow = "bhsd"[size];
      const HChar  arrWide   = "bhsd"[size+1];
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n",
          nm, arrWide, dd, arrNarrow, nn, mm, arrNarrow, ix);
      return True;
   }

   if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
      /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
      /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
      UInt mm = 32; // invalid
      UInt ix = 16; // invalid
      switch (size) {
         case X00:
            return False; // b case is not allowed
         case X01:
            mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
         case X10:
            mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
         case X11:
            return False; // q case is not allowed
         default:
            vassert(0);
      }
      vassert(mm < 32 && ix < 16);
      Bool isR = opcode == BITS4(1,1,0,1);
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      vN = newTempV128();
      assign(vN, getQReg128(nn));
      vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      IROp opZHI = mkVecZEROHIxxOFV128(size);
      putQReg128(dd, unop(opZHI, mkexpr(res)));
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
      HChar ch        = size == X01 ? 'h' : 's';
      DIP("%s %c%d, %c%d, v%d.%c[%u]\n", nm, ch, dd, ch, nn, mm, ch, ix);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_shift_by_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31    28     22   18   15     10 9 4
      0 q u 011110 immh immb opcode 1  n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,23) != BITS6(0,1,1,1,1,0) || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt immh   = INSN(22,19);
   UInt immb   = INSN(18,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,00000 SSHR std7_std7_#imm -------- */
      /* -------- 1,00000 USHR std7_std7_#imm -------- */
      /* -------- 0,00010 SSRA std7_std7_#imm -------- */
      /* -------- 1,00010 USRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,0,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op  = isU ? mkVecSHRN(size) : mkVecSARN(size);
      IRExpr* src = getQReg128(nn);
      IRTemp  shf = newTempV128();
      IRTemp  res = newTempV128();
      if (shift == lanebits && isU) {
         assign(shf, mkV128(0x0000));
      } else {
         UInt nudge = 0;
         if (shift == lanebits) {
            vassert(!isU);
            nudge = 1;
         }
         assign(shf, binop(op, src, mkU8(shift - nudge)));
      }
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "usra" : "ssra")
                              : (isU ? "ushr" : "sshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,00100 SRSHR std7_std7_#imm -------- */
      /* -------- 1,00100 URSHR std7_std7_#imm -------- */
      /* -------- 0,00110 SRSRA std7_std7_#imm -------- */
      /* -------- 1,00110 URSRA std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      Bool isAcc = opcode == BITS5(0,0,1,1,0);
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IROp    op   = isU ? mkVecRSHU(size) : mkVecRSHS(size);
      IRExpr* src  = getQReg128(nn);
      IRTemp  imm8 = newTemp(Ity_I8);
      assign(imm8, mkU8((UChar)(-shift)));
      IRExpr* amt  = mkexpr(math_DUP_TO_V128(imm8, Ity_I8));
      IRTemp  shf  = newTempV128();
      IRTemp  res  = newTempV128();
      assign(shf, binop(op, src, amt));
      assign(res, isAcc ? binop(mkVecADD(size), getQReg128(dd), mkexpr(shf))
                        : mkexpr(shf));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      const HChar* nm = isAcc ? (isU ? "ursra" : "srsra")
                              : (isU ? "urshr" : "srshr");
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,1,0,0,0)) {
      /* -------- 1,01000 SRI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, SHR:8-xxx
                         001x:xxx -> H, SHR:16-xxxx
                         01xx:xxx -> S, SHR:32-xxxxx
                         1xxx:xxx -> D, SHR:64-xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      UInt lanebits = 8 << size;
      vassert(shift >= 1 && shift <= lanebits);
      IRExpr* src = getQReg128(nn);
      IRTemp  res = newTempV128();
      if (shift == lanebits) {
         assign(res, getQReg128(dd));
      } else {
         assign(res, binop(mkVecSHRN(size), src, mkU8(shift)));
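         /* SRI leaves the top 'shift' bits of each Vd lane unchanged and
            inserts the shifted Vn bits below them.  nmask selects the
            preserved bits: for example, for B lanes with shift == 3,
            nmask holds 0xE0 in each lane, and each result lane is
            (Vn.lane >> 3) | (Vd.lane & 0xE0). */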
         IRExpr* nmask = binop(mkVecSHLN(size),
                               mkV128(0xFFFF), mkU8(lanebits - shift));
         IRTemp  tmp   = newTempV128();
         assign(tmp, binop(Iop_OrV128,
                           mkexpr(res),
                           binop(Iop_AndV128, getQReg128(dd), nmask)));
         res = tmp;
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      HChar laneCh = "bhsd"[size];
      UInt  nLanes = (isQ ? 128 : 64) / lanebits;
      DIP("%s %s.%u%c, %s.%u%c, #%u\n", "sri",
          nameQReg128(dd), nLanes, laneCh,
          nameQReg128(nn), nLanes, laneCh, shift);
      return True;
   }

   if (opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,01010 SHL std7_std7_#imm -------- */
      /* -------- 1,01010 SLI std7_std7_#imm -------- */
      /* laneTy, shift = case immh:immb of
                         0001:xxx -> B, xxx
                         001x:xxx -> H, xxxx
                         01xx:xxx -> S, xxxxx
                         1xxx:xxx -> D, xxxxxx
                         other    -> invalid
      */
      UInt size  = 0;
      UInt shift = 0;
      Bool isSLI = bitU == 1;
      Bool isQ   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || (bitQ == 0 && size == X11)) return False;
      vassert(size >= 0 && size <= 3);
      /* The shift encoding has opposite sign for the leftwards case.
         Adjust shift to compensate. */
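      /* That is: getLaneInfo_IMMH_IMMB returns the right-shift reading,
         2*lanebits - immh:immb, whereas left shifts are encoded as
         immh:immb - lanebits, so computing lanebits - shift recovers the
         left shift amount.  For example, for S lanes with immh:immb =
         0100:101 (37), the helper returns 64-37 = 27, and the actual left
         shift is 32-27 = 5. */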
sewardj8e91fd42014-07-11 12:05:47 +000010363 UInt lanebits = 8 << size;
10364 shift = lanebits - shift;
10365 vassert(shift >= 0 && shift < lanebits);
10366 IROp op = mkVecSHLN(size);
10367 IRExpr* src = getQReg128(nn);
10368 IRTemp res = newTempV128();
10369 if (shift == 0) {
10370 assign(res, src);
10371 } else {
sewardjdf9d6d52014-06-27 10:43:22 +000010372 assign(res, binop(op, src, mkU8(shift)));
sewardj8e91fd42014-07-11 12:05:47 +000010373 if (isSLI) {
10374 IRExpr* nmask = binop(mkVecSHRN(size),
10375 mkV128(0xFFFF), mkU8(lanebits - shift));
10376 IRTemp tmp = newTempV128();
10377 assign(tmp, binop(Iop_OrV128,
10378 mkexpr(res),
10379 binop(Iop_AndV128, getQReg128(dd), nmask)));
10380 res = tmp;
10381 }
sewardjdf1628c2014-06-10 22:52:05 +000010382 }
sewardj8e91fd42014-07-11 12:05:47 +000010383 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10384 HChar laneCh = "bhsd"[size];
10385 UInt nLanes = (isQ ? 128 : 64) / lanebits;
10386 const HChar* nm = isSLI ? "sli" : "shl";
10387 DIP("%s %s.%u%c, %s.%u%c, #%u\n", nm,
10388 nameQReg128(dd), nLanes, laneCh,
10389 nameQReg128(nn), nLanes, laneCh, shift);
10390 return True;
sewardjdf1628c2014-06-10 22:52:05 +000010391 }
10392
sewardja97dddf2014-08-14 22:26:52 +000010393 if (opcode == BITS5(0,1,1,1,0)
10394 || (bitU == 1 && opcode == BITS5(0,1,1,0,0))) {
10395 /* -------- 0,01110 SQSHL std7_std7_#imm -------- */
10396 /* -------- 1,01110 UQSHL std7_std7_#imm -------- */
10397 /* -------- 1,01100 SQSHLU std7_std7_#imm -------- */
10398 UInt size = 0;
10399 UInt shift = 0;
10400 Bool isQ = bitQ == 1;
10401 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10402 if (!ok || (bitQ == 0 && size == X11)) return False;
10403 vassert(size >= 0 && size <= 3);
10404 /* The shift encoding has opposite sign for the leftwards case.
10405 Adjust shift to compensate. */
10406 UInt lanebits = 8 << size;
10407 shift = lanebits - shift;
10408 vassert(shift >= 0 && shift < lanebits);
10409 const HChar* nm = NULL;
10410 /**/ if (bitU == 0 && opcode == BITS5(0,1,1,1,0)) nm = "sqshl";
10411 else if (bitU == 1 && opcode == BITS5(0,1,1,1,0)) nm = "uqshl";
10412 else if (bitU == 1 && opcode == BITS5(0,1,1,0,0)) nm = "sqshlu";
10413 else vassert(0);
10414 IRTemp qDiff1 = IRTemp_INVALID;
10415 IRTemp qDiff2 = IRTemp_INVALID;
10416 IRTemp res = IRTemp_INVALID;
10417 IRTemp src = newTempV128();
10418 assign(src, getQReg128(nn));
10419 math_QSHL_IMM(&res, &qDiff1, &qDiff2, src, size, shift, nm);
10420 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
10421 updateQCFLAGwithDifferenceZHI(qDiff1, qDiff2,
sewardjacc29642014-08-15 05:35:35 +000010422 isQ ? Iop_INVALID : Iop_ZeroHI64ofV128);
sewardja97dddf2014-08-14 22:26:52 +000010423 const HChar* arr = nameArr_Q_SZ(bitQ, size);
10424 DIP("%s %s.%s, %s.%s, #%u\n", nm,
10425 nameQReg128(dd), arr, nameQReg128(nn), arr, shift);
10426 return True;
10427 }
10428
sewardj487559e2014-07-10 14:22:45 +000010429 if (bitU == 0
10430 && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1))) {
10431 /* -------- 0,10000 SHRN{,2} #imm -------- */
10432 /* -------- 0,10001 RSHRN{,2} #imm -------- */
10433 /* Narrows, and size is the narrow size. */
10434 UInt size = 0;
10435 UInt shift = 0;
10436 Bool is2 = bitQ == 1;
10437 Bool isR = opcode == BITS5(1,0,0,0,1);
10438 Bool ok = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
10439 if (!ok || size == X11) return False;
10440 vassert(shift >= 1);
sewardj8e91fd42014-07-11 12:05:47 +000010441 IRTemp t1 = newTempV128();
10442 IRTemp t2 = newTempV128();
10443 IRTemp t3 = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000010444 assign(t1, getQReg128(nn));
      assign(t2, isR ? binop(mkVecADD(size+1),
                             mkexpr(t1),
                             mkexpr(math_VEC_DUP_IMM(size+1, 1ULL<<(shift-1))))
                     : mkexpr(t1));
      assign(t3, binop(mkVecSHRN(size+1), mkexpr(t2), mkU8(shift)));
      IRTemp t4 = math_NARROW_LANES(t3, t3, size);
      putLO64andZUorPutHI64(is2, dd, t4);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", isR ? "rshrn" : "shrn",
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0) || opcode == BITS5(1,0,0,1,1)
       || (bitU == 1
           && (opcode == BITS5(1,0,0,0,0) || opcode == BITS5(1,0,0,0,1)))) {
      /* -------- 0,10010 SQSHRN{,2} #imm -------- */
      /* -------- 1,10010 UQSHRN{,2} #imm -------- */
      /* -------- 0,10011 SQRSHRN{,2} #imm -------- */
      /* -------- 1,10011 UQRSHRN{,2} #imm -------- */
      /* -------- 1,10000 SQSHRUN{,2} #imm -------- */
      /* -------- 1,10001 SQRSHRUN{,2} #imm -------- */
      UInt size  = 0;
      UInt shift = 0;
      Bool is2   = bitQ == 1;
      Bool ok    = getLaneInfo_IMMH_IMMB(&shift, &size, immh, immb);
      if (!ok || size == X11) return False;
      vassert(shift >= 1 && shift <= (8 << size));
      const HChar* nm = "??";
      IROp op = Iop_INVALID;
      /* Decide on the name and the operation. */
      /**/ if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
         nm = "sqshrn";  op = mkVecQANDqsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
         nm = "uqshrn";  op = mkVecQANDqshrNNARROWUU(size);
      }
      else if (bitU == 0 && opcode == BITS5(1,0,0,1,1)) {
         nm = "sqrshrn"; op = mkVecQANDqrsarNNARROWSS(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
         nm = "uqrshrn"; op = mkVecQANDqrshrNNARROWUU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,0)) {
         nm = "sqshrun"; op = mkVecQANDqsarNNARROWSU(size);
      }
      else if (bitU == 1 && opcode == BITS5(1,0,0,0,1)) {
         nm = "sqrshrun"; op = mkVecQANDqrsarNNARROWSU(size);
      }
      else vassert(0);
      /* Compute the result (Q, shifted value) pair. */
      IRTemp src128 = newTempV128();
      assign(src128, getQReg128(nn));
      IRTemp pair = newTempV128();
      assign(pair, binop(op, mkexpr(src128), mkU8(shift)));
      /* Update the result reg */
      IRTemp res64in128 = newTempV128();
      assign(res64in128, unop(Iop_ZeroHI64ofV128, mkexpr(pair)));
      putLO64andZUorPutHI64(is2, dd, res64in128);
      /* Update the Q flag. */
      IRTemp q64q64 = newTempV128();
      assign(q64q64, binop(Iop_InterleaveHI64x2, mkexpr(pair), mkexpr(pair)));
      IRTemp z128 = newTempV128();
      assign(z128, mkV128(0x0000));
      updateQCFLAGwithDifference(q64q64, z128);
      /* */
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s %s.%s, %s.%s, #%u\n", nm,
          nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide, shift);
      return True;
   }

   if (opcode == BITS5(1,0,1,0,0)) {
      /* -------- 0,10100 SSHLL{,2} #imm -------- */
      /* -------- 1,10100 USHLL{,2} #imm -------- */
      /* 31  28     22   18   15     9 4
         0q0 011110 immh immb 101001 n d  SSHLL Vd.Ta, Vn.Tb, #sh
         0q1 011110 immh immb 101001 n d  USHLL Vd.Ta, Vn.Tb, #sh
         where Ta,Tb,sh
           = case immh of 1xxx -> invalid
                          01xx -> 2d, 2s(q0)/4s(q1),  immh:immb - 32 (0..31)
                          001x -> 4s, 4h(q0)/8h(q1),  immh:immb - 16 (0..15)
                          0001 -> 8h, 8b(q0)/16b(q1), immh:immb - 8  (0..7)
                          0000 -> AdvSIMD modified immediate (???)
      */
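      /* Worked example: immh:immb = 0101:010 (42) selects the 01xx row,
         so Ta = 2d and sh = 42-32 = 10.  The interleave-with-zero below
         expands each source element into the top half of a 64-bit lane,
         and the subsequent shift right by 32-sh leaves the sign- or
         zero-extended element shifted left by sh. */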
      Bool isQ   = bitQ == 1;
      Bool isU   = bitU == 1;
      UInt immhb = (immh << 3) | immb;
      IRTemp  src  = newTempV128();
      IRTemp  zero = newTempV128();
      IRExpr* res  = NULL;
      UInt    sh   = 0;
      const HChar* ta = "??";
      const HChar* tb = "??";
      assign(src, getQReg128(nn));
      assign(zero, mkV128(0x0000));
      if (immh & 8) {
         /* invalid; don't assign to res */
      }
      else if (immh & 4) {
         sh = immhb - 32;
         vassert(sh < 32); /* so 32-sh is 1..32 */
         ta = "2d";
         tb = isQ ? "4s" : "2s";
         IRExpr* tmp = isQ ? mk_InterleaveHI32x4(src, zero)
                           : mk_InterleaveLO32x4(src, zero);
         res = binop(isU ? Iop_ShrN64x2 : Iop_SarN64x2, tmp, mkU8(32-sh));
      }
      else if (immh & 2) {
         sh = immhb - 16;
         vassert(sh < 16); /* so 16-sh is 1..16 */
         ta = "4s";
         tb = isQ ? "8h" : "4h";
         IRExpr* tmp = isQ ? mk_InterleaveHI16x8(src, zero)
                           : mk_InterleaveLO16x8(src, zero);
         res = binop(isU ? Iop_ShrN32x4 : Iop_SarN32x4, tmp, mkU8(16-sh));
      }
      else if (immh & 1) {
         sh = immhb - 8;
         vassert(sh < 8); /* so 8-sh is 1..8 */
         ta = "8h";
         tb = isQ ? "16b" : "8b";
         IRExpr* tmp = isQ ? mk_InterleaveHI8x16(src, zero)
                           : mk_InterleaveLO8x16(src, zero);
         res = binop(isU ? Iop_ShrN16x8 : Iop_SarN16x8, tmp, mkU8(8-sh));
      } else {
         vassert(immh == 0);
         /* invalid; don't assign to res */
      }
      /* */
      if (res) {
         putQReg128(dd, res);
         DIP("%cshll%s %s.%s, %s.%s, #%d\n",
             isU ? 'u' : 's', isQ ? "2" : "",
             nameQReg128(dd), ta, nameQReg128(nn), tb, sh);
         return True;
      }
      return False;
   }

#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_three_different(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     11 9 4
      0  Q  U  01110 size 1  m  opcode 00 n d
      Decode fields: u,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(11,10) != BITS2(0,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);
   Bool is2    = bitQ == 1;

   if (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,0,1,0)) {
      /* -------- 0,0000 SADDL{2} -------- */
      /* -------- 1,0000 UADDL{2} -------- */
      /* -------- 0,0010 SSUBL{2} -------- */
      /* -------- 1,0010 USUBL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,0);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        mkexpr(argL), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddl" : "saddl")
                                     : (isU ? "usubl" : "ssubl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,0,1,1)) {
      /* -------- 0,0001 SADDW{2} -------- */
      /* -------- 1,0001 UADDW{2} -------- */
      /* -------- 0,0011 SSUBW{2} -------- */
      /* -------- 1,0011 USUBW{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isADD = opcode == BITS4(0,0,0,1);
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp res   = newTempV128();
      assign(res, binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                        getQReg128(nn), mkexpr(argR)));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isU ? "uaddw" : "saddw")
                                     : (isU ? "usubw" : "ssubw");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(0,1,0,0) || opcode == BITS4(0,1,1,0)) {
      /* -------- 0,0100  ADDHN{2} -------- */
      /* -------- 1,0100 RADDHN{2} -------- */
      /* -------- 0,0110  SUBHN{2} -------- */
      /* -------- 1,0110 RSUBHN{2} -------- */
      /* Narrows, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      const UInt shift[3] = { 8, 16, 32 };
      Bool isADD = opcode == BITS4(0,1,0,0);
      Bool isR   = bitU == 1;
      /* Combined elements in wide lanes */
      IRTemp  wide  = newTempV128();
      IRExpr* wideE = binop(isADD ? mkVecADD(size+1) : mkVecSUB(size+1),
                            getQReg128(nn), getQReg128(mm));
      if (isR) {
         wideE = binop(mkVecADD(size+1),
                       wideE,
                       mkexpr(math_VEC_DUP_IMM(size+1,
                                               1ULL << (shift[size]-1))));
      }
      assign(wide, wideE);
      /* Top halves of elements, still in wide lanes */
      IRTemp shrd = newTempV128();
      assign(shrd, binop(mkVecSHRN(size+1), mkexpr(wide), mkU8(shift[size])));
      /* Elements now compacted into lower 64 bits */
      IRTemp new64 = newTempV128();
      assign(new64, binop(mkVecCATEVENLANES(size), mkexpr(shrd), mkexpr(shrd)));
      putLO64andZUorPutHI64(is2, dd, new64);
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isADD ? (isR ? "raddhn" : "addhn")
                                     : (isR ? "rsubhn" : "subhn");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrNarrow,
          nameQReg128(nn), arrWide, nameQReg128(mm), arrWide);
      return True;
   }

   if (opcode == BITS4(0,1,0,1) || opcode == BITS4(0,1,1,1)) {
      /* -------- 0,0101 SABAL{2} -------- */
      /* -------- 1,0101 UABAL{2} -------- */
      /* -------- 0,0111 SABDL{2} -------- */
      /* -------- 1,0111 UABDL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU   = bitU == 1;
      Bool   isACC = opcode == BITS4(0,1,0,1);
      IRTemp argL  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(nn));
      IRTemp argR  = math_WIDEN_LO_OR_HI_LANES(isU, is2, size, getQReg128(mm));
      IRTemp abd   = math_ABD(isU, size+1, mkexpr(argL), mkexpr(argR));
      IRTemp res   = newTempV128();
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(abd), getQReg128(dd))
                        : mkexpr(abd));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = isACC ? (isU ? "uabal" : "sabal")
                                     : (isU ? "uabdl" : "sabdl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (opcode == BITS4(1,1,0,0)
       || opcode == BITS4(1,0,0,0) || opcode == BITS4(1,0,1,0)) {
      /* -------- 0,1100 SMULL{2} -------- */ // 0 (ks)
      /* -------- 1,1100 UMULL{2} -------- */ // 0
      /* -------- 0,1000 SMLAL{2} -------- */ // 1
      /* -------- 1,1000 UMLAL{2} -------- */ // 1
      /* -------- 0,1010 SMLSL{2} -------- */ // 2
      /* -------- 1,1010 UMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,0): ks = 0; break;
         case BITS4(1,0,0,0): ks = 1; break;
         case BITS4(1,0,1,0): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X11) return False;
      vassert(size <= 2);
      Bool   isU  = bitU == 1;
      IRTemp vecN = newTempV128();
      IRTemp vecM = newTempV128();
      IRTemp vecD = newTempV128();
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      IRTemp res = IRTemp_INVALID;
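      /* The 'm'/'a'/'s' selected by "mas"[ks] tells math_MULL_ACC
         whether to produce a plain multiply, a multiply-accumulate
         or a multiply-subtract. */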
      math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
                    vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
      DIP("%c%s%s %s.%s, %s.%s, %s.%s\n", isU ? 'u' : 's', nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS4(1,1,0,1)
           || opcode == BITS4(1,0,0,1) || opcode == BITS4(1,0,1,1))) {
      /* -------- 0,1101 SQDMULL{2} -------- */ // 0 (ks)
      /* -------- 0,1001 SQDMLAL{2} -------- */ // 1
      /* -------- 0,1011 SQDMLSL{2} -------- */ // 2
      /* Widens, and size refers to the narrowed lanes. */
      UInt ks = 3;
      switch (opcode) {
         case BITS4(1,1,0,1): ks = 0; break;
         case BITS4(1,0,0,1): ks = 1; break;
         case BITS4(1,0,1,1): ks = 2; break;
         default: vassert(0);
      }
      vassert(ks >= 0 && ks <= 2);
      if (size == X00 || size == X11) return False;
      vassert(size <= 2);
      IRTemp vecN, vecM, vecD, res, sat1q, sat1n, sat2q, sat2n;
      vecN = vecM = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
      newTempsV128_3(&vecN, &vecM, &vecD);
      assign(vecN, getQReg128(nn));
      assign(vecM, getQReg128(mm));
      assign(vecD, getQReg128(dd));
      math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
                       is2, size, "mas"[ks],
                       vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
      putQReg128(dd, mkexpr(res));
      vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
      updateQCFLAGwithDifference(sat1q, sat1n);
      if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
         updateQCFLAGwithDifference(sat2q, sat2n);
      }
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      const HChar* nm        = ks == 0 ? "sqdmull"
                                       : (ks == 1 ? "sqdmlal" : "sqdmlsl");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", nm, is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   if (bitU == 0 && opcode == BITS4(1,1,1,0)) {
      /* -------- 0,1110 PMULL{2} -------- */
      /* Widens, and size refers to the narrowed lanes. */
      if (size != X00) return False;
      IRTemp res
         = math_BINARY_WIDENING_V128(is2, Iop_PolynomialMull8x8,
                                     getQReg128(nn), getQReg128(mm));
      putQReg128(dd, mkexpr(res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(1,    size+1);
      DIP("%s%s %s.%s, %s.%s, %s.%s\n", "pmull", is2 ? "2" : "",
          nameQReg128(dd), arrWide,
          nameQReg128(nn), arrNarrow, nameQReg128(mm), arrNarrow);
      return True;
   }

   return False;
#  undef INSN
}

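/* Purely illustrative sketch, not used by the decoder: a scalar,
   per-lane reference model of the ADDHN/RADDHN computation handled
   above, for the size == X00 case (16-bit wide lanes narrowing to
   8 bits).  The helper name is hypothetical. */
static inline UChar ref_RADDHN_lane ( UShort a, UShort b, Bool isR )
{
   UInt wide = (UInt)a + (UInt)b;   /* sum in the wide lane */
   if (isR) wide += 1U << (8-1);    /* rounding constant, as above */
   return (UChar)(wide >> 8);       /* keep only the high half */
}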

static
Bool dis_AdvSIMD_three_same(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20 15     10 9 4
      0  Q  U  01110 size 1  m  opcode 1  n d
      Decode fields: u,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(10,10) != 1) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt mm     = INSN(20,16);
   UInt opcode = INSN(15,11);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (opcode == BITS5(0,0,0,0,0) || opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00000 SHADD std6_std6_std6 -------- */
      /* -------- 1,xx,00000 UHADD std6_std6_std6 -------- */
      /* -------- 0,xx,00100 SHSUB std6_std6_std6 -------- */
      /* -------- 1,xx,00100 UHSUB std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isADD = opcode == BITS5(0,0,0,0,0);
      Bool isU   = bitU == 1;
      /* Widen both args out, do the math, narrow to final result. */
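      /* (Sketch: for UHADD on 8-bit lanes this computes (a + b) >> 1
         in 16-bit lanes, so the carry out of bit 7 is not lost; the
         signed variants use an arithmetic shift right instead.) */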
      IRTemp argL   = newTempV128();
      IRTemp argLhi = IRTemp_INVALID;
      IRTemp argLlo = IRTemp_INVALID;
      IRTemp argR   = newTempV128();
      IRTemp argRhi = IRTemp_INVALID;
      IRTemp argRlo = IRTemp_INVALID;
      IRTemp resHi  = newTempV128();
      IRTemp resLo  = newTempV128();
      IRTemp res    = IRTemp_INVALID;
      assign(argL, getQReg128(nn));
      argLlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argL));
      argLhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argL));
      assign(argR, getQReg128(mm));
      argRlo = math_WIDEN_LO_OR_HI_LANES(isU, False, size, mkexpr(argR));
      argRhi = math_WIDEN_LO_OR_HI_LANES(isU, True,  size, mkexpr(argR));
      IROp opADDSUB = isADD ? mkVecADD(size+1) : mkVecSUB(size+1);
      IROp opSxR    = isU ? mkVecSHRN(size+1) : mkVecSARN(size+1);
      assign(resHi, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLhi), mkexpr(argRhi)),
                          mkU8(1)));
      assign(resLo, binop(opSxR,
                          binop(opADDSUB, mkexpr(argLlo), mkexpr(argRlo)),
                          mkU8(1)));
      res = math_NARROW_LANES ( resHi, resLo, size );
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isADD ? (isU ? "uhadd" : "shadd")
                               : (isU ? "uhsub" : "shsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0)) {
      /* -------- 0,xx,00010 SRHADD std7_std7_std7 -------- */
      /* -------- 1,xx,00010 URHADD std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool   isU  = bitU == 1;
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      IRTemp res = math_RHADD(size, isU, argL, argR);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", isU ? "urhadd" : "srhadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,0,1) || opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,xx,00001 SQADD std7_std7_std7 -------- */
      /* -------- 1,xx,00001 UQADD std7_std7_std7 -------- */
      /* -------- 0,xx,00101 SQSUB std7_std7_std7 -------- */
      /* -------- 1,xx,00101 UQSUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isADD = opcode == BITS5(0,0,0,0,1);
      Bool isU   = bitU == 1;
      IROp qop   = Iop_INVALID;
      IROp nop   = Iop_INVALID;
      if (isADD) {
         qop = isU ? mkVecQADDU(size) : mkVecQADDS(size);
         nop = mkVecADD(size);
      } else {
         qop = isU ? mkVecQSUBU(size) : mkVecQSUBS(size);
         nop = mkVecSUB(size);
      }
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(mm));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
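      /* QC is set iff some lane saturated; that is detected by also
         computing the non-saturating result and comparing. */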
      updateQCFLAGwithDifference(qres, nres);
      const HChar* nm  = isADD ? (isU ? "uqadd" : "sqadd")
                               : (isU ? "uqsub" : "sqsub");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,00,00011 AND 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,01,00011 BIC 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,10,00011 ORR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 0,11,00011 ORN 16b_16b_16b, 8b_8b_8b -------- */
      Bool   isORx  = (size & 2) == 2;
      Bool   invert = (size & 1) == 1;
      IRTemp res    = newTempV128();
      assign(res, binop(isORx ? Iop_OrV128 : Iop_AndV128,
                        getQReg128(nn),
                        invert ? unop(Iop_NotV128, getQReg128(mm))
                               : getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* names[4] = { "and", "bic", "orr", "orn" };
      const HChar* ar = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", names[INSN(23,22)],
          nameQReg128(dd), ar, nameQReg128(nn), ar, nameQReg128(mm), ar);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(0,0,0,1,1)) {
      /* -------- 1,00,00011 EOR 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,01,00011 BSL 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,10,00011 BIT 16b_16b_16b, 8b_8b_8b -------- */
      /* -------- 1,11,00011 BIF 16b_16b_16b, 8b_8b_8b -------- */
      IRTemp argD = newTempV128();
      IRTemp argN = newTempV128();
      IRTemp argM = newTempV128();
      assign(argD, getQReg128(dd));
      assign(argN, getQReg128(nn));
      assign(argM, getQReg128(mm));
      const IROp opXOR = Iop_XorV128;
      const IROp opAND = Iop_AndV128;
      const IROp opNOT = Iop_NotV128;
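      /* The three non-EOR cases are bitwise selects, all built from
         the identity sel(C,X,Y) = Y ^ ((Y ^ X) & C), which yields X
         where C is 1 and Y where C is 0:
            BSL selects by D between N and M,
            BIT selects by M between N and D,
            BIF selects by ~M between N and D.
         This form needs three ops rather than the four of
         (X & C) | (Y & ~C). */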
      IRTemp res = newTempV128();
      switch (size) {
         case BITS2(0,0): /* EOR */
            assign(res, binop(opXOR, mkexpr(argM), mkexpr(argN)));
            break;
         case BITS2(0,1): /* BSL */
            assign(res, binop(opXOR, mkexpr(argM),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argM), mkexpr(argN)),
                                    mkexpr(argD))));
            break;
         case BITS2(1,0): /* BIT */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    mkexpr(argM))));
            break;
         case BITS2(1,1): /* BIF */
            assign(res, binop(opXOR, mkexpr(argD),
                              binop(opAND,
                                    binop(opXOR, mkexpr(argD), mkexpr(argN)),
                                    unop(opNOT, mkexpr(argM)))));
            break;
         default:
            vassert(0);
      }
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nms[4] = { "eor", "bsl", "bit", "bif" };
      const HChar* arr = bitQ == 1 ? "16b" : "8b";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nms[size],
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00110 CMGT std7_std7_std7 -------- */ // >s
      /* -------- 1,xx,00110 CMHI std7_std7_std7 -------- */ // >u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGT ? binop(mkVecCMPGTS(size), argL, argR)
                  : binop(mkVecCMPGTU(size), argL, argR));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGT ? "cmgt" : "cmhi";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 CMGE std7_std7_std7 -------- */ // >=s
      /* -------- 1,xx,00111 CMHS std7_std7_std7 -------- */ // >=u
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGE = bitU == 0;
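      /* There are no vector >= primops, so compute NOT(argR > argL)
         instead. */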
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isGE ? unop(Iop_NotV128, binop(mkVecCMPGTS(size), argR, argL))
                  : unop(Iop_NotV128, binop(mkVecCMPGTU(size), argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isGE ? "cmge" : "cmhs";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0) || opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01000 SSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01010 SRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01000 USHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01010 URSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,0);
      IROp op  = isR ? (isU ? mkVecRSHU(size) : mkVecRSHS(size))
                     : (isU ? mkVecSHU(size) : mkVecSHS(size));
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isR ? (isU ? "urshl" : "srshl")
                             : (isU ? "ushl" : "sshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1) || opcode == BITS5(0,1,0,1,1)) {
      /* -------- 0,xx,01001 SQSHL  std7_std7_std7 -------- */
      /* -------- 0,xx,01011 SQRSHL std7_std7_std7 -------- */
      /* -------- 1,xx,01001 UQSHL  std7_std7_std7 -------- */
      /* -------- 1,xx,01011 UQRSHL std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU = bitU == 1;
      Bool isR = opcode == BITS5(0,1,0,1,1);
      IROp op  = isR ? (isU ? mkVecQANDUQRSH(size) : mkVecQANDSQRSH(size))
                     : (isU ? mkVecQANDUQSH(size) : mkVecQANDSQSH(size));
      /* This is a bit tricky.  If we're only interested in the lowest 64 bits
         of the result (viz, bitQ == 0), then we must adjust the operands to
         ensure that the upper part of the result, that we don't care about,
         doesn't pollute the returned Q value.  To do this, zero out the upper
         operand halves beforehand.  This works because it means, for the
         lanes we don't care about, we are shifting zero by zero, which can
         never saturate. */
      IRTemp res256 = newTemp(Ity_V256);
      IRTemp resSH  = newTempV128();
      IRTemp resQ   = newTempV128();
      IRTemp zero   = newTempV128();
      assign(res256, binop(op,
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(nn)),
                           math_MAYBE_ZERO_HI64_fromE(bitQ, getQReg128(mm))));
      assign(resSH, unop(Iop_V256toV128_0, mkexpr(res256)));
      assign(resQ,  unop(Iop_V256toV128_1, mkexpr(res256)));
      assign(zero,  mkV128(0x0000));
      putQReg128(dd, mkexpr(resSH));
      updateQCFLAGwithDifference(resQ, zero);
      const HChar* nm  = isR ? (isU ? "uqrshl" : "sqrshl")
                             : (isU ? "uqshl" : "sqshl");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,0,0) || opcode == BITS5(0,1,1,0,1)) {
      /* -------- 0,xx,01100 SMAX std7_std7_std7 -------- */
      /* -------- 1,xx,01100 UMAX std7_std7_std7 -------- */
      /* -------- 0,xx,01101 SMIN std7_std7_std7 -------- */
      /* -------- 1,xx,01101 UMIN std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isU   = bitU == 1;
      Bool isMAX = (opcode & 1) == 0;
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isMAX ? (isU ? "umax" : "smax")
                               : (isU ? "umin" : "smin");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(0,1,1,1,0) || opcode == BITS5(0,1,1,1,1)) {
      /* -------- 0,xx,01110 SABD std6_std6_std6 -------- */
      /* -------- 1,xx,01110 UABD std6_std6_std6 -------- */
      /* -------- 0,xx,01111 SABA std6_std6_std6 -------- */
      /* -------- 1,xx,01111 UABA std6_std6_std6 -------- */
      if (size == X11) return False; // 1d/2d cases not allowed
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,1,1,1,1);
      vassert(size <= 2);
      IRTemp t1 = math_ABD(isU, size, getQReg128(nn), getQReg128(mm));
      IRTemp t2 = newTempV128();
      assign(t2, isACC ? binop(mkVecADD(size), mkexpr(t1), getQReg128(dd))
                       : mkexpr(t1));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* nm  = isACC ? (isU ? "uaba" : "saba")
                               : (isU ? "uabd" : "sabd");
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,0)) {
      /* -------- 0,xx,10000 ADD std7_std7_std7 -------- */
      /* -------- 1,xx,10000 SUB std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isSUB = bitU == 1;
      IROp op    = isSUB ? mkVecSUB(size) : mkVecADD(size);
      IRTemp t   = newTempV128();
      assign(t, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t));
      const HChar* nm  = isSUB ? "sub" : "add";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,0,1)) {
      /* -------- 0,xx,10001 CMTST std7_std7_std7 -------- */ // &, != 0
      /* -------- 1,xx,10001 CMEQ  std7_std7_std7 -------- */ // ==
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 1;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = getQReg128(mm);
      IRTemp  res  = newTempV128();
      assign(res,
             isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                  : unop(Iop_NotV128, binop(mkVecCMPEQ(size),
                                            binop(Iop_AndV128, argL, argR),
                                            mkV128(0x0000))));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* nm  = isEQ ? "cmeq" : "cmtst";
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,0,1,0)) {
      /* -------- 0,xx,10010 MLA std7_std7_std7 -------- */
      /* -------- 1,xx,10010 MLS std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isMLS = bitU == 1;
      IROp opMUL    = mkVecMUL(size);
      IROp opADDSUB = isMLS ? mkVecSUB(size) : mkVecADD(size);
      IRTemp res = newTempV128();
      if (opMUL != Iop_INVALID && opADDSUB != Iop_INVALID) {
         assign(res, binop(opADDSUB,
                           getQReg128(dd),
                           binop(opMUL, getQReg128(nn), getQReg128(mm))));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isMLS ? "mls" : "mla",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,0,1,1)) {
      /* -------- 0,xx,10011 MUL  std7_std7_std7 -------- */
      /* -------- 1,xx,10011 PMUL 16b_16b_16b, 8b_8b_8b -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isPMUL = bitU == 1;
      const IROp opsPMUL[4]
         = { Iop_PolynomialMul8x16, Iop_INVALID, Iop_INVALID, Iop_INVALID };
      IROp   opMUL = isPMUL ? opsPMUL[size] : mkVecMUL(size);
      IRTemp res   = newTempV128();
      if (opMUL != Iop_INVALID) {
         assign(res, binop(opMUL, getQReg128(nn), getQReg128(mm)));
         putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
         const HChar* arr = nameArr_Q_SZ(bitQ, size);
         DIP("%s %s.%s, %s.%s, %s.%s\n", isPMUL ? "pmul" : "mul",
             nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
         return True;
      }
      return False;
   }

   if (opcode == BITS5(1,0,1,0,0) || opcode == BITS5(1,0,1,0,1)) {
      /* -------- 0,xx,10100 SMAXP std6_std6_std6 -------- */
      /* -------- 1,xx,10100 UMAXP std6_std6_std6 -------- */
      /* -------- 0,xx,10101 SMINP std6_std6_std6 -------- */
      /* -------- 1,xx,10101 UMINP std6_std6_std6 -------- */
      if (size == X11) return False;
      Bool isU   = bitU == 1;
      Bool isMAX = opcode == BITS5(1,0,1,0,0);
      IRTemp vN  = newTempV128();
      IRTemp vM  = newTempV128();
      IROp op    = isMAX ? (isU ? mkVecMAXU(size) : mkVecMAXS(size))
                         : (isU ? mkVecMINU(size) : mkVecMINS(size));
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
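      /* E.g. for 4s lanes: CatEven(M,N) = [m2 m0 n2 n0] and
         CatOdd(M,N) = [m3 m1 n3 n1], so a lane-wise max/min of the
         two gives [op(m3,m2) op(m1,m0) op(n3,n2) op(n1,n0)], which is
         the required pairwise result. */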
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(op,
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isMAX ? (isU ? "umaxp" : "smaxp")
                               : (isU ? "uminp" : "sminp");
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (opcode == BITS5(1,0,1,1,0)) {
      /* -------- 0,xx,10110 SQDMULH  s and h variants only -------- */
      /* -------- 1,xx,10110 SQRDMULH s and h variants only -------- */
      if (size == X00 || size == X11) return False;
      Bool isR = bitU == 1;
      IRTemp res, sat1q, sat1n, vN, vM;
      res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
      newTempsV128_2(&vN, &vM);
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
      updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      const HChar* nm  = isR ? "sqrdmulh" : "sqdmulh";
      DIP("%s %s.%s, %s.%s, %s.%s\n", nm,
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,0,1,1,1)) {
      /* -------- 0,xx,10111 ADDP std7_std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRTemp vN = newTempV128();
      IRTemp vM = newTempV128();
      assign(vN, getQReg128(nn));
      assign(vM, getQReg128(mm));
      IRTemp res128 = newTempV128();
      assign(res128,
             binop(mkVecADD(size),
                   binop(mkVecCATEVENLANES(size), mkexpr(vM), mkexpr(vN)),
                   binop(mkVecCATODDLANES(size),  mkexpr(vM), mkexpr(vN))));
      /* In the half-width case, use CatEL32x4 to extract the half-width
         result from the full-width result. */
      IRExpr* res
         = bitQ == 0 ? unop(Iop_ZeroHI64ofV128,
                            binop(Iop_CatEvenLanes32x4, mkexpr(res128),
                                                        mkexpr(res128)))
                     : mkexpr(res128);
      putQReg128(dd, res);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("addp %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 0,0x,11000 FMAXNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11000 FMINNM 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,0x,11110 FMAX   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11110 FMIN   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNM, FMINNM: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? X11 : X10);
      IRTemp res = newTempV128();
      assign(res, binop(opMXX, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%s %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,0,1)) {
      /* -------- 0,0x,11001 FMLA 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11001 FMLS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: double rounding; use FMA primops instead
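      // (the separate multiply and add each round to the destination
      // precision, so the combination can differ from a true fused
      // multiply-add in the last mantissa bit)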
      assign(t1, triop(opMUL,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, triop(isSUB ? opSUB : opADD,
                       mkexpr(rm), getQReg128(dd), mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fmls" : "fmla",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 0,0x,11010 FADD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11010 FSUB 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD   = (size & 1) == 1;
      Bool isSUB = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      const IROp ops[4]
         = { Iop_Add32Fx4, Iop_Add64Fx2, Iop_Sub32Fx4, Iop_Sub64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSUB ? "fsub" : "fadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,1x,11010 FABD 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      // FIXME: use Abd primop instead?
      assign(t1, triop(opSUB, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, unop(opABS, mkexpr(t1)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("fabd %s.%s, %s.%s, %s.%s\n",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,0,1,1)) {
      /* -------- 0,0x,11011 FMULX 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11011 FMUL  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      // KLUDGE: FMULX is treated the same way as FMUL.  That can't be right.
      Bool isD    = (size & 1) == 1;
      Bool isMULX = bitU == 0;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      assign(t1, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
                       mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isMULX ? "fmulx" : "fmul",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (size <= X01 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 0,0x,11100 FCMEQ 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11100 FCMGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isGE = bitU == 1;
      IROp opCMP = isGE ? (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4)
                        : (isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4);
      IRTemp t1 = newTempV128();
      assign(t1, isGE ? binop(opCMP, getQReg128(mm), getQReg128(nn)) // swapd
                      : binop(opCMP, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGE ? "fcmge" : "fcmeq",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size >= X10 && opcode == BITS5(1,1,1,0,0)) {
      /* -------- 1,1x,11100 FCMGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, getQReg128(mm), getQReg128(nn))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fcmgt",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && opcode == BITS5(1,1,1,0,1)) {
      /* -------- 1,0x,11101 FACGE 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11101 FACGT 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD  = (size & 1) == 1;
      Bool isGT = (size & 2) == 2;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp opCMP = isGT ? (isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4)
                        : (isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4);
      IROp opABS = isD ? Iop_Abs64Fx2 : Iop_Abs32Fx4;
      IRTemp t1 = newTempV128();
      assign(t1, binop(opCMP, unop(opABS, getQReg128(mm)),
                              unop(opABS, getQReg128(nn)))); // swapd
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t1));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isGT ? "facgt" : "facge",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1
       && (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,1,1,0))) {
      /* -------- 1,0x,11000 FMAXNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11000 FMINNMP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,0x,11110 FMAXP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 1,1x,11110 FMINP   2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* FMAXNMP, FMINNMP: FIXME -- KLUDGED */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      Bool isMIN = (size & 2) == 2;
      Bool isNM  = opcode == BITS5(1,1,0,0,0);
      IROp opMXX = (isMIN ? mkVecMINF : mkVecMAXF)(isD ? 3 : 2);
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                binop(opMXX, mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s%sp %s.%s, %s.%s, %s.%s\n",
          isMIN ? "fmin" : "fmax", isNM ? "nm" : "",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,0,1,0)) {
      /* -------- 1,0x,11010 FADDP 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = size == X01;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IRTemp srcN = newTempV128();
      IRTemp srcM = newTempV128();
      IRTemp preL = IRTemp_INVALID;
      IRTemp preR = IRTemp_INVALID;
      assign(srcN, getQReg128(nn));
      assign(srcM, getQReg128(mm));
      math_REARRANGE_FOR_FLOATING_PAIRWISE(&preL, &preR,
                                           srcM, srcN, isD, bitQ);
      putQReg128(
         dd, math_MAYBE_ZERO_HI64_fromE(
                bitQ,
                triop(mkVecADDF(isD ? 3 : 2),
                      mkexpr(mk_get_IR_rounding_mode()),
                      mkexpr(preL), mkexpr(preR))));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "faddp",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 1,0x,11111 FDIV 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isD = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      vassert(size <= 1);
      const IROp ops[2] = { Iop_Div32Fx4, Iop_Div64Fx2 };
      IROp   op = ops[size];
      IRTemp rm = mk_get_IR_rounding_mode();
      IRTemp t1 = newTempV128();
      IRTemp t2 = newTempV128();
      assign(t1, triop(op, mkexpr(rm), getQReg128(nn), getQReg128(mm)));
      assign(t2, math_MAYBE_ZERO_HI64(bitQ, t1));
      putQReg128(dd, mkexpr(t2));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", "fdiv",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(1,1,1,1,1)) {
      /* -------- 0,0x,11111: FRECPS  2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      /* -------- 0,1x,11111: FRSQRTS 2d_2d_2d, 4s_4s_4s, 2s_2s_2s -------- */
      Bool isSQRT = (size & 2) == 2;
      Bool isD    = (size & 1) == 1;
      if (bitQ == 0 && isD) return False; // implied 1d case
      IROp op = isSQRT ? (isD ? Iop_RSqrtStep64Fx2 : Iop_RSqrtStep32Fx4)
                       : (isD ? Iop_RecipStep64Fx2 : Iop_RecipStep32Fx4);
      IRTemp res = newTempV128();
      assign(res, binop(op, getQReg128(nn), getQReg128(mm)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
      DIP("%s %s.%s, %s.%s, %s.%s\n", isSQRT ? "frsqrts" : "frecps",
          nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), arr);
      return True;
   }

   return False;
#  undef INSN
}

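/* Purely illustrative sketch, not used by the decoder: a scalar model
   of the QC-flag scheme that the saturating cases above rely on --
   compute both the saturating and the non-saturating result, and
   record saturation iff they differ.  Shown for signed 8-bit add; the
   helper name is hypothetical. */
static inline UChar ref_SQADD8 ( Char a, Char b, /*OUT*/Bool* anyQ )
{
   Int wide = (Int)a + (Int)b;     /* exact; cannot overflow an Int */
   Int sat  = wide;
   if (sat > 127)  sat = 127;
   if (sat < -128) sat = -128;
   if (sat != wide) *anyQ = True;  /* QC is sticky, so only ever set */
   return (UChar)sat;
}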

static
Bool dis_AdvSIMD_two_reg_misc(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21    16     11 9 4
      0  Q  U  01110 size 10000 opcode 10 n d
      Decode fields: U,size,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(31,31) != 0
       || INSN(28,24) != BITS5(0,1,1,1,0)
       || INSN(21,17) != BITS5(1,0,0,0,0)
       || INSN(11,10) != BITS2(1,0)) {
      return False;
   }
   UInt bitQ   = INSN(30,30);
   UInt bitU   = INSN(29,29);
   UInt size   = INSN(23,22);
   UInt opcode = INSN(16,12);
   UInt nn     = INSN(9,5);
   UInt dd     = INSN(4,0);
   vassert(size < 4);

   if (bitU == 0 && size <= X10 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 0,00,00000: REV64 16b_16b, 8b_8b -------- */
      /* -------- 0,01,00000: REV64 8h_8h, 4h_4h -------- */
      /* -------- 0,10,00000: REV64 4s_4s, 2s_2s -------- */
      const IROp iops[3] = { Iop_Reverse8sIn64_x2,
                             Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2 };
      vassert(size <= 2);
      IRTemp res = newTempV128();
      assign(res, unop(iops[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev64",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size <= X01 && opcode == BITS5(0,0,0,0,0)) {
      /* -------- 1,00,00000: REV32 16b_16b, 8b_8b -------- */
      /* -------- 1,01,00000: REV32 8h_8h, 4h_4h -------- */
      Bool   isH = size == X01;
      IRTemp res = newTempV128();
      IROp   iop = isH ? Iop_Reverse16sIn32_x4 : Iop_Reverse8sIn32_x4;
      assign(res, unop(iop, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev32",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && size == X00 && opcode == BITS5(0,0,0,0,1)) {
      /* -------- 0,00,00001: REV16 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse8sIn16_x8, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", "rev16",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,0) || opcode == BITS5(0,0,1,1,0)) {
      /* -------- 0,xx,00010: SADDLP std6_std6 -------- */
      /* -------- 1,xx,00010: UADDLP std6_std6 -------- */
      /* -------- 0,xx,00110: SADALP std6_std6 -------- */
      /* -------- 1,xx,00110: UADALP std6_std6 -------- */
      /* Widens, and size refers to the narrow size. */
      if (size == X11) return False; // no 1d or 2d cases
      Bool isU   = bitU == 1;
      Bool isACC = opcode == BITS5(0,0,1,1,0);
      IRTemp src = newTempV128();
      IRTemp sum = newTempV128();
      IRTemp res = newTempV128();
      assign(src, getQReg128(nn));
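      /* Pairwise widening sum: widen the odd-numbered and the
         even-numbered lanes separately, then add, so that each wide
         lane ends up holding the sum of one adjacent narrow pair. */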
      assign(sum,
             binop(mkVecADD(size+1),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, True/*fromOdd*/, size, mkexpr(src))),
                   mkexpr(math_WIDEN_EVEN_OR_ODD_LANES(
                             isU, False/*!fromOdd*/, size, mkexpr(src)))));
      assign(res, isACC ? binop(mkVecADD(size+1), mkexpr(sum), getQReg128(dd))
                        : mkexpr(sum));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
      const HChar* arrWide   = nameArr_Q_SZ(bitQ, size+1);
      DIP("%s %s.%s, %s.%s\n", isACC ? (isU ? "uadalp" : "sadalp")
                                     : (isU ? "uaddlp" : "saddlp"),
          nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
      return True;
   }

   if (opcode == BITS5(0,0,0,1,1)) {
      /* -------- 0,xx,00011: SUQADD std7_std7 -------- */
      /* -------- 1,xx,00011: USQADD std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isUSQADD = bitU == 1;
      /* This is switched (in the US vs SU sense) deliberately.
         SUQADD corresponds to the ExtUSsatSS variants and
         USQADD corresponds to the ExtSUsatUU variants.
         See libvex_ir for more details. */
      IROp qop = isUSQADD ? mkVecQADDEXTSUSATUU(size)
                          : mkVecQADDEXTUSSATSS(size);
      IROp nop = mkVecADD(size);
      IRTemp argL = newTempV128();
      IRTemp argR = newTempV128();
      IRTemp qres = newTempV128();
      IRTemp nres = newTempV128();
      /* Because the two arguments to the addition are implicitly
         extended differently (one signedly, the other unsignedly) it is
         important to present them to the primop in the correct order. */
      assign(argL, getQReg128(nn));
      assign(argR, getQReg128(dd));
      assign(qres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(qop, mkexpr(argL), mkexpr(argR))));
      assign(nres, math_MAYBE_ZERO_HI64_fromE(
                      bitQ, binop(nop, mkexpr(argL), mkexpr(argR))));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isUSQADD ? "usqadd" : "suqadd",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,0,0)) {
      /* -------- 0,xx,00100: CLS std6_std6 -------- */
      /* -------- 1,xx,00100: CLZ std6_std6 -------- */
      if (size == X11) return False; // no 1d or 2d cases
      const IROp opsCLS[3] = { Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4 };
      const IROp opsCLZ[3] = { Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4 };
      Bool   isCLZ = bitU == 1;
      IRTemp res   = newTempV128();
      vassert(size <= 2);
      assign(res, unop(isCLZ ? opsCLZ[size] : opsCLS[size], getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isCLZ ? "clz" : "cls",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (size == X00 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 0,00,00101: CNT 16b_16b, 8b_8b -------- */
      /* -------- 1,00,00101: NOT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(bitU == 0 ? Iop_Cnt8x16 : Iop_NotV128, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", bitU == 0 ? "cnt" : "not",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 1 && size == X01 && opcode == BITS5(0,0,1,0,1)) {
      /* -------- 1,01,00101 RBIT 16b_16b, 8b_8b -------- */
      IRTemp res = newTempV128();
      assign(res, unop(Iop_Reverse1sIn8_x16, getQReg128(nn)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, 0);
      DIP("%s %s.%s, %s.%s\n", "rbit",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,0,1,1,1)) {
      /* -------- 0,xx,00111 SQABS std7_std7 -------- */
      /* -------- 1,xx,00111 SQNEG std7_std7 -------- */
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isNEG = bitU == 1;
      IRTemp qresFW = IRTemp_INVALID, nresFW = IRTemp_INVALID;
      (isNEG ? math_SQNEG : math_SQABS)( &qresFW, &nresFW,
                                         getQReg128(nn), size );
      IRTemp qres = newTempV128(), nres = newTempV128();
      assign(qres, math_MAYBE_ZERO_HI64(bitQ, qresFW));
      assign(nres, math_MAYBE_ZERO_HI64(bitQ, nresFW));
      putQReg128(dd, mkexpr(qres));
      updateQCFLAGwithDifference(qres, nres);
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("%s %s.%s, %s.%s\n", isNEG ? "sqneg" : "sqabs",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,0)) {
      /* -------- 0,xx,01000: CMGT std7_std7_#0 -------- */ // >s 0
      /* -------- 1,xx,01000: CMGE std7_std7_#0 -------- */ // >=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isGT = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      IROp    opGTS = mkVecCMPGTS(size);
      assign(res, isGT ? binop(opGTS, argL, argR)
                       : unop(Iop_NotV128, binop(opGTS, argR, argL)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isGT ? "gt" : "ge",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (opcode == BITS5(0,1,0,0,1)) {
      /* -------- 0,xx,01001: CMEQ std7_std7_#0 -------- */ // == 0
      /* -------- 1,xx,01001: CMLE std7_std7_#0 -------- */ // <=s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      Bool isEQ = bitU == 0;
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, isEQ ? binop(mkVecCMPEQ(size), argL, argR)
                       : unop(Iop_NotV128,
                              binop(mkVecCMPGTS(size), argL, argR)));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", isEQ ? "eq" : "le",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

   if (bitU == 0 && opcode == BITS5(0,1,0,1,0)) {
      /* -------- 0,xx,01010: CMLT std7_std7_#0 -------- */ // <s 0
      if (bitQ == 0 && size == X11) return False; // implied 1d case
      IRExpr* argL = getQReg128(nn);
      IRExpr* argR = mkV128(0x0000);
      IRTemp  res  = newTempV128();
      assign(res, binop(mkVecCMPGTS(size), argR, argL));
      putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
      const HChar* arr = nameArr_Q_SZ(bitQ, size);
      DIP("cm%s %s.%s, %s.%s, #0\n", "lt",
          nameQReg128(dd), arr, nameQReg128(nn), arr);
      return True;
   }

sewardj25523c42014-06-15 19:36:29 +000011808 if (bitU == 0 && opcode == BITS5(0,1,0,1,1)) {
11809 /* -------- 0,xx,01011: ABS std7_std7 -------- */
11810 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000011811 IRTemp res = newTempV128();
11812 assign(res, unop(mkVecABS(size), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011813 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj25523c42014-06-15 19:36:29 +000011814 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11815 DIP("abs %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
11816 return True;
11817 }
11818
sewardjdf1628c2014-06-10 22:52:05 +000011819 if (bitU == 1 && opcode == BITS5(0,1,0,1,1)) {
11820 /* -------- 1,xx,01011: NEG std7_std7 -------- */
11821 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardj8e91fd42014-07-11 12:05:47 +000011822 IRTemp res = newTempV128();
11823 assign(res, binop(mkVecSUB(size), mkV128(0x0000), getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011824 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000011825 const HChar* arr = nameArr_Q_SZ(bitQ, size);
11826 DIP("neg %s.%s, %s.%s\n", nameQReg128(dd), arr, nameQReg128(nn), arr);
11827 return True;
11828 }
11829
sewardj13830dc2015-02-07 21:09:47 +000011830 UInt ix = 0; /*INVALID*/
11831 if (size >= X10) {
11832 switch (opcode) {
11833 case BITS5(0,1,1,0,0): ix = (bitU == 1) ? 4 : 1; break;
11834 case BITS5(0,1,1,0,1): ix = (bitU == 1) ? 5 : 2; break;
11835 case BITS5(0,1,1,1,0): if (bitU == 0) ix = 3; break;
11836 default: break;
11837 }
11838 }
11839 if (ix > 0) {
11840 /* -------- 0,1x,01100 FCMGT 2d_2d,4s_4s,2s_2s _#0.0 (ix 1) -------- */
11841 /* -------- 0,1x,01101 FCMEQ 2d_2d,4s_4s,2s_2s _#0.0 (ix 2) -------- */
11842 /* -------- 0,1x,01110 FCMLT 2d_2d,4s_4s,2s_2s _#0.0 (ix 3) -------- */
11843 /* -------- 1,1x,01100 FCMGE 2d_2d,4s_4s,2s_2s _#0.0 (ix 4) -------- */
11844 /* -------- 1,1x,01101 FCMLE 2d_2d,4s_4s,2s_2s _#0.0 (ix 5) -------- */
11845 if (bitQ == 0 && size == X11) return False; // implied 1d case
11846 Bool isD = size == X11;
11847 IROp opCmpEQ = isD ? Iop_CmpEQ64Fx2 : Iop_CmpEQ32Fx4;
11848 IROp opCmpLE = isD ? Iop_CmpLE64Fx2 : Iop_CmpLE32Fx4;
11849 IROp opCmpLT = isD ? Iop_CmpLT64Fx2 : Iop_CmpLT32Fx4;
11850 IROp opCmp = Iop_INVALID;
11851 Bool swap = False;
11852 const HChar* nm = "??";
11853 switch (ix) {
11854 case 1: nm = "fcmgt"; opCmp = opCmpLT; swap = True; break;
11855 case 2: nm = "fcmeq"; opCmp = opCmpEQ; break;
11856 case 3: nm = "fcmlt"; opCmp = opCmpLT; break;
11857 case 4: nm = "fcmge"; opCmp = opCmpLE; swap = True; break;
11858 case 5: nm = "fcmle"; opCmp = opCmpLE; break;
11859 default: vassert(0);
11860 }
11861 IRExpr* zero = mkV128(0x0000);
11862 IRTemp res = newTempV128();
11863 assign(res, swap ? binop(opCmp, zero, getQReg128(nn))
11864 : binop(opCmp, getQReg128(nn), zero));
11865 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
11866 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
11867 DIP("%s %s.%s, %s.%s, #0.0\n", nm,
11868 nameQReg128(dd), arr, nameQReg128(nn), arr);
11869 return True;
11870 }
11871
sewardjdf1628c2014-06-10 22:52:05 +000011872 if (size >= X10 && opcode == BITS5(0,1,1,1,1)) {
11873 /* -------- 0,1x,01111: FABS 2d_2d, 4s_4s, 2s_2s -------- */
11874 /* -------- 1,1x,01111: FNEG 2d_2d, 4s_4s, 2s_2s -------- */
11875 if (bitQ == 0 && size == X11) return False; // implied 1d case
11876 Bool isFNEG = bitU == 1;
11877 IROp op = isFNEG ? (size == X10 ? Iop_Neg32Fx4 : Iop_Neg64Fx2)
11878 : (size == X10 ? Iop_Abs32Fx4 : Iop_Abs64Fx2);
sewardj8e91fd42014-07-11 12:05:47 +000011879 IRTemp res = newTempV128();
sewardjdf1628c2014-06-10 22:52:05 +000011880 assign(res, unop(op, getQReg128(nn)));
sewardjdf9d6d52014-06-27 10:43:22 +000011881 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardjdf1628c2014-06-10 22:52:05 +000011882 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
11883 DIP("%s %s.%s, %s.%s\n", isFNEG ? "fneg" : "fabs",
11884 nameQReg128(dd), arr, nameQReg128(nn), arr);
11885 return True;
11886 }
11887
11888 if (bitU == 0 && opcode == BITS5(1,0,0,1,0)) {
11889 /* -------- 0,xx,10010: XTN{,2} -------- */
sewardjecedd982014-08-11 14:02:47 +000011890 if (size == X11) return False;
11891 vassert(size < 3);
11892 Bool is2 = bitQ == 1;
11893 IROp opN = mkVecNARROWUN(size);
11894 IRTemp resN = newTempV128();
11895 assign(resN, unop(Iop_64UtoV128, unop(opN, getQReg128(nn))));
11896 putLO64andZUorPutHI64(is2, dd, resN);
11897 const HChar* nm = "xtn";
11898 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11899 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11900 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
11901 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11902 return True;
11903 }
11904
11905 if (opcode == BITS5(1,0,1,0,0)
11906 || (bitU == 1 && opcode == BITS5(1,0,0,1,0))) {
11907 /* -------- 0,xx,10100: SQXTN{,2} -------- */
11908 /* -------- 1,xx,10100: UQXTN{,2} -------- */
11909 /* -------- 1,xx,10010: SQXTUN{,2} -------- */
11910 if (size == X11) return False;
11911 vassert(size < 3);
11912 Bool is2 = bitQ == 1;
11913 IROp opN = Iop_INVALID;
11914 Bool zWiden = True;
11915 const HChar* nm = "??";
11916 /**/ if (bitU == 0 && opcode == BITS5(1,0,1,0,0)) {
11917 opN = mkVecQNARROWUNSS(size); nm = "sqxtn"; zWiden = False;
sewardjdf1628c2014-06-10 22:52:05 +000011918 }
sewardjecedd982014-08-11 14:02:47 +000011919 else if (bitU == 1 && opcode == BITS5(1,0,1,0,0)) {
11920 opN = mkVecQNARROWUNUU(size); nm = "uqxtn";
sewardjdf1628c2014-06-10 22:52:05 +000011921 }
sewardjecedd982014-08-11 14:02:47 +000011922 else if (bitU == 1 && opcode == BITS5(1,0,0,1,0)) {
11923 opN = mkVecQNARROWUNSU(size); nm = "sqxtun";
11924 }
11925 else vassert(0);
11926 IRTemp src = newTempV128();
11927 assign(src, getQReg128(nn));
11928 IRTemp resN = newTempV128();
11929 assign(resN, unop(Iop_64UtoV128, unop(opN, mkexpr(src))));
11930 putLO64andZUorPutHI64(is2, dd, resN);
11931 IRTemp resW = math_WIDEN_LO_OR_HI_LANES(zWiden, False/*!fromUpperHalf*/,
11932 size, mkexpr(resN));
11933 updateQCFLAGwithDifference(src, resW);
11934 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11935 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11936 DIP("%s%s %s.%s, %s.%s\n", is2 ? "2" : "", nm,
11937 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11938 return True;
sewardjdf1628c2014-06-10 22:52:05 +000011939 }
11940
sewardj487559e2014-07-10 14:22:45 +000011941 if (bitU == 1 && opcode == BITS5(1,0,0,1,1)) {
11942 /* -------- 1,xx,10011 SHLL{2} #lane-width -------- */
11943 /* Widens, and size is the narrow size. */
11944 if (size == X11) return False;
11945 Bool is2 = bitQ == 1;
11946 IROp opINT = is2 ? mkVecINTERLEAVEHI(size) : mkVecINTERLEAVELO(size);
11947 IROp opSHL = mkVecSHLN(size+1);
sewardj8e91fd42014-07-11 12:05:47 +000011948 IRTemp src = newTempV128();
11949 IRTemp res = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000011950 assign(src, getQReg128(nn));
11951 assign(res, binop(opSHL, binop(opINT, mkexpr(src), mkexpr(src)),
11952 mkU8(8 << size)));
11953 putQReg128(dd, mkexpr(res));
11954 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
11955 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
11956 DIP("shll%s %s.%s, %s.%s, #%u\n", is2 ? "2" : "",
11957 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow, 8 << size);
11958 return True;
11959 }
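      /* Note on the SHLL implementation above: interleaving the narrow
         vector with itself makes each wide lane hold (x << w) | x, where
         w is the narrow lane width; the subsequent wide shift left by w
         then leaves exactly x << w, since the upper copy is shifted out.
         Worked 8-bit example: x = 0xAB interleaves to the 16-bit lane
         0xABAB, and the shift yields 0xAB00 == 0xAB << 8, which is
         SHLL's zero-extend-then-shift result. */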
11960
sewardj400d6b92015-03-30 09:01:51 +000011961 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,0)) {
11962 /* -------- 0,0x,10110: FCVTN 4h/8h_4s, 2s/4s_2d -------- */
11963 UInt nLanes = size == X00 ? 4 : 2;
11964 IRType srcTy = size == X00 ? Ity_F32 : Ity_F64;
11965 IROp opCvt = size == X00 ? Iop_F32toF16 : Iop_F64toF32;
11966 IRTemp rm = mk_get_IR_rounding_mode();
11967 IRTemp src[nLanes];
11968 for (UInt i = 0; i < nLanes; i++) {
11969 src[i] = newTemp(srcTy);
11970 assign(src[i], getQRegLane(nn, i, srcTy));
11971 }
11972 for (UInt i = 0; i < nLanes; i++) {
11973 putQRegLane(dd, nLanes * bitQ + i,
11974 binop(opCvt, mkexpr(rm), mkexpr(src[i])));
11975 }
sewardjdf1628c2014-06-10 22:52:05 +000011976 if (bitQ == 0) {
11977 putQRegLane(dd, 1, mkU64(0));
11978 }
sewardj400d6b92015-03-30 09:01:51 +000011979 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
11980 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
11981 DIP("fcvtn%s %s.%s, %s.%s\n", bitQ ? "2" : "",
11982 nameQReg128(dd), arrNarrow, nameQReg128(nn), arrWide);
11983 return True;
11984 }
11985
11986 if (bitU == 0 && size <= X01 && opcode == BITS5(1,0,1,1,1)) {
11987 /* -------- 0,0x,10111: FCVTL 4s_4h/8h, 2d_2s/4s -------- */
11988 UInt nLanes = size == X00 ? 4 : 2;
11989 IRType srcTy = size == X00 ? Ity_F16 : Ity_F32;
11990 IROp opCvt = size == X00 ? Iop_F16toF32 : Iop_F32toF64;
11991 IRTemp src[nLanes];
11992 for (UInt i = 0; i < nLanes; i++) {
11993 src[i] = newTemp(srcTy);
11994 assign(src[i], getQRegLane(nn, nLanes * bitQ + i, srcTy));
11995 }
11996 for (UInt i = 0; i < nLanes; i++) {
11997 putQRegLane(dd, i, unop(opCvt, mkexpr(src[i])));
11998 }
11999 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, 1+size);
12000 const HChar* arrWide = nameArr_Q_SZ(1, 1+size+1);
12001 DIP("fcvtl%s %s.%s, %s.%s\n", bitQ ? "2" : "",
12002 nameQReg128(dd), arrWide, nameQReg128(nn), arrNarrow);
sewardjdf1628c2014-06-10 22:52:05 +000012003 return True;
12004 }
12005
sewardj6a785df2015-02-09 09:07:47 +000012006 ix = 0;
12007 if (opcode == BITS5(1,1,0,0,0) || opcode == BITS5(1,1,0,0,1)) {
12008 ix = 1 + ((((bitU & 1) << 2) | ((size & 2) << 0)) | ((opcode & 1) << 0));
12009 // = 1 + bitU[0]:size[1]:opcode[0]
12010 vassert(ix >= 1 && ix <= 8);
12011 if (ix == 7) ix = 0;
12012 }
12013 if (ix > 0) {
12014 /* -------- 0,0x,11000 FRINTN 2d_2d, 4s_4s, 2s_2s (1) -------- */
12015 /* -------- 0,0x,11001 FRINTM 2d_2d, 4s_4s, 2s_2s (2) -------- */
12016 /* -------- 0,1x,11000 FRINTP 2d_2d, 4s_4s, 2s_2s (3) -------- */
12017 /* -------- 0,1x,11001 FRINTZ 2d_2d, 4s_4s, 2s_2s (4) -------- */
12018 /* -------- 1,0x,11000 FRINTA 2d_2d, 4s_4s, 2s_2s (5) -------- */
12019 /* -------- 1,0x,11001 FRINTX 2d_2d, 4s_4s, 2s_2s (6) -------- */
12020 /* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
12021 /* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
12022 /* rm plan:
12023 FRINTN: tieeven -- !! FIXME KLUDGED !!
12024 FRINTM: -inf
12025 FRINTP: +inf
12026 FRINTZ: zero
12027 FRINTA: tieaway -- !! FIXME KLUDGED !!
12028 FRINTX: per FPCR + "exact = TRUE"
12029 FRINTI: per FPCR
12030 */
12031 Bool isD = (size & 1) == 1;
12032 if (bitQ == 0 && isD) return False; // implied 1d case
12033
12034 IRTemp irrmRM = mk_get_IR_rounding_mode();
12035
12036 UChar ch = '?';
12037 IRTemp irrm = newTemp(Ity_I32);
12038 switch (ix) {
12039 case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12040 case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
12041 case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
12042 case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
12043 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12044 case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
12045 // I am unsure about the following, due to the "integral exact"
12046 // description in the manual. What does it mean? (frintx, that is)
12047 case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
12048 case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
12049 default: vassert(0);
12050 }
12051
sewardj6a785df2015-02-09 09:07:47 +000012052 IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
12053 if (isD) {
12054 for (UInt i = 0; i < 2; i++) {
12055 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12056 getQRegLane(nn, i, Ity_F64)));
12057 }
12058 } else {
12059 UInt n = bitQ==1 ? 4 : 2;
12060 for (UInt i = 0; i < n; i++) {
12061 putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
12062 getQRegLane(nn, i, Ity_F32)));
12063 }
12064 if (bitQ == 0)
12065 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12066 }
12067 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12068 DIP("frint%c %s.%s, %s.%s\n", ch,
12069 nameQReg128(dd), arr, nameQReg128(nn), arr);
12070 return True;
12071 }
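      /* Rounding sanity examples for the table above (architectural
         values): frintm(1.7) = 1.0, frintp(1.7) = 2.0, frintz(-1.7)
         = -1.0, frintn(2.5) = 2.0 (ties to even). Architecturally
         frinta(2.5) = 3.0 (ties away from zero); the kludge above
         instead yields 2.0, which is where this implementation can
         differ from real hardware. */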
12072
sewardjbc0b7222015-03-30 18:49:38 +000012073 ix = 0; /*INVALID*/
12074 switch (opcode) {
12075 case BITS5(1,1,0,1,0): ix = ((size & 2) == 2) ? 4 : 1; break;
12076 case BITS5(1,1,0,1,1): ix = ((size & 2) == 2) ? 5 : 2; break;
12077 case BITS5(1,1,1,0,0): if ((size & 2) == 0) ix = 3; break;
12078 default: break;
12079 }
12080 if (ix > 0) {
12081 /* -------- 0,0x,11010 FCVTNS 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12082 /* -------- 0,0x,11011 FCVTMS 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12083 /* -------- 0,0x,11100 FCVTAS 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12084 /* -------- 0,1x,11010 FCVTPS 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12085 /* -------- 0,1x,11011 FCVTZS 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12086 /* -------- 1,0x,11010 FCVTNU 2d_2d, 4s_4s, 2s_2s (ix 1) -------- */
12087 /* -------- 1,0x,11011 FCVTMU 2d_2d, 4s_4s, 2s_2s (ix 2) -------- */
12088 /* -------- 1,0x,11100 FCVTAU 2d_2d, 4s_4s, 2s_2s (ix 3) -------- */
12089 /* -------- 1,1x,11010 FCVTPU 2d_2d, 4s_4s, 2s_2s (ix 4) -------- */
12090 /* -------- 1,1x,11011 FCVTZU 2d_2d, 4s_4s, 2s_2s (ix 5) -------- */
12091 Bool isD = (size & 1) == 1;
12092 if (bitQ == 0 && isD) return False; // implied 1d case
12093
12094 IRRoundingMode irrm = 8; /*impossible*/
12095 HChar ch = '?';
12096 switch (ix) {
12097 case 1: ch = 'n'; irrm = Irrm_NEAREST; break;
12098 case 2: ch = 'm'; irrm = Irrm_NegINF; break;
12099 case 3: ch = 'a'; irrm = Irrm_NEAREST; break; /* kludge? */
12100 case 4: ch = 'p'; irrm = Irrm_PosINF; break;
12101 case 5: ch = 'z'; irrm = Irrm_ZERO; break;
12102 default: vassert(0);
12103 }
12104 IROp cvt = Iop_INVALID;
12105 if (bitU == 1) {
12106 cvt = isD ? Iop_F64toI64U : Iop_F32toI32U;
12107 } else {
12108 cvt = isD ? Iop_F64toI64S : Iop_F32toI32S;
12109 }
12110 if (isD) {
12111 for (UInt i = 0; i < 2; i++) {
12112 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12113 getQRegLane(nn, i, Ity_F64)));
12114 }
12115 } else {
12116 UInt n = bitQ==1 ? 4 : 2;
12117 for (UInt i = 0; i < n; i++) {
12118 putQRegLane(dd, i, binop(cvt, mkU32(irrm),
12119 getQRegLane(nn, i, Ity_F32)));
12120 }
12121 if (bitQ == 0)
12122 putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
12123 }
12124 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12125 DIP("fcvt%c%c %s.%s, %s.%s\n", ch, bitU == 1 ? 'u' : 's',
12126 nameQReg128(dd), arr, nameQReg128(nn), arr);
12127 return True;
12128 }
12129
sewardjfc261d92014-08-24 20:36:14 +000012130 if (size == X10 && opcode == BITS5(1,1,1,0,0)) {
12131 /* -------- 0,10,11100: URECPE 4s_4s, 2s_2s -------- */
12132 /* -------- 1,10,11100: URSQRTE 4s_4s, 2s_2s -------- */
12133 Bool isREC = bitU == 0;
12134 IROp op = isREC ? Iop_RecipEst32Ux4 : Iop_RSqrtEst32Ux4;
12135 IRTemp res = newTempV128();
12136 assign(res, unop(op, getQReg128(nn)));
12137 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12138 const HChar* nm = isREC ? "urecpe" : "ursqrte";
12139 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12140 DIP("%s %s.%s, %s.%s\n", nm,
12141 nameQReg128(dd), arr, nameQReg128(nn), arr);
12142 return True;
12143 }
12144
sewardj5747c4a2014-06-11 20:57:23 +000012145 if (size <= X01 && opcode == BITS5(1,1,1,0,1)) {
12146 /* -------- 0,0x,11101: SCVTF -------- */
12147 /* -------- 1,0x,11101: UCVTF -------- */
12148 /* 31 28 22 21 15 9 4
12149 0q0 01110 0 sz 1 00001 110110 n d SCVTF Vd, Vn
12150 0q1 01110 0 sz 1 00001 110110 n d UCVTF Vd, Vn
12151 with laneage:
12152 case sz:Q of 00 -> 2S, zero upper, 01 -> 4S, 10 -> illegal, 11 -> 2D
12153 */
12154 Bool isQ = bitQ == 1;
12155 Bool isU = bitU == 1;
12156 Bool isF64 = (size & 1) == 1;
12157 if (isQ || !isF64) {
12158 IRType tyF = Ity_INVALID, tyI = Ity_INVALID;
12159 UInt nLanes = 0;
12160 Bool zeroHI = False;
12161 const HChar* arrSpec = NULL;
12162 Bool ok = getLaneInfo_Q_SZ(&tyI, &tyF, &nLanes, &zeroHI, &arrSpec,
12163 isQ, isF64 );
12164 IROp iop = isU ? (isF64 ? Iop_I64UtoF64 : Iop_I32UtoF32)
12165 : (isF64 ? Iop_I64StoF64 : Iop_I32StoF32);
12166 IRTemp rm = mk_get_IR_rounding_mode();
12167 UInt i;
12168 vassert(ok); /* the 'if' above should ensure this */
12169 for (i = 0; i < nLanes; i++) {
12170 putQRegLane(dd, i,
12171 binop(iop, mkexpr(rm), getQRegLane(nn, i, tyI)));
12172 }
12173 if (zeroHI) {
12174 putQRegLane(dd, 1, mkU64(0));
12175 }
12176 DIP("%ccvtf %s.%s, %s.%s\n", isU ? 'u' : 's',
12177 nameQReg128(dd), arrSpec, nameQReg128(nn), arrSpec);
12178 return True;
12179 }
12180 /* else fall through */
12181 }
12182
sewardj89cefe42015-02-24 12:21:01 +000012183 if (size >= X10 && opcode == BITS5(1,1,1,0,1)) {
12184 /* -------- 0,1x,11101: FRECPE 2d_2d, 4s_4s, 2s_2s -------- */
12185 /* -------- 1,1x,11101: FRSQRTE 2d_2d, 4s_4s, 2s_2s -------- */
12186 Bool isSQRT = bitU == 1;
12187 Bool isD = (size & 1) == 1;
12188 IROp op = isSQRT ? (isD ? Iop_RSqrtEst64Fx2 : Iop_RSqrtEst32Fx4)
12189 : (isD ? Iop_RecipEst64Fx2 : Iop_RecipEst32Fx4);
12190 if (bitQ == 0 && isD) return False; // implied 1d case
12191 IRTemp resV = newTempV128();
12192 assign(resV, unop(op, getQReg128(nn)));
12193 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, resV));
12194 const HChar* arr = bitQ == 0 ? "2s" : (size == X11 ? "2d" : "4s");
12195 DIP("%s %s.%s, %s.%s\n", isSQRT ? "frsqrte" : "frecpe",
12196 nameQReg128(dd), arr, nameQReg128(nn), arr);
12197 return True;
12198 }
12199
sewardjdf1628c2014-06-10 22:52:05 +000012200 return False;
12201# undef INSN
12202}
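
/* A scalar sketch (illustrative only; not used by the decoder) of the
   saturation tracking in the SQABS/SQNEG and SQXTN cases above: both
   the saturated and the wrapping result are computed, and FPSR.QC must
   be set when they differ, which is what updateQCFLAGwithDifference
   establishes for the vector versions. */
static inline Long sketch_sqneg64 ( Long x, /*MOD*/Bool* qc )
{
   Long minI  = (Long)0x8000000000000000ULL;   /* most negative value */
   Long maxI  = (Long)0x7FFFFFFFFFFFFFFFULL;   /* most positive value */
   Long plain = (Long)(0ULL - (ULong)x);       /* wrapping negation */
   Long sat   = (x == minI) ? maxI : plain;    /* saturating negation */
   if (sat != plain) *qc = True;               /* QC is sticky */
   return sat;
}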
12203
sewardjfc83d2c2014-06-12 10:15:46 +000012204
sewardjdf1628c2014-06-10 22:52:05 +000012205static
12206Bool dis_AdvSIMD_vector_x_indexed_elem(/*MB_OUT*/DisResult* dres, UInt insn)
12207{
sewardj85fbb022014-06-12 13:16:01 +000012208 /* 31 28 23 21 20 19 15 11 9 4
12209 0 Q U 01111 size L M m opcode H 0 n d
12210 Decode fields are: u,size,opcode
sewardj787a67f2014-06-23 09:09:41 +000012211 M is really part of the mm register number. Individual
12212 cases need to inspect L and H though.
sewardj85fbb022014-06-12 13:16:01 +000012213 */
sewardjdf1628c2014-06-10 22:52:05 +000012214# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj85fbb022014-06-12 13:16:01 +000012215 if (INSN(31,31) != 0
sewardj8e91fd42014-07-11 12:05:47 +000012216 || INSN(28,24) != BITS5(0,1,1,1,1) || INSN(10,10) !=0) {
sewardj85fbb022014-06-12 13:16:01 +000012217 return False;
12218 }
12219 UInt bitQ = INSN(30,30);
12220 UInt bitU = INSN(29,29);
12221 UInt size = INSN(23,22);
12222 UInt bitL = INSN(21,21);
12223 UInt bitM = INSN(20,20);
12224 UInt mmLO4 = INSN(19,16);
12225 UInt opcode = INSN(15,12);
12226 UInt bitH = INSN(11,11);
12227 UInt nn = INSN(9,5);
12228 UInt dd = INSN(4,0);
sewardj85fbb022014-06-12 13:16:01 +000012229 vassert(size < 4);
sewardj787a67f2014-06-23 09:09:41 +000012230 vassert(bitH < 2 && bitM < 2 && bitL < 2);
sewardj85fbb022014-06-12 13:16:01 +000012231
sewardjd0e5e532014-10-30 16:36:53 +000012232 if (bitU == 0 && size >= X10
12233 && (opcode == BITS4(0,0,0,1) || opcode == BITS4(0,1,0,1))) {
12234 /* -------- 0,1x,0001 FMLA 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12235 /* -------- 0,1x,0101 FMLS 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12236 if (bitQ == 0 && size == X11) return False; // implied 1d case
12237 Bool isD = (size & 1) == 1;
12238 Bool isSUB = opcode == BITS4(0,1,0,1);
12239 UInt index;
12240 if (!isD) index = (bitH << 1) | bitL;
12241 else if (isD && bitL == 0) index = bitH;
12242 else return False; // sz:L == x11 => unallocated encoding
12243 vassert(index < (isD ? 2 : 4));
12244 IRType ity = isD ? Ity_F64 : Ity_F32;
12245 IRTemp elem = newTemp(ity);
12246 UInt mm = (bitM << 4) | mmLO4;
12247 assign(elem, getQRegLane(mm, index, ity));
12248 IRTemp dupd = math_DUP_TO_V128(elem, ity);
12249 IROp opADD = isD ? Iop_Add64Fx2 : Iop_Add32Fx4;
12250 IROp opSUB = isD ? Iop_Sub64Fx2 : Iop_Sub32Fx4;
12251 IROp opMUL = isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4;
12252 IRTemp rm = mk_get_IR_rounding_mode();
12253 IRTemp t1 = newTempV128();
12254 IRTemp t2 = newTempV128();
12255 // FIXME: double rounding; use FMA primops instead
12256 assign(t1, triop(opMUL, mkexpr(rm), getQReg128(nn), mkexpr(dupd)));
12257 assign(t2, triop(isSUB ? opSUB : opADD,
12258 mkexpr(rm), getQReg128(dd), mkexpr(t1)));
12259 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, t2));
12260 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
12261 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isSUB ? "fmls" : "fmla",
12262 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm),
12263 isD ? 'd' : 's', index);
12264 return True;
12265 }
12266
sewardjee3db332015-02-08 18:24:38 +000012267 if (size >= X10 && opcode == BITS4(1,0,0,1)) {
12268 /* -------- 0,1x,1001 FMUL 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
12269 /* -------- 1,1x,1001 FMULX 2d_2d_d[], 4s_4s_s[], 2s_2s_s[] -------- */
sewardj85fbb022014-06-12 13:16:01 +000012270 if (bitQ == 0 && size == X11) return False; // implied 1d case
sewardjee3db332015-02-08 18:24:38 +000012271 Bool isD = (size & 1) == 1;
12272 Bool isMULX = bitU == 1;
sewardj85fbb022014-06-12 13:16:01 +000012273 UInt index;
12274 if (!isD) index = (bitH << 1) | bitL;
12275 else if (isD && bitL == 0) index = bitH;
12276 else return False; // sz:L == x11 => unallocated encoding
12277 vassert(index < (isD ? 2 : 4));
12278 IRType ity = isD ? Ity_F64 : Ity_F32;
12279 IRTemp elem = newTemp(ity);
sewardj787a67f2014-06-23 09:09:41 +000012280 UInt mm = (bitM << 4) | mmLO4;
sewardj85fbb022014-06-12 13:16:01 +000012281 assign(elem, getQRegLane(mm, index, ity));
12282 IRTemp dupd = math_DUP_TO_V128(elem, ity);
sewardjee3db332015-02-08 18:24:38 +000012283 // KLUDGE: FMULX is treated the same way as FMUL. That can't be right.
sewardj8e91fd42014-07-11 12:05:47 +000012284 IRTemp res = newTempV128();
sewardj85fbb022014-06-12 13:16:01 +000012285 assign(res, triop(isD ? Iop_Mul64Fx2 : Iop_Mul32Fx4,
12286 mkexpr(mk_get_IR_rounding_mode()),
12287 getQReg128(nn), mkexpr(dupd)));
sewardjdf9d6d52014-06-27 10:43:22 +000012288 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj85fbb022014-06-12 13:16:01 +000012289 const HChar* arr = bitQ == 0 ? "2s" : (isD ? "2d" : "4s");
sewardjee3db332015-02-08 18:24:38 +000012290 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n",
12291 isMULX ? "fmulx" : "fmul", nameQReg128(dd), arr,
sewardj85fbb022014-06-12 13:16:01 +000012292 nameQReg128(nn), arr, nameQReg128(mm), isD ? 'd' : 's', index);
12293 return True;
12294 }
12295
sewardj787a67f2014-06-23 09:09:41 +000012296 if ((bitU == 1 && (opcode == BITS4(0,0,0,0) || opcode == BITS4(0,1,0,0)))
12297 || (bitU == 0 && opcode == BITS4(1,0,0,0))) {
12298 /* -------- 1,xx,0000 MLA s/h variants only -------- */
12299 /* -------- 1,xx,0100 MLS s/h variants only -------- */
12300 /* -------- 0,xx,1000 MUL s/h variants only -------- */
12301 Bool isMLA = opcode == BITS4(0,0,0,0);
12302 Bool isMLS = opcode == BITS4(0,1,0,0);
12303 UInt mm = 32; // invalid
12304 UInt ix = 16; // invalid
12305 switch (size) {
12306 case X00:
12307 return False; // b case is not allowed
12308 case X01:
12309 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12310 case X10:
12311 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12312 case X11:
12313 return False; // d case is not allowed
12314 default:
12315 vassert(0);
12316 }
12317 vassert(mm < 32 && ix < 16);
sewardj487559e2014-07-10 14:22:45 +000012318 IROp opMUL = mkVecMUL(size);
12319 IROp opADD = mkVecADD(size);
12320 IROp opSUB = mkVecSUB(size);
sewardj787a67f2014-06-23 09:09:41 +000012321 HChar ch = size == X01 ? 'h' : 's';
sewardj487559e2014-07-10 14:22:45 +000012322 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000012323 IRTemp vecD = newTempV128();
12324 IRTemp vecN = newTempV128();
12325 IRTemp res = newTempV128();
sewardj787a67f2014-06-23 09:09:41 +000012326 assign(vecD, getQReg128(dd));
12327 assign(vecN, getQReg128(nn));
12328 IRExpr* prod = binop(opMUL, mkexpr(vecN), mkexpr(vecM));
12329 if (isMLA || isMLS) {
12330 assign(res, binop(isMLA ? opADD : opSUB, mkexpr(vecD), prod));
12331 } else {
12332 assign(res, prod);
12333 }
sewardjdf9d6d52014-06-27 10:43:22 +000012334 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
sewardj787a67f2014-06-23 09:09:41 +000012335 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12336 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", isMLA ? "mla"
12337 : (isMLS ? "mls" : "mul"),
12338 nameQReg128(dd), arr,
12339 nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12340 return True;
12341 }
12342
sewardj487559e2014-07-10 14:22:45 +000012343 if (opcode == BITS4(1,0,1,0)
12344 || opcode == BITS4(0,0,1,0) || opcode == BITS4(0,1,1,0)) {
12345 /* -------- 0,xx,1010 SMULL s/h variants only -------- */ // 0 (ks)
12346 /* -------- 1,xx,1010 UMULL s/h variants only -------- */ // 0
12347 /* -------- 0,xx,0010 SMLAL s/h variants only -------- */ // 1
12348 /* -------- 1,xx,0010 UMLAL s/h variants only -------- */ // 1
12349 /* -------- 0,xx,0110 SMLSL s/h variants only -------- */ // 2
12350 /* -------- 1,xx,0110 UMLSL s/h variants only -------- */ // 2
12351 /* Widens, and size refers to the narrowed lanes. */
12352 UInt ks = 3;
12353 switch (opcode) {
12354 case BITS4(1,0,1,0): ks = 0; break;
12355 case BITS4(0,0,1,0): ks = 1; break;
12356 case BITS4(0,1,1,0): ks = 2; break;
12357 default: vassert(0);
12358 }
12359 vassert(ks >= 0 && ks <= 2);
12360 Bool isU = bitU == 1;
12361 Bool is2 = bitQ == 1;
12362 UInt mm = 32; // invalid
12363 UInt ix = 16; // invalid
12364 switch (size) {
12365 case X00:
12366 return False; // h_b_b[] case is not allowed
12367 case X01:
12368 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12369 case X10:
12370 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12371 case X11:
12372 return False; // q_d_d[] case is not allowed
12373 default:
12374 vassert(0);
12375 }
12376 vassert(mm < 32 && ix < 16);
sewardj51d012a2014-07-21 09:19:50 +000012377 IRTemp vecN = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000012378 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
sewardj8e91fd42014-07-11 12:05:47 +000012379 IRTemp vecD = newTempV128();
sewardj487559e2014-07-10 14:22:45 +000012380 assign(vecN, getQReg128(nn));
sewardj51d012a2014-07-21 09:19:50 +000012381 assign(vecD, getQReg128(dd));
12382 IRTemp res = IRTemp_INVALID;
12383 math_MULL_ACC(&res, is2, isU, size, "mas"[ks],
12384 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
sewardj487559e2014-07-10 14:22:45 +000012385 putQReg128(dd, mkexpr(res));
12386 const HChar* nm = ks == 0 ? "mull" : (ks == 1 ? "mlal" : "mlsl");
12387 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12388 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12389 HChar ch = size == X01 ? 'h' : 's';
12390 DIP("%c%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12391 isU ? 'u' : 's', nm, is2 ? "2" : "",
12392 nameQReg128(dd), arrWide,
12393 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12394 return True;
12395 }
12396
sewardj51d012a2014-07-21 09:19:50 +000012397 if (bitU == 0
12398 && (opcode == BITS4(1,0,1,1)
12399 || opcode == BITS4(0,0,1,1) || opcode == BITS4(0,1,1,1))) {
12400 /* -------- 0,xx,1011 SQDMULL s/h variants only -------- */ // 0 (ks)
12401 /* -------- 0,xx,0011 SQDMLAL s/h variants only -------- */ // 1
12402 /* -------- 0,xx,0111 SQDMLSL s/h variants only -------- */ // 2
12403 /* Widens, and size refers to the narrowed lanes. */
12404 UInt ks = 3;
12405 switch (opcode) {
12406 case BITS4(1,0,1,1): ks = 0; break;
12407 case BITS4(0,0,1,1): ks = 1; break;
12408 case BITS4(0,1,1,1): ks = 2; break;
12409 default: vassert(0);
12410 }
12411 vassert(ks >= 0 && ks <= 2);
12412 Bool is2 = bitQ == 1;
12413 UInt mm = 32; // invalid
12414 UInt ix = 16; // invalid
12415 switch (size) {
12416 case X00:
12417 return False; // h_b_b[] case is not allowed
12418 case X01:
12419 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12420 case X10:
12421 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12422 case X11:
12423 return False; // q_d_d[] case is not allowed
12424 default:
12425 vassert(0);
12426 }
12427 vassert(mm < 32 && ix < 16);
12428 IRTemp vecN, vecD, res, sat1q, sat1n, sat2q, sat2n;
12429 vecN = vecD = res = sat1q = sat1n = sat2q = sat2n = IRTemp_INVALID;
12430 newTempsV128_2(&vecN, &vecD);
12431 assign(vecN, getQReg128(nn));
12432 IRTemp vecM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12433 assign(vecD, getQReg128(dd));
12434 math_SQDMULL_ACC(&res, &sat1q, &sat1n, &sat2q, &sat2n,
12435 is2, size, "mas"[ks],
12436 vecN, vecM, ks == 0 ? IRTemp_INVALID : vecD);
12437 putQReg128(dd, mkexpr(res));
12438 vassert(sat1q != IRTemp_INVALID && sat1n != IRTemp_INVALID);
12439 updateQCFLAGwithDifference(sat1q, sat1n);
12440 if (sat2q != IRTemp_INVALID || sat2n != IRTemp_INVALID) {
12441 updateQCFLAGwithDifference(sat2q, sat2n);
12442 }
sewardj54ffa1d2014-07-22 09:27:49 +000012443 const HChar* nm = ks == 0 ? "sqdmull"
sewardj51d012a2014-07-21 09:19:50 +000012444 : (ks == 1 ? "sqdmlal" : "sqdmlsl");
12445 const HChar* arrNarrow = nameArr_Q_SZ(bitQ, size);
12446 const HChar* arrWide = nameArr_Q_SZ(1, size+1);
12447 HChar ch = size == X01 ? 'h' : 's';
12448 DIP("%s%s %s.%s, %s.%s, %s.%c[%u]\n",
12449 nm, is2 ? "2" : "",
12450 nameQReg128(dd), arrWide,
12451 nameQReg128(nn), arrNarrow, nameQReg128(mm), ch, ix);
12452 return True;
12453 }
12454
sewardj257e99f2014-08-03 12:45:19 +000012455 if (opcode == BITS4(1,1,0,0) || opcode == BITS4(1,1,0,1)) {
12456 /* -------- 0,xx,1100 SQDMULH s and h variants only -------- */
12457 /* -------- 0,xx,1101 SQRDMULH s and h variants only -------- */
12458 UInt mm = 32; // invalid
12459 UInt ix = 16; // invalid
12460 switch (size) {
12461 case X00:
12462 return False; // b case is not allowed
12463 case X01:
12464 mm = mmLO4; ix = (bitH << 2) | (bitL << 1) | (bitM << 0); break;
12465 case X10:
12466 mm = (bitM << 4) | mmLO4; ix = (bitH << 1) | (bitL << 0); break;
12467 case X11:
12468 return False; // q case is not allowed
12469 default:
12470 vassert(0);
12471 }
12472 vassert(mm < 32 && ix < 16);
12473 Bool isR = opcode == BITS4(1,1,0,1);
12474 IRTemp res, sat1q, sat1n, vN, vM;
12475 res = sat1q = sat1n = vN = vM = IRTemp_INVALID;
12476 vN = newTempV128();
12477 assign(vN, getQReg128(nn));
12478 vM = math_DUP_VEC_ELEM(getQReg128(mm), size, ix);
12479 math_SQDMULH(&res, &sat1q, &sat1n, isR, size, vN, vM);
12480 putQReg128(dd, math_MAYBE_ZERO_HI64(bitQ, res));
12481 IROp opZHI = bitQ == 0 ? Iop_ZeroHI64ofV128 : Iop_INVALID;
12482 updateQCFLAGwithDifferenceZHI(sat1q, sat1n, opZHI);
12483 const HChar* nm = isR ? "sqrdmulh" : "sqdmulh";
12484 const HChar* arr = nameArr_Q_SZ(bitQ, size);
12485 HChar ch = size == X01 ? 'h' : 's';
12486 DIP("%s %s.%s, %s.%s, %s.%c[%u]\n", nm,
12487 nameQReg128(dd), arr, nameQReg128(nn), arr, nameQReg128(mm), ch, ix);
12488 return True;
12489 }
12490
sewardjdf1628c2014-06-10 22:52:05 +000012491 return False;
12492# undef INSN
12493}
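
/* Sketch (illustrative only) of how the integer by-element cases above
   assemble the m register number and lane index from the M/H/L bits:
   s-sized lanes use a 5-bit register field with index H:L, while
   h-sized lanes only have a 4-bit register field and use index H:L:M. */
static inline void sketch_index_fields ( UInt size, UInt bitH, UInt bitL,
                                         UInt bitM, UInt mmLO4,
                                         /*OUT*/UInt* mm, /*OUT*/UInt* ix )
{
   if (size == X01) {                   /* h: 16-bit lanes */
      *mm = mmLO4;
      *ix = (bitH << 2) | (bitL << 1) | bitM;
   } else {                             /* s: 32-bit lanes (size == X10) */
      *mm = (bitM << 4) | mmLO4;
      *ix = (bitH << 1) | bitL;
   }
}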
12494
sewardjfc83d2c2014-06-12 10:15:46 +000012495
sewardjdf1628c2014-06-10 22:52:05 +000012496static
12497Bool dis_AdvSIMD_crypto_aes(/*MB_OUT*/DisResult* dres, UInt insn)
12498{
12499# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12500 return False;
12501# undef INSN
12502}
12503
sewardjfc83d2c2014-06-12 10:15:46 +000012504
sewardjdf1628c2014-06-10 22:52:05 +000012505static
12506Bool dis_AdvSIMD_crypto_three_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12507{
12508# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12509 return False;
12510# undef INSN
12511}
12512
sewardjfc83d2c2014-06-12 10:15:46 +000012513
sewardjdf1628c2014-06-10 22:52:05 +000012514static
12515Bool dis_AdvSIMD_crypto_two_reg_sha(/*MB_OUT*/DisResult* dres, UInt insn)
12516{
12517# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12518 return False;
12519# undef INSN
12520}
12521
sewardj5747c4a2014-06-11 20:57:23 +000012522
sewardjdf1628c2014-06-10 22:52:05 +000012523static
12524Bool dis_AdvSIMD_fp_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12525{
sewardj5747c4a2014-06-11 20:57:23 +000012526 /* 31 28 23 21 20 15 13 9 4
12527 000 11110 ty 1 m op 1000 n opcode2
12528 The first 3 bits are really "M 0 S", but M and S are always zero.
12529 Decode fields are: ty,op,opcode2
12530 */
sewardjdf1628c2014-06-10 22:52:05 +000012531# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012532 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12533 || INSN(21,21) != 1 || INSN(13,10) != BITS4(1,0,0,0)) {
12534 return False;
12535 }
12536 UInt ty = INSN(23,22);
12537 UInt mm = INSN(20,16);
12538 UInt op = INSN(15,14);
12539 UInt nn = INSN(9,5);
12540 UInt opcode2 = INSN(4,0);
12541 vassert(ty < 4);
12542
12543 if (ty <= X01 && op == X00
12544 && (opcode2 & BITS5(0,0,1,1,1)) == BITS5(0,0,0,0,0)) {
12545 /* -------- 0x,00,00000 FCMP d_d, s_s -------- */
12546 /* -------- 0x,00,01000 FCMP d_#0, s_#0 -------- */
12547 /* -------- 0x,00,10000 FCMPE d_d, s_s -------- */
12548 /* -------- 0x,00,11000 FCMPE d_#0, s_#0 -------- */
12549 /* 31 23 20 15 9 4
12550 000 11110 01 1 m 00 1000 n 10 000 FCMPE Dn, Dm
12551 000 11110 01 1 00000 00 1000 n 11 000 FCMPE Dn, #0.0
12552 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm
12553 000 11110 01 1 00000 00 1000 n 01 000 FCMP Dn, #0.0
12554
12555 000 11110 00 1 m 00 1000 n 10 000 FCMPE Sn, Sm
12556 000 11110 00 1 00000 00 1000 n 11 000 FCMPE Sn, #0.0
12557 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm
12558 000 11110 00 1 00000 00 1000 n 01 000 FCMP Sn, #0.0
12559
12560 FCMPE generates Invalid Operation exn if either arg is any kind
12561 of NaN. FCMP generates Invalid Operation exn if either arg is a
12562 signalling NaN. We ignore this detail here and produce the same
12563 IR for both.
12564 */
12565 Bool isD = (ty & 1) == 1;
12566 Bool isCMPE = (opcode2 & 16) == 16;
12567 Bool cmpZero = (opcode2 & 8) == 8;
12568 IRType ity = isD ? Ity_F64 : Ity_F32;
12569 Bool valid = True;
12570 if (cmpZero && mm != 0) valid = False;
12571 if (valid) {
12572 IRTemp argL = newTemp(ity);
12573 IRTemp argR = newTemp(ity);
12574 IRTemp irRes = newTemp(Ity_I32);
12575 assign(argL, getQRegLO(nn, ity));
12576 assign(argR,
12577 cmpZero
12578 ? (IRExpr_Const(isD ? IRConst_F64i(0) : IRConst_F32i(0)))
12579 : getQRegLO(mm, ity));
12580 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12581 mkexpr(argL), mkexpr(argR)));
12582 IRTemp nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12583 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12584 assign(nzcv_28x0, binop(Iop_Shl64, mkexpr(nzcv), mkU8(28)));
12585 setFlags_COPY(nzcv_28x0);
12586 DIP("fcmp%s %s, %s\n", isCMPE ? "e" : "", nameQRegLO(nn, ity),
12587 cmpZero ? "#0.0" : nameQRegLO(mm, ity));
12588 return True;
12589 }
12590 return False;
12591 }
12592
sewardjdf1628c2014-06-10 22:52:05 +000012593 return False;
12594# undef INSN
12595}
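
/* Sketch (illustrative only) of the flag placement used above: the
   4-bit NZCV result of an FP compare is parked in bits 31:28 of a
   64-bit value before being copied into the guest flags.
   Architecturally FCMP yields N=1 (0x8) for less-than, Z=1,C=1 (0x6)
   for equal, C=1 (0x2) for greater-than, and C=1,V=1 (0x3) for
   unordered. */
static inline ULong sketch_nzcv_28x0 ( UInt nzcv )
{
   return ((ULong)(nzcv & 0xF)) << 28;   /* e.g. "equal" -> 0x60000000 */
}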
12596
sewardj5747c4a2014-06-11 20:57:23 +000012597
sewardjdf1628c2014-06-10 22:52:05 +000012598static
12599Bool dis_AdvSIMD_fp_conditional_compare(/*MB_OUT*/DisResult* dres, UInt insn)
12600{
sewardj13830dc2015-02-07 21:09:47 +000012601 /* 31 28 23 21 20 15 11 9 4 3
12602 000 11110 ty 1 m cond 01 n op nzcv
12603 The first 3 bits are really "M 0 S", but M and S are always zero.
12604 Decode fields are: ty,op
12605 */
sewardjdf1628c2014-06-10 22:52:05 +000012606# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj13830dc2015-02-07 21:09:47 +000012607 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12608 || INSN(21,21) != 1 || INSN(11,10) != BITS2(0,1)) {
12609 return False;
12610 }
12611 UInt ty = INSN(23,22);
12612 UInt mm = INSN(20,16);
12613 UInt cond = INSN(15,12);
12614 UInt nn = INSN(9,5);
12615 UInt op = INSN(4,4);
12616 UInt nzcv = INSN(3,0);
12617 vassert(ty < 4 && op <= 1);
12618
12619 if (ty <= BITS2(0,1)) {
12620 /* -------- 00,0 FCCMP s_s -------- */
12621 /* -------- 00,1 FCCMPE s_s -------- */
12622 /* -------- 01,0 FCCMP d_d -------- */
12623 /* -------- 01,1 FCCMPE d_d -------- */
12624
12625 /* FCCMPE generates Invalid Operation exn if either arg is any kind
12626 of NaN. FCCMP generates Invalid Operation exn if either arg is a
12627 signalling NaN. We ignore this detail here and produce the same
12628 IR for both.
12629 */
12630 Bool isD = (ty & 1) == 1;
12631 Bool isCMPE = op == 1;
12632 IRType ity = isD ? Ity_F64 : Ity_F32;
12633 IRTemp argL = newTemp(ity);
12634 IRTemp argR = newTemp(ity);
12635 IRTemp irRes = newTemp(Ity_I32);
12636 assign(argL, getQRegLO(nn, ity));
12637 assign(argR, getQRegLO(mm, ity));
12638 assign(irRes, binop(isD ? Iop_CmpF64 : Iop_CmpF32,
12639 mkexpr(argL), mkexpr(argR)));
12640 IRTemp condT = newTemp(Ity_I1);
12641 assign(condT, unop(Iop_64to1, mk_arm64g_calculate_condition(cond)));
12642 IRTemp nzcvT = mk_convert_IRCmpF64Result_to_NZCV(irRes);
12643
12644 IRTemp nzcvT_28x0 = newTemp(Ity_I64);
12645 assign(nzcvT_28x0, binop(Iop_Shl64, mkexpr(nzcvT), mkU8(28)));
12646
12647 IRExpr* nzcvF_28x0 = mkU64(((ULong)nzcv) << 28);
12648
12649 IRTemp nzcv_28x0 = newTemp(Ity_I64);
12650 assign(nzcv_28x0, IRExpr_ITE(mkexpr(condT),
12651 mkexpr(nzcvT_28x0), nzcvF_28x0));
12652 setFlags_COPY(nzcv_28x0);
12653 DIP("fccmp%s %s, %s, #%u, %s\n", isCMPE ? "e" : "",
12654 nameQRegLO(nn, ity), nameQRegLO(mm, ity), nzcv, nameCC(cond));
12655 return True;
12656 }
12657
sewardjdf1628c2014-06-10 22:52:05 +000012658 return False;
12659# undef INSN
12660}
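
/* Behavioural sketch (illustrative only) of FCCMP/FCCMPE as implemented
   above: if the condition holds, the flags come from the comparison,
   otherwise the immediate nzcv field is used verbatim. */
static inline UInt sketch_fccmp_nzcv ( Bool condHolds, UInt nzcvFromCmp,
                                       UInt nzcvImm )
{
   return condHolds ? nzcvFromCmp : nzcvImm;
}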
12661
sewardjfc83d2c2014-06-12 10:15:46 +000012662
sewardjdf1628c2014-06-10 22:52:05 +000012663static
12664Bool dis_AdvSIMD_fp_conditional_select(/*MB_OUT*/DisResult* dres, UInt insn)
12665{
sewardje23ec112014-11-15 16:07:14 +000012666 /* 31 23 21 20 15 11 9 5
12667 000 11110 ty 1 m cond 11 n d
12668 The first 3 bits are really "M 0 S", but M and S are always zero.
12669 Decode fields: ty
12670 */
sewardjdf1628c2014-06-10 22:52:05 +000012671# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardje23ec112014-11-15 16:07:14 +000012672 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0) || INSN(21,21) != 1
12673 || INSN(11,10) != BITS2(1,1)) {
12674 return False;
12675 }
12676 UInt ty = INSN(23,22);
12677 UInt mm = INSN(20,16);
12678 UInt cond = INSN(15,12);
12679 UInt nn = INSN(9,5);
12680 UInt dd = INSN(4,0);
12681 if (ty <= X01) {
12682 /* -------- 00: FCSEL s_s -------- */
12683 /* -------- 01: FCSEL d_d -------- */
12684 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12685 IRTemp srcT = newTemp(ity);
12686 IRTemp srcF = newTemp(ity);
12687 IRTemp res = newTemp(ity);
12688 assign(srcT, getQRegLO(nn, ity));
12689 assign(srcF, getQRegLO(mm, ity));
12690 assign(res, IRExpr_ITE(
12691 unop(Iop_64to1, mk_arm64g_calculate_condition(cond)),
12692 mkexpr(srcT), mkexpr(srcF)));
12693 putQReg128(dd, mkV128(0x0000));
12694 putQRegLO(dd, mkexpr(res));
12695 DIP("fcsel %s, %s, %s, %s\n",
12696 nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity),
12697 nameCC(cond));
12698 return True;
12699 }
sewardjdf1628c2014-06-10 22:52:05 +000012700 return False;
12701# undef INSN
12702}
12703
sewardj5747c4a2014-06-11 20:57:23 +000012704
sewardjdf1628c2014-06-10 22:52:05 +000012705static
12706Bool dis_AdvSIMD_fp_data_proc_1_source(/*MB_OUT*/DisResult* dres, UInt insn)
12707{
12708 /* 31 28 23 21 20 14 9 4
12709 000 11110 ty 1 opcode 10000 n d
12710 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj5747c4a2014-06-11 20:57:23 +000012711 Decode fields: ty,opcode
sewardjdf1628c2014-06-10 22:52:05 +000012712 */
12713# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12714 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12715 || INSN(21,21) != 1 || INSN(14,10) != BITS5(1,0,0,0,0)) {
12716 return False;
12717 }
12718 UInt ty = INSN(23,22);
12719 UInt opcode = INSN(20,15);
12720 UInt nn = INSN(9,5);
12721 UInt dd = INSN(4,0);
12722
12723 if (ty <= X01 && opcode <= BITS6(0,0,0,0,1,1)) {
12724 /* -------- 0x,000000: FMOV d_d, s_s -------- */
12725 /* -------- 0x,000001: FABS d_d, s_s -------- */
12726 /* -------- 0x,000010: FNEG d_d, s_s -------- */
12727 /* -------- 0x,000011: FSQRT d_d, s_s -------- */
12728 IRType ity = ty == X01 ? Ity_F64 : Ity_F32;
12729 IRTemp src = newTemp(ity);
12730 IRTemp res = newTemp(ity);
12731 const HChar* nm = "??";
12732 assign(src, getQRegLO(nn, ity));
12733 switch (opcode) {
12734 case BITS6(0,0,0,0,0,0):
12735 nm = "fmov"; assign(res, mkexpr(src)); break;
12736 case BITS6(0,0,0,0,0,1):
12737 nm = "fabs"; assign(res, unop(mkABSF(ity), mkexpr(src))); break;
12738 case BITS6(0,0,0,0,1,0):
12739 nm = "fneg"; assign(res, unop(mkNEGF(ity), mkexpr(src))); break;
12740 case BITS6(0,0,0,0,1,1):
12741 nm = "fsqrt";
12742 assign(res, binop(mkSQRTF(ity),
12743 mkexpr(mk_get_IR_rounding_mode()),
12744 mkexpr(src))); break;
12745 default:
12746 vassert(0);
12747 }
12748 putQReg128(dd, mkV128(0x0000));
12749 putQRegLO(dd, mkexpr(res));
12750 DIP("%s %s, %s\n", nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12751 return True;
12752 }
12753
sewardj5747c4a2014-06-11 20:57:23 +000012754 if ( (ty == X11 && (opcode == BITS6(0,0,0,1,0,0)
12755 || opcode == BITS6(0,0,0,1,0,1)))
12756 || (ty == X00 && (opcode == BITS6(0,0,0,1,1,1)
12757 || opcode == BITS6(0,0,0,1,0,1)))
12758 || (ty == X01 && (opcode == BITS6(0,0,0,1,1,1)
12759 || opcode == BITS6(0,0,0,1,0,0)))) {
12760 /* -------- 11,000100: FCVT s_h -------- */
12761 /* -------- 11,000101: FCVT d_h -------- */
12762 /* -------- 00,000111: FCVT h_s -------- */
12763 /* -------- 00,000101: FCVT d_s -------- */
12764 /* -------- 01,000111: FCVT h_d -------- */
12765 /* -------- 01,000100: FCVT s_d -------- */
12766 /* 31 23 21 16 14 9 4
sewardj400d6b92015-03-30 09:01:51 +000012767 000 11110 11 10001 00 10000 n d FCVT Sd, Hn
12768 --------- 11 ----- 01 --------- FCVT Dd, Hn
12769 --------- 00 ----- 11 --------- FCVT Hd, Sn
sewardj5747c4a2014-06-11 20:57:23 +000012770 --------- 00 ----- 01 --------- FCVT Dd, Sn
sewardj400d6b92015-03-30 09:01:51 +000012771 --------- 01 ----- 11 --------- FCVT Hd, Dn
sewardj5747c4a2014-06-11 20:57:23 +000012772 --------- 01 ----- 00 --------- FCVT Sd, Dn
12773 Rounding, when dst is smaller than src, is per the FPCR.
12774 */
12775 UInt b2322 = ty;
12776 UInt b1615 = opcode & BITS2(1,1);
sewardj400d6b92015-03-30 09:01:51 +000012777 switch ((b2322 << 2) | b1615) {
12778 case BITS4(0,0,0,1): // S -> D
12779 case BITS4(1,1,0,1): { // H -> D
12780 Bool srcIsH = b2322 == BITS2(1,1);
12781 IRType srcTy = srcIsH ? Ity_F16 : Ity_F32;
12782 IRTemp res = newTemp(Ity_F64);
12783 assign(res, unop(srcIsH ? Iop_F16toF64 : Iop_F32toF64,
12784 getQRegLO(nn, srcTy)));
12785 putQReg128(dd, mkV128(0x0000));
12786 putQRegLO(dd, mkexpr(res));
12787 DIP("fcvt %s, %s\n",
12788 nameQRegLO(dd, Ity_F64), nameQRegLO(nn, srcTy));
12789 return True;
12790 }
12791 case BITS4(0,1,0,0): // D -> S
12792 case BITS4(0,1,1,1): { // D -> H
12793 Bool dstIsH = b1615 == BITS2(1,1);
12794 IRType dstTy = dstIsH ? Ity_F16 : Ity_F32;
12795 IRTemp res = newTemp(dstTy);
12796 assign(res, binop(dstIsH ? Iop_F64toF16 : Iop_F64toF32,
12797 mkexpr(mk_get_IR_rounding_mode()),
12798 getQRegLO(nn, Ity_F64)));
12799 putQReg128(dd, mkV128(0x0000));
12800 putQRegLO(dd, mkexpr(res));
12801 DIP("fcvt %s, %s\n",
12802 nameQRegLO(dd, dstTy), nameQRegLO(nn, Ity_F64));
12803 return True;
12804 }
12805 case BITS4(0,0,1,1): // S -> H
12806 case BITS4(1,1,0,0): { // H -> S
12807 Bool toH = b1615 == BITS2(1,1);
12808 IRType srcTy = toH ? Ity_F32 : Ity_F16;
12809 IRType dstTy = toH ? Ity_F16 : Ity_F32;
12810 IRTemp res = newTemp(dstTy);
12811 if (toH) {
12812 assign(res, binop(Iop_F32toF16,
12813 mkexpr(mk_get_IR_rounding_mode()),
12814 getQRegLO(nn, srcTy)));
12815
12816 } else {
12817 assign(res, unop(Iop_F16toF32,
12818 getQRegLO(nn, srcTy)));
12819 }
12820 putQReg128(dd, mkV128(0x0000));
12821 putQRegLO(dd, mkexpr(res));
12822 DIP("fcvt %s, %s\n",
12823 nameQRegLO(dd, dstTy), nameQRegLO(nn, srcTy));
12824 return True;
12825 }
12826 default:
12827 break;
sewardj5747c4a2014-06-11 20:57:23 +000012828 }
12829 /* else unhandled */
12830 return False;
12831 }
12832
12833 if (ty <= X01
12834 && opcode >= BITS6(0,0,1,0,0,0) && opcode <= BITS6(0,0,1,1,1,1)
12835 && opcode != BITS6(0,0,1,1,0,1)) {
12836 /* -------- 0x,001000 FRINTN d_d, s_s -------- */
12837 /* -------- 0x,001001 FRINTP d_d, s_s -------- */
12838 /* -------- 0x,001010 FRINTM d_d, s_s -------- */
12839 /* -------- 0x,001011 FRINTZ d_d, s_s -------- */
12840 /* -------- 0x,001100 FRINTA d_d, s_s -------- */
12841 /* -------- 0x,001110 FRINTX d_d, s_s -------- */
12842 /* -------- 0x,001111 FRINTI d_d, s_s -------- */
12843 /* 31 23 21 17 14 9 4
12844 000 11110 0x 1001 111 10000 n d FRINTI Fd, Fm (round per FPCR)
12845 rm
12846 x==0 => S-registers, x==1 => D-registers
12847 rm (17:15) encodings:
12848 111 per FPCR (FRINTI)
12849 001 +inf (FRINTP)
12850 010 -inf (FRINTM)
12851 011 zero (FRINTZ)
sewardj6a785df2015-02-09 09:07:47 +000012852 000 tieeven (FRINTN) -- !! FIXME KLUDGED !!
sewardj5747c4a2014-06-11 20:57:23 +000012853 100 tieaway (FRINTA) -- !! FIXME KLUDGED !!
sewardjd8ad76a2014-10-30 15:37:16 +000012854 110 per FPCR + "exact = TRUE" (FRINTX)
sewardj5747c4a2014-06-11 20:57:23 +000012855 101 unallocated
12856 */
12857 Bool isD = (ty & 1) == 1;
12858 UInt rm = opcode & BITS6(0,0,0,1,1,1);
12859 IRType ity = isD ? Ity_F64 : Ity_F32;
12860 IRExpr* irrmE = NULL;
12861 UChar ch = '?';
12862 switch (rm) {
12863 case BITS3(0,1,1): ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
12864 case BITS3(0,1,0): ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
12865 case BITS3(0,0,1): ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
12866 // The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
12867 case BITS3(1,0,0): ch = 'a'; irrmE = mkU32(Irrm_NEAREST); break;
sewardjd8ad76a2014-10-30 15:37:16 +000012868 // I am unsure about the following, due to the "integral exact"
sewardj9e1c2b02014-11-25 17:42:52 +000012869 // description in the manual. What does it mean? (frintx, that is)
sewardjd8ad76a2014-10-30 15:37:16 +000012870 case BITS3(1,1,0):
12871 ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj9e1c2b02014-11-25 17:42:52 +000012872 case BITS3(1,1,1):
12873 ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
sewardj6a785df2015-02-09 09:07:47 +000012874 // The following is a kludge. There's no Irrm_ value to represent
12875 // this ("to nearest, with ties to even")
12876 case BITS3(0,0,0): ch = 'n'; irrmE = mkU32(Irrm_NEAREST); break;
sewardj5747c4a2014-06-11 20:57:23 +000012877 default: break;
12878 }
12879 if (irrmE) {
12880 IRTemp src = newTemp(ity);
12881 IRTemp dst = newTemp(ity);
12882 assign(src, getQRegLO(nn, ity));
12883 assign(dst, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
12884 irrmE, mkexpr(src)));
12885 putQReg128(dd, mkV128(0x0000));
12886 putQRegLO(dd, mkexpr(dst));
12887 DIP("frint%c %s, %s\n",
12888 ch, nameQRegLO(dd, ity), nameQRegLO(nn, ity));
12889 return True;
12890 }
12891 return False;
12892 }
12893
sewardjdf1628c2014-06-10 22:52:05 +000012894 return False;
12895# undef INSN
12896}
12897
12898
12899static
12900Bool dis_AdvSIMD_fp_data_proc_2_source(/*MB_OUT*/DisResult* dres, UInt insn)
12901{
12902 /* 31 28 23 21 20 15 11 9 4
12903 000 11110 ty 1 m opcode 10 n d
12904 The first 3 bits are really "M 0 S", but M and S are always zero.
sewardj76927e62014-11-17 11:21:21 +000012905 Decode fields: ty, opcode
sewardjdf1628c2014-06-10 22:52:05 +000012906 */
12907# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
12908 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
12909 || INSN(21,21) != 1 || INSN(11,10) != BITS2(1,0)) {
12910 return False;
12911 }
12912 UInt ty = INSN(23,22);
12913 UInt mm = INSN(20,16);
12914 UInt opcode = INSN(15,12);
12915 UInt nn = INSN(9,5);
12916 UInt dd = INSN(4,0);
12917
sewardj76927e62014-11-17 11:21:21 +000012918 if (ty <= X01 && opcode <= BITS4(0,1,1,1)) {
sewardjdf1628c2014-06-10 22:52:05 +000012919 /* ------- 0x,0000: FMUL d_d, s_s ------- */
12920 /* ------- 0x,0001: FDIV d_d, s_s ------- */
12921 /* ------- 0x,0010: FADD d_d, s_s ------- */
12922 /* ------- 0x,0011: FSUB d_d, s_s ------- */
sewardj76927e62014-11-17 11:21:21 +000012923 /* ------- 0x,0100: FMAX d_d, s_s ------- */
12924 /* ------- 0x,0101: FMIN d_d, s_s ------- */
12925 /* ------- 0x,0110: FMAXNM d_d, s_s ------- (FIXME KLUDGED) */
12926 /* ------- 0x,0111: FMINNM d_d, s_s ------- (FIXME KLUDGED) */
sewardjdf1628c2014-06-10 22:52:05 +000012927 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12928 IROp iop = Iop_INVALID;
12929 const HChar* nm = "???";
12930 switch (opcode) {
sewardj76927e62014-11-17 11:21:21 +000012931 case BITS4(0,0,0,0): nm = "fmul"; iop = mkMULF(ity); break;
12932 case BITS4(0,0,0,1): nm = "fdiv"; iop = mkDIVF(ity); break;
12933 case BITS4(0,0,1,0): nm = "fadd"; iop = mkADDF(ity); break;
12934 case BITS4(0,0,1,1): nm = "fsub"; iop = mkSUBF(ity); break;
12935 case BITS4(0,1,0,0): nm = "fmax"; iop = mkVecMAXF(ty+2); break;
12936 case BITS4(0,1,0,1): nm = "fmin"; iop = mkVecMINF(ty+2); break;
12937 case BITS4(0,1,1,0): nm = "fmaxnm"; iop = mkVecMAXF(ty+2); break; //!!
12938 case BITS4(0,1,1,1): nm = "fminnm"; iop = mkVecMINF(ty+2); break; //!!
sewardjdf1628c2014-06-10 22:52:05 +000012939 default: vassert(0);
12940 }
sewardj76927e62014-11-17 11:21:21 +000012941 if (opcode <= BITS4(0,0,1,1)) {
12942 // This is really not good code. TODO: avoid width-changing
sewardjb963eef2014-11-17 14:16:56 +000012943 IRTemp res = newTemp(ity);
12944 assign(res, triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12945 getQRegLO(nn, ity), getQRegLO(mm, ity)));
sewardj76927e62014-11-17 11:21:21 +000012946 putQReg128(dd, mkV128(0));
sewardjb963eef2014-11-17 14:16:56 +000012947 putQRegLO(dd, mkexpr(res));
sewardj76927e62014-11-17 11:21:21 +000012948 } else {
12949 putQReg128(dd, unop(mkVecZEROHIxxOFV128(ty+2),
12950 binop(iop, getQReg128(nn), getQReg128(mm))));
12951 }
sewardjdf1628c2014-06-10 22:52:05 +000012952 DIP("%s %s, %s, %s\n",
12953 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12954 return True;
12955 }
12956
12957 if (ty <= X01 && opcode == BITS4(1,0,0,0)) {
12958 /* ------- 0x,1000: FNMUL d_d, s_s ------- */
12959 IRType ity = ty == X00 ? Ity_F32 : Ity_F64;
12960 IROp iop = mkMULF(ity);
12961 IROp iopn = mkNEGF(ity);
12962 const HChar* nm = "fnmul";
12963 IRExpr* resE = unop(iopn,
12964 triop(iop, mkexpr(mk_get_IR_rounding_mode()),
12965 getQRegLO(nn, ity), getQRegLO(mm, ity)));
12966 IRTemp res = newTemp(ity);
12967 assign(res, resE);
12968 putQReg128(dd, mkV128(0));
12969 putQRegLO(dd, mkexpr(res));
12970 DIP("%s %s, %s, %s\n",
12971 nm, nameQRegLO(dd, ity), nameQRegLO(nn, ity), nameQRegLO(mm, ity));
12972 return True;
12973 }
12974
sewardjdf1628c2014-06-10 22:52:05 +000012975 return False;
12976# undef INSN
12977}
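
/* The FMAXNM/FMINNM cases above are kludged to plain FMAX/FMIN. Per the
   IEEE 754 maxNum behaviour the ARM ARM specifies, a quiet NaN operand
   is dropped in favour of the numeric one, whereas FMAX propagates the
   NaN. A hedged scalar sketch of the intended semantics, using the
   self-comparison NaN test: */
static inline Double sketch_fmaxnm ( Double x, Double y )
{
   Bool xNaN = x != x;                /* true iff x is a NaN */
   Bool yNaN = y != y;
   if (xNaN && !yNaN) return y;       /* drop the NaN operand */
   if (yNaN && !xNaN) return x;
   return x > y ? x : y;              /* NaN if both args are NaNs */
}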
12978
12979
12980static
12981Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
12982{
sewardj5747c4a2014-06-11 20:57:23 +000012983 /* 31 28 23 21 20 15 14 9 4
12984 000 11111 ty o1 m o0 a n d
12985 The first 3 bits are really "M 0 S", but M and S are always zero.
12986 Decode fields: ty,o1,o0
12987 */
sewardjdf1628c2014-06-10 22:52:05 +000012988# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
sewardj5747c4a2014-06-11 20:57:23 +000012989 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,1)) {
12990 return False;
12991 }
12992 UInt ty = INSN(23,22);
12993 UInt bitO1 = INSN(21,21);
12994 UInt mm = INSN(20,16);
12995 UInt bitO0 = INSN(15,15);
12996 UInt aa = INSN(14,10);
12997 UInt nn = INSN(9,5);
12998 UInt dd = INSN(4,0);
12999 vassert(ty < 4);
13000
13001 if (ty <= X01) {
13002 /* -------- 0x,0,0 FMADD d_d_d_d, s_s_s_s -------- */
13003 /* -------- 0x,0,1 FMSUB d_d_d_d, s_s_s_s -------- */
13004 /* -------- 0x,1,0 FNMADD d_d_d_d, s_s_s_s -------- */
13005 /* -------- 0x,1,1 FNMSUB d_d_d_d, s_s_s_s -------- */
13006 /* -------------------- F{N}M{ADD,SUB} -------------------- */
13007 /* 31 22 20 15 14 9 4 ix
13008 000 11111 0 sz 0 m 0 a n d 0 FMADD Fd,Fn,Fm,Fa
13009 000 11111 0 sz 0 m 1 a n d 1 FMSUB Fd,Fn,Fm,Fa
13010 000 11111 0 sz 1 m 0 a n d 2 FNMADD Fd,Fn,Fm,Fa
13011 000 11111 0 sz 1 m 1 a n d 3 FNMSUB Fd,Fn,Fm,Fa
13012 where Fx=Dx when sz=1, Fx=Sx when sz=0
13013
13014 -----SPEC------ ----IMPL----
13015 fmadd a + n * m a + n * m
13016 fmsub a + (-n) * m a - n * m
13017 fnmadd (-a) + (-n) * m -(a + n * m)
13018 fnmsub (-a) + n * m -(a - n * m)
13019 */
13020 Bool isD = (ty & 1) == 1;
13021 UInt ix = (bitO1 << 1) | bitO0;
13022 IRType ity = isD ? Ity_F64 : Ity_F32;
13023 IROp opADD = mkADDF(ity);
13024 IROp opSUB = mkSUBF(ity);
13025 IROp opMUL = mkMULF(ity);
13026 IROp opNEG = mkNEGF(ity);
13027 IRTemp res = newTemp(ity);
13028 IRExpr* eA = getQRegLO(aa, ity);
13029 IRExpr* eN = getQRegLO(nn, ity);
13030 IRExpr* eM = getQRegLO(mm, ity);
13031 IRExpr* rm = mkexpr(mk_get_IR_rounding_mode());
13032 IRExpr* eNxM = triop(opMUL, rm, eN, eM);
13033 switch (ix) {
13034 case 0: assign(res, triop(opADD, rm, eA, eNxM)); break;
13035 case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break;
13036 case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break;
13037 case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break;
13038 default: vassert(0);
13039 }
13040 putQReg128(dd, mkV128(0x0000));
13041 putQRegLO(dd, mkexpr(res));
13042 const HChar* names[4] = { "fmadd", "fmsub", "fnmadd", "fnmsub" };
13043 DIP("%s %s, %s, %s, %s\n",
13044 names[ix], nameQRegLO(dd, ity), nameQRegLO(nn, ity),
13045 nameQRegLO(mm, ity), nameQRegLO(aa, ity));
13046 return True;
13047 }
13048
sewardjdf1628c2014-06-10 22:52:05 +000013049 return False;
13050# undef INSN
13051}
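
/* Scalar restatement (illustrative only) of the SPEC vs IMPL table
   above: negation commutes with round-to-nearest, so -(a + n*m) matches
   (-a) + (-n)*m there, but under directed rounding the two can differ
   in the last ulp -- and the separate multiply-then-add already suffers
   double rounding relative to a true fused multiply-add. */
static inline Double sketch_fnmadd ( Double n, Double m, Double a )
{
   return -(a + n * m);   /* IMPL column; SPEC is (-a) + (-n) * m */
}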
13052
13053
13054static
13055Bool dis_AdvSIMD_fp_immediate(/*MB_OUT*/DisResult* dres, UInt insn)
13056{
13057 /* 31 28 23 21 20 12 9 4
13058 000 11110 ty 1 imm8 100 imm5 d
13059 The first 3 bits are really "M 0 S", but M and S are always zero.
13060 */
13061# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13062 if (INSN(31,24) != BITS8(0,0,0,1,1,1,1,0)
13063 || INSN(21,21) != 1 || INSN(12,10) != BITS3(1,0,0)) {
13064 return False;
13065 }
13066 UInt ty = INSN(23,22);
13067 UInt imm8 = INSN(20,13);
13068 UInt imm5 = INSN(9,5);
13069 UInt dd = INSN(4,0);
13070
13071 /* ------- 00,00000: FMOV s_imm ------- */
13072 /* ------- 01,00000: FMOV d_imm ------- */
13073 if (ty <= X01 && imm5 == BITS5(0,0,0,0,0)) {
13074 Bool isD = (ty & 1) == 1;
13075 ULong imm = VFPExpandImm(imm8, isD ? 64 : 32);
13076 if (!isD) {
13077 vassert(0 == (imm & 0xFFFFFFFF00000000ULL));
13078 }
13079 putQReg128(dd, mkV128(0));
13080 putQRegLO(dd, isD ? mkU64(imm) : mkU32(imm & 0xFFFFFFFFULL));
13081 DIP("fmov %s, #0x%llx\n",
13082 nameQRegLO(dd, isD ? Ity_F64 : Ity_F32), imm);
13083 return True;
13084 }
13085
13086 return False;
13087# undef INSN
13088}
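
/* Hedged sketch of the 64-bit case of VFPExpandImm as used above,
   assuming the helper follows the architected expansion
   a:NOT(b):Replicate(b,8):cdefgh:Zeros(48) for imm8 = abcdefgh.
   Illustrative only; e.g. imm8 == 0x70 expands to 0x3FF0000000000000,
   which is 1.0. */
static inline ULong sketch_vfp_expand_imm8_f64 ( UInt imm8 )
{
   ULong a      = (imm8 >> 7) & 1;                 /* sign */
   ULong b      = (imm8 >> 6) & 1;
   ULong cdefgh = imm8 & 0x3F;
   ULong exp11  = ((b ^ 1) << 10) | ((b ? 0xFFULL : 0ULL) << 2)
                  | (cdefgh >> 4);                 /* NOT(b):bbbbbbbb:cd */
   ULong frac52 = (cdefgh & 0xF) << 48;            /* efgh:Zeros(48) */
   return (a << 63) | (exp11 << 52) | frac52;
}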
13089
13090
13091static
sewardj1aff76b2014-11-20 10:14:06 +000013092Bool dis_AdvSIMD_fp_to_from_fixedp_conv(/*MB_OUT*/DisResult* dres, UInt insn)
sewardjdf1628c2014-06-10 22:52:05 +000013093{
sewardj1aff76b2014-11-20 10:14:06 +000013095 /* 31 30 29 28 23 21 20 18 15 9 4
13096 sf 0 0 11110 type 0 rmode opcode scale n d
13097 The first 3 bits are really "sf 0 S", but S is always zero.
13098 Decode fields: sf,type,rmode,opcode
13099 */
13100# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
13101 if (INSN(30,29) != BITS2(0,0)
13102 || INSN(28,24) != BITS5(1,1,1,1,0)
13103 || INSN(21,21) != 0) {
13104 return False;
13105 }
13106 UInt bitSF = INSN(31,31);
13107 UInt ty = INSN(23,22); // type
13108 UInt rm = INSN(20,19); // rmode
13109 UInt op = INSN(18,16); // opcode
13110 UInt sc = INSN(15,10); // scale
13111 UInt nn = INSN(9,5);
13112 UInt dd = INSN(4,0);
13113
13114 // op = 010, 011
13115 /* -------------- {S,U}CVTF (scalar, fixedpt) -------------- */
13116 /* (ix) sf S 28 ty rm op 15 9 4
13117 0 0 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Wn, #fbits
13118 1 0 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Wn, #fbits
13119 2 1 0 0 11110 00 0 00 010 scale n d SCVTF Sd, Xn, #fbits
13120 3 1 0 0 11110 01 0 00 010 scale n d SCVTF Dd, Xn, #fbits
13121
13122 4 0 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Wn, #fbits
13123 5 0 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Wn, #fbits
13124 6 1 0 0 11110 00 0 00 011 scale n d UCVTF Sd, Xn, #fbits
13125 7 1 0 0 11110 01 0 00 011 scale n d UCVTF Dd, Xn, #fbits
13126
13127 These are signed/unsigned conversion from integer registers to
13128 FP registers, all 4 32/64-bit combinations, rounded per FPCR,
13129 scaled per |scale|.
13130 */
   if (ty <= X01 && rm == X00
       && (op == BITS3(0,1,0) || op == BITS3(0,1,1))
       && (bitSF == 1 || ((sc >> 5) & 1) == 1)) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);

      Int fbits = 64 - sc;
      vassert(fbits >= 1 && fbits <= (isI64 ? 64 : 32));

      Double  scale  = two_to_the_minus(fbits);
      IRExpr* scaleE = isF64 ? IRExpr_Const(IRConst_F64(scale))
                             : IRExpr_Const(IRConst_F32( (Float)scale ));
      IROp    opMUL  = isF64 ? Iop_MulF64 : Iop_MulF32;
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
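      /* I32->F64 is the one conversion here that is always exact
         (every 32-bit int fits in F64's 53-bit mantissa), so those
         two ops are unops with no rounding mode argument; the others
         can round and so take the FPCR-derived mode. */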
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, triop(opMUL, mkU32(Irrm_NEAREST), res, scaleE));

      DIP("%ccvtf %s, %s, #%d\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn), fbits);
      return True;
   }

   return False;
#  undef INSN
}


static
Bool dis_AdvSIMD_fp_to_from_int_conv(/*MB_OUT*/DisResult* dres, UInt insn)
{
   /* 31 30 29 28    23   21 20    18     15     9 4
      sf  0  0 11110 type 1  rmode opcode 000000 n d
      The first 3 bits are really "sf 0 S", but S is always zero.
      Decode fields: sf,type,rmode,opcode
   */
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
   if (INSN(30,29) != BITS2(0,0)
       || INSN(28,24) != BITS5(1,1,1,1,0)
       || INSN(21,21) != 1
       || INSN(15,10) != BITS6(0,0,0,0,0,0)) {
      return False;
   }
   UInt bitSF = INSN(31,31);
   UInt ty    = INSN(23,22); // type
   UInt rm    = INSN(20,19); // rmode
   UInt op    = INSN(18,16); // opcode
   UInt nn    = INSN(9,5);
   UInt dd    = INSN(4,0);

   // op = 000, 001
   /* -------- FCVT{N,P,M,Z,A}{S,U} (scalar, integer) -------- */
   /* 30       23   20 18  15     9 4
      sf 00 11110 0x 1 00 000 000000 n d  FCVTNS Rd, Fn (round to
      sf 00 11110 0x 1 00 001 000000 n d  FCVTNU Rd, Fn  nearest)
      ---------------- 01 --------------  FCVTP-------- (round to +inf)
      ---------------- 10 --------------  FCVTM-------- (round to -inf)
      ---------------- 11 --------------  FCVTZ-------- (round to zero)
      ---------------- 00 100 ----------  FCVTAS------- (nearest, ties away)
      ---------------- 00 101 ----------  FCVTAU------- (nearest, ties away)

      Rd is Xd when sf==1, Wd when sf==0
      Fn is Dn when x==1, Sn when x==0
      20:19 carry the rounding mode, using the same encoding as FPCR
   */
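   /* As a concrete illustration of the rounding modes: with Sn =
      -1.5, FCVTMS Wd,Sn yields -2 (towards -inf), FCVTZS yields -1
      (towards zero), and FCVTNS yields -2, since -1.5 ties to the
      even candidate. */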
   if (ty <= X01
       && (    ((op == BITS3(0,0,0) || op == BITS3(0,0,1)) && True)
            || ((op == BITS3(1,0,0) || op == BITS3(1,0,1)) && rm == BITS2(0,0))
          )
      ) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      /* Decide on the IR rounding mode to use. */
      IRRoundingMode irrm = 8; /*impossible*/
      HChar ch = '?';
      if (op == BITS3(0,0,0) || op == BITS3(0,0,1)) {
         switch (rm) {
            case BITS2(0,0): ch = 'n'; irrm = Irrm_NEAREST; break;
            case BITS2(0,1): ch = 'p'; irrm = Irrm_PosINF; break;
            case BITS2(1,0): ch = 'm'; irrm = Irrm_NegINF; break;
            case BITS2(1,1): ch = 'z'; irrm = Irrm_ZERO; break;
            default: vassert(0);
         }
      } else {
         vassert(op == BITS3(1,0,0) || op == BITS3(1,0,1));
         switch (rm) {
            case BITS2(0,0): ch = 'a'; irrm = Irrm_NEAREST; break;
            default: vassert(0);
         }
      }
      vassert(irrm != 8);
      /* Decide on the conversion primop, based on the source size,
         dest size and signedness (8 possibilities).  Case coding:
            F32 ->s I32   0
            F32 ->u I32   1
            F32 ->s I64   2
            F32 ->u I64   3
            F64 ->s I32   4
            F64 ->u I32   5
            F64 ->s I64   6
            F64 ->u I64   7
      */
      UInt ix = (isF64 ? 4 : 0) | (isI64 ? 2 : 0) | (isU ? 1 : 0);
      vassert(ix < 8);
      const IROp iops[8]
         = { Iop_F32toI32S, Iop_F32toI32U, Iop_F32toI64S, Iop_F32toI64U,
             Iop_F64toI32S, Iop_F64toI32U, Iop_F64toI64S, Iop_F64toI64U };
      IROp iop = iops[ix];
      // A bit of ATCery: bounce all cases we haven't seen an example of.
      if (/* F32toI32S */
             (iop == Iop_F32toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Sn */
          || (iop == Iop_F32toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,S */
          /* F32toI32U */
          || (iop == Iop_F32toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Sn */
          || (iop == Iop_F32toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,S */
          /* F32toI64S */
          || (iop == Iop_F32toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Sn */
          || (iop == Iop_F32toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,S */
          /* F32toI64U */
          || (iop == Iop_F32toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Sn */
          || (iop == Iop_F32toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,S */
          /* F64toI32S */
          || (iop == Iop_F64toI32S && irrm == Irrm_ZERO)   /* FCVTZS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NegINF) /* FCVTMS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_PosINF) /* FCVTPS Wd,Dn */
          || (iop == Iop_F64toI32S && irrm == Irrm_NEAREST)/* FCVT{A,N}S W,D */
          /* F64toI32U */
          || (iop == Iop_F64toI32U && irrm == Irrm_ZERO)   /* FCVTZU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NegINF) /* FCVTMU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_PosINF) /* FCVTPU Wd,Dn */
          || (iop == Iop_F64toI32U && irrm == Irrm_NEAREST)/* FCVT{A,N}U W,D */
          /* F64toI64S */
          || (iop == Iop_F64toI64S && irrm == Irrm_ZERO)   /* FCVTZS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NegINF) /* FCVTMS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_PosINF) /* FCVTPS Xd,Dn */
          || (iop == Iop_F64toI64S && irrm == Irrm_NEAREST)/* FCVT{A,N}S X,D */
          /* F64toI64U */
          || (iop == Iop_F64toI64U && irrm == Irrm_ZERO)   /* FCVTZU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NegINF) /* FCVTMU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_PosINF) /* FCVTPU Xd,Dn */
          || (iop == Iop_F64toI64U && irrm == Irrm_NEAREST)/* FCVT{A,N}U X,D */
         ) {
         /* validated */
      } else {
         return False;
      }
      IRType srcTy = isF64 ? Ity_F64 : Ity_F32;
      IRType dstTy = isI64 ? Ity_I64 : Ity_I32;
      IRTemp src   = newTemp(srcTy);
      IRTemp dst   = newTemp(dstTy);
      assign(src, getQRegLO(nn, srcTy));
      assign(dst, binop(iop, mkU32(irrm), mkexpr(src)));
      putIRegOrZR(isI64, dd, mkexpr(dst));
      DIP("fcvt%c%c %s, %s\n", ch, isU ? 'u' : 's',
          nameIRegOrZR(isI64, dd), nameQRegLO(nn, srcTy));
      return True;
   }

   // op = 010, 011
   /* -------------- {S,U}CVTF (scalar, integer) -------------- */
   /* (ix) sf  S 28    ty rm op  15     9 4
      0    0  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Wn
      1    0  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Wn
      2    1  0 0 11110 00 1 00 010 000000 n d  SCVTF Sd, Xn
      3    1  0 0 11110 01 1 00 010 000000 n d  SCVTF Dd, Xn

      4    0  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Wn
      5    0  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Wn
      6    1  0 0 11110 00 1 00 011 000000 n d  UCVTF Sd, Xn
      7    1  0 0 11110 01 1 00 011 000000 n d  UCVTF Dd, Xn

      These are signed/unsigned conversions from integer registers to
      FP registers, in all four 32/64-bit combinations, rounded per
      FPCR.
   */
   if (ty <= X01 && rm == X00 && (op == BITS3(0,1,0) || op == BITS3(0,1,1))) {
      Bool isI64 = bitSF == 1;
      Bool isF64 = (ty & 1) == 1;
      Bool isU   = (op & 1) == 1;
      UInt ix    = (isU ? 4 : 0) | (isI64 ? 2 : 0) | (isF64 ? 1 : 0);
      const IROp ops[8]
         = { Iop_I32StoF32, Iop_I32StoF64, Iop_I64StoF32, Iop_I64StoF64,
             Iop_I32UtoF32, Iop_I32UtoF64, Iop_I64UtoF32, Iop_I64UtoF64 };
      IRExpr* src = getIRegOrZR(isI64, nn);
      IRExpr* res = (isF64 && !isI64)
                       ? unop(ops[ix], src)
                       : binop(ops[ix],
                               mkexpr(mk_get_IR_rounding_mode()), src);
      putQReg128(dd, mkV128(0));
      putQRegLO(dd, res);
      DIP("%ccvtf %s, %s\n",
          isU ? 'u' : 's', nameQRegLO(dd, isF64 ? Ity_F64 : Ity_F32),
          nameIRegOrZR(isI64, nn));
      return True;
   }

   // op = 110, 111
   /* -------- FMOV (general) -------- */
   /* case sf  S   ty   rm  op  15     9 4
       (1) 0  0 0 11110 00 1 00 111 000000 n d  FMOV Sd,      Wn
       (2) 1  0 0 11110 01 1 00 111 000000 n d  FMOV Dd,      Xn
       (3) 1  0 0 11110 10 1 01 111 000000 n d  FMOV Vd.D[1], Xn

       (4) 0  0 0 11110 00 1 00 110 000000 n d  FMOV Wd, Sn
       (5) 1  0 0 11110 01 1 00 110 000000 n d  FMOV Xd, Dn
       (6) 1  0 0 11110 10 1 01 110 000000 n d  FMOV Xd, Vn.D[1]
   */
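   /* Note that cases (3) and (6) touch only the upper 64 bits of the
      vector register: "fmov vd.d[1], xn" leaves the lower half of Vd
      unchanged, unlike the writes to Sd/Dd above, which zero out the
      rest of the register. */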
   if (1) {
      UInt ix = 0; // case
      if (bitSF == 0) {
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 1;
         else
         if (ty == BITS2(0,0) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 4;
      } else {
         vassert(bitSF == 1);
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,1))
            ix = 2;
         else
         if (ty == BITS2(0,1) && rm == BITS2(0,0) && op == BITS3(1,1,0))
            ix = 5;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,1))
            ix = 3;
         else
         if (ty == BITS2(1,0) && rm == BITS2(0,1) && op == BITS3(1,1,0))
            ix = 6;
      }
      if (ix > 0) {
         switch (ix) {
            case 1:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg32orZR(nn));
               DIP("fmov s%u, w%u\n", dd, nn);
               break;
            case 2:
               putQReg128(dd, mkV128(0));
               putQRegLO(dd, getIReg64orZR(nn));
               DIP("fmov d%u, x%u\n", dd, nn);
               break;
            case 3:
               putQRegHI64(dd, getIReg64orZR(nn));
               DIP("fmov v%u.d[1], x%u\n", dd, nn);
               break;
            case 4:
               putIReg32orZR(dd, getQRegLO(nn, Ity_I32));
               DIP("fmov w%u, s%u\n", dd, nn);
               break;
            case 5:
               putIReg64orZR(dd, getQRegLO(nn, Ity_I64));
               DIP("fmov x%u, d%u\n", dd, nn);
               break;
            case 6:
               putIReg64orZR(dd, getQRegHI64(nn));
               DIP("fmov x%u, v%u.d[1]\n", dd, nn);
               break;
            default:
               vassert(0);
         }
         return True;
      }
      /* undecodable; fall through */
   }

   return False;
#  undef INSN
}


static
Bool dis_ARM64_simd_and_fp(/*MB_OUT*/DisResult* dres, UInt insn)
{
   Bool ok;
   ok = dis_AdvSIMD_EXT(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_TBL_TBX(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_ZIP_UZP_TRN(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_across_lanes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_modified_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_copy(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_pairwise(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_shift_by_imm(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_scalar_x_indexed_element(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_shift_by_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_different(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_three_same(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_two_reg_misc(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_vector_x_indexed_elem(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_aes(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_three_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_crypto_two_reg_sha(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_compare(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_conditional_select(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_1_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_2_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_data_proc_3_source(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_immediate(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_fixedp_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   ok = dis_AdvSIMD_fp_to_from_int_conv(dres, insn);
   if (UNLIKELY(ok)) return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Disassemble a single ARM64 instruction               ---*/
/*------------------------------------------------------------*/

/* Disassemble a single ARM64 instruction into IR.  The instruction
   is located at |guest_instr| and has guest IP of
   |guest_PC_curr_instr|, which will have been set before the call
   here.  Returns True iff the instruction was decoded, in which case
   *dres will be set accordingly, or False, in which case *dres should
   be ignored by the caller. */

static
Bool disInstr_ARM64_WRK (
        /*MB_OUT*/DisResult* dres,
        Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
        Bool         resteerCisOk,
        void*        callback_opaque,
        const UChar* guest_instr,
        const VexArchInfo* archinfo,
        const VexAbiInfo*  abiinfo
     )
{
   // A macro to fish bits out of 'insn'.
#  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))

//ZZ    DisResult dres;
//ZZ    UInt      insn;
//ZZ    //Bool      allow_VFP = False;
//ZZ    //UInt      hwcaps = archinfo->hwcaps;
//ZZ    IRTemp    condT; /* :: Ity_I32 */
//ZZ    UInt      summary;
//ZZ    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
//ZZ
//ZZ    /* What insn variants are we supporting today? */
//ZZ    //allow_VFP  = (0 != (hwcaps & VEX_HWCAPS_ARM_VFP));
//ZZ    // etc etc

   /* Set result defaults. */
   dres->whatNext    = Dis_Continue;
   dres->len         = 4;
   dres->continueAt  = 0;
   dres->jk_StopHere = Ijk_INVALID;

   /* At least this is simple on ARM64: insns are all 4 bytes long, and
      4-aligned.  So just fish the whole thing out of memory right now
      and have done. */
   UInt insn = getUIntLittleEndianly( guest_instr );

   if (0) vex_printf("insn: 0x%x\n", insn);

   DIP("\t(arm64) 0x%llx:  ", (ULong)guest_PC_curr_instr);

   vassert(0 == (guest_PC_curr_instr & 3ULL));

   /* ----------------------------------------------------------- */

   /* Spot "Special" instructions (see comment at top of file). */
   {
      const UChar* code = guest_instr;
      /* Spot the 16-byte preamble:
            93CC0D8C   ror x12, x12, #3
            93CC358C   ror x12, x12, #13
            93CCCD8C   ror x12, x12, #51
            93CCF58C   ror x12, x12, #61
      */
      UInt word1 = 0x93CC0D8C;
      UInt word2 = 0x93CC358C;
      UInt word3 = 0x93CCCD8C;
      UInt word4 = 0x93CCF58C;
      if (getUIntLittleEndianly(code+ 0) == word1 &&
          getUIntLittleEndianly(code+ 4) == word2 &&
          getUIntLittleEndianly(code+ 8) == word3 &&
          getUIntLittleEndianly(code+12) == word4) {
         /* Got a "Special" instruction preamble.  Which one is it? */
         if (getUIntLittleEndianly(code+16) == 0xAA0A014A
                                               /* orr x10,x10,x10 */) {
            /* X3 = client_request ( X4 ) */
            DIP("x3 = client_request ( x4 )\n");
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->jk_StopHere = Ijk_ClientReq;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0B016B
                                               /* orr x11,x11,x11 */) {
            /* X3 = guest_NRADDR */
            DIP("x3 = guest_NRADDR\n");
            dres->len = 20;
            putIReg64orZR(3, IRExpr_Get( OFFB_NRADDR, Ity_I64 ));
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA0C018C
                                               /* orr x12,x12,x12 */) {
            /* branch-and-link-to-noredir X8 */
            DIP("branch-and-link-to-noredir x8\n");
            putIReg64orZR(30, mkU64(guest_PC_curr_instr + 20));
            putPC(getIReg64orZR(8));
            dres->jk_StopHere = Ijk_NoRedir;
            dres->whatNext    = Dis_StopHere;
            return True;
         }
         else
         if (getUIntLittleEndianly(code+16) == 0xAA090129
                                               /* orr x9,x9,x9 */) {
            /* IR injection */
            DIP("IR injection\n");
            vex_inject_ir(irsb, Iend_LE);
            // Invalidate the current insn.  The reason is that the IR
            // we're injecting here can change, in which case the
            // translation would have to be redone.  For ease of
            // handling, we simply invalidate all the time.
            stmt(IRStmt_Put(OFFB_CMSTART, mkU64(guest_PC_curr_instr)));
            stmt(IRStmt_Put(OFFB_CMLEN,   mkU64(20)));
            putPC(mkU64( guest_PC_curr_instr + 20 ));
            dres->whatNext    = Dis_StopHere;
            dres->jk_StopHere = Ijk_InvalICache;
            return True;
         }
         /* We don't know what it is. */
         return False;
         /*NOTREACHED*/
      }
   }

   /* ----------------------------------------------------------- */

   /* Main ARM64 instruction decoder starts here. */

   Bool ok = False;

   /* insn[28:25] determines the top-level grouping, so let's start
      off with that.

      For all of these dis_ARM64_ functions, we pass *dres with the
      normal default results "insn OK, 4 bytes long, keep decoding" so
      they don't need to change it.  However, decodes of control-flow
      insns may cause *dres to change.
   */
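   /* A worked example (hand-encoded): "add x0, x1, #1" assembles to
      0x91000420, for which insn[28:25] == BITS4(1,0,0,0), sending it
      to the data-processing-immediate case below. */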
   switch (INSN(28,25)) {
      case BITS4(1,0,0,0): case BITS4(1,0,0,1):
         // Data processing - immediate
         ok = dis_ARM64_data_processing_immediate(dres, insn);
         break;
      case BITS4(1,0,1,0): case BITS4(1,0,1,1):
         // Branch, exception generation and system instructions
         ok = dis_ARM64_branch_etc(dres, insn, archinfo);
         break;
      case BITS4(0,1,0,0): case BITS4(0,1,1,0):
      case BITS4(1,1,0,0): case BITS4(1,1,1,0):
         // Loads and stores
         ok = dis_ARM64_load_store(dres, insn);
         break;
      case BITS4(0,1,0,1): case BITS4(1,1,0,1):
         // Data processing - register
         ok = dis_ARM64_data_processing_register(dres, insn);
         break;
      case BITS4(0,1,1,1): case BITS4(1,1,1,1):
         // Data processing - SIMD and floating point
         ok = dis_ARM64_simd_and_fp(dres, insn);
         break;
      case BITS4(0,0,0,0): case BITS4(0,0,0,1):
      case BITS4(0,0,1,0): case BITS4(0,0,1,1):
         // UNALLOCATED
         break;
      default:
         vassert(0); /* Can't happen */
   }

   /* If the next-level down decoders failed, make sure |dres| didn't
      get changed. */
   if (!ok) {
      vassert(dres->whatNext    == Dis_Continue);
      vassert(dres->len         == 4);
      vassert(dres->continueAt  == 0);
      vassert(dres->jk_StopHere == Ijk_INVALID);
   }

   return ok;

#  undef INSN
}


/*------------------------------------------------------------*/
/*--- Top-level fn                                         ---*/
/*------------------------------------------------------------*/

/* Disassemble a single instruction into IR.  The instruction
   is located in host memory at &guest_code[delta]. */

DisResult disInstr_ARM64 ( IRSB*        irsb_IN,
                           Bool         (*resteerOkFn) ( void*, Addr ),
                           Bool         resteerCisOk,
                           void*        callback_opaque,
                           const UChar* guest_code_IN,
                           Long         delta_IN,
                           Addr         guest_IP,
                           VexArch      guest_arch,
                           const VexArchInfo* archinfo,
                           const VexAbiInfo*  abiinfo,
                           VexEndness   host_endness_IN,
                           Bool         sigill_diag_IN )
{
   DisResult dres;
   vex_bzero(&dres, sizeof(dres));

   /* Set globals (see top of this file) */
   vassert(guest_arch == VexArchARM64);

   irsb                = irsb_IN;
   host_endness        = host_endness_IN;
   guest_PC_curr_instr = (Addr64)guest_IP;

   /* Sanity checks */
   /* (x::UInt - 2) <= 15  ===  x >= 2 && x <= 17, since for x < 2
      the unsigned subtraction wraps around to a huge value. */
   vassert((archinfo->arm64_dMinLine_lg2_szB - 2) <= 15);
   vassert((archinfo->arm64_iMinLine_lg2_szB - 2) <= 15);

   /* Try to decode */
   Bool ok = disInstr_ARM64_WRK( &dres,
                                 resteerOkFn, resteerCisOk, callback_opaque,
                                 &guest_code_IN[delta_IN],
                                 archinfo, abiinfo );
   if (ok) {
      /* All decode successes end up here. */
      vassert(dres.len == 4 || dres.len == 20);
      switch (dres.whatNext) {
         case Dis_Continue:
            putPC( mkU64(dres.len + guest_PC_curr_instr) );
            break;
         case Dis_ResteerU:
         case Dis_ResteerC:
            putPC(mkU64(dres.continueAt));
            break;
         case Dis_StopHere:
            break;
         default:
            vassert(0);
      }
      DIP("\n");
   } else {
      /* All decode failures end up here. */
      if (sigill_diag_IN) {
         Int   i, j;
         UChar buf[64];
         UInt  insn
            = getUIntLittleEndianly( &guest_code_IN[delta_IN] );
         vex_bzero(buf, sizeof(buf));
         for (i = j = 0; i < 32; i++) {
            if (i > 0) {
               if ((i & 7) == 0) buf[j++] = ' ';
               else if ((i & 3) == 0) buf[j++] = '\'';
            }
            buf[j++] = (insn & (1<<(31-i))) ? '1' : '0';
         }
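         /* e.g. insn == 0x91000420 renders as
            "1001'0001 0000'0000 0000'0100 0010'0000". */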
         vex_printf("disInstr(arm64): unhandled instruction 0x%08x\n", insn);
         vex_printf("disInstr(arm64): %s\n", buf);
      }

      /* Tell the dispatcher that this insn cannot be decoded, and so
         has not been executed, and (is currently) the next to be
         executed.  PC should be up-to-date since it is made so at the
         start of each insn, but nevertheless be paranoid and update
         it again right now. */
      putPC( mkU64(guest_PC_curr_instr) );
      dres.len         = 0;
      dres.whatNext    = Dis_StopHere;
      dres.jk_StopHere = Ijk_NoDecode;
      dres.continueAt  = 0;
   }
   return dres;
}


/*--------------------------------------------------------------------*/
/*--- end                                       guest_arm64_toIR.c ---*/
/*--------------------------------------------------------------------*/