Blame - llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp - toolchain/llvm-project

blob: 08c039f3decd547b70c187a43f5a3d67c6bcd106 [file] [log] [blame]

NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	1	//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	//
				10	// Define several functions to decode x86 specific shuffle semantics into a
				11	// generic vector mask.
				12	//
				13	//===----------------------------------------------------------------------===//
				14
				15	#include "X86ShuffleDecode.h"
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	16	#include "llvm/CodeGen/MachineValueType.h"
				17
				18	//===----------------------------------------------------------------------===//
				19	// Vector Mask Decoding
				20	//===----------------------------------------------------------------------===//
				21
				22	namespace llvm {
				23
				24	void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				25	// Defaults the copying the dest value.
				26	ShuffleMask.push_back(0);
				27	ShuffleMask.push_back(1);
				28	ShuffleMask.push_back(2);
				29	ShuffleMask.push_back(3);
				30
				31	// Decode the immediate.
				32	unsigned ZMask = Imm & 15;
				33	unsigned CountD = (Imm >> 4) & 3;
				34	unsigned CountS = (Imm >> 6) & 3;
				35
				36	// CountS selects which input element to use.
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	37	unsigned InVal = 4 + CountS;
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	38	// CountD specifies which element of destination to update.
				39	ShuffleMask[CountD] = InVal;
				40	// ZMask zaps values, potentially overriding the CountD elt.
				41	if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
				42	if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
				43	if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
				44	if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
				45	}
				46
				47	// <3,1> or <6,7,2,3>
				48	void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	49	for (unsigned i = NElts / 2; i != NElts; ++i)
				50	ShuffleMask.push_back(NElts + i);
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	51
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	52	for (unsigned i = NElts / 2; i != NElts; ++i)
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	53	ShuffleMask.push_back(i);
				54	}
				55
				56	// <0,2> or <0,1,4,5>
				57	void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	58	for (unsigned i = 0; i != NElts / 2; ++i)
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	59	ShuffleMask.push_back(i);
				60
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	61	for (unsigned i = 0; i != NElts / 2; ++i)
				62	ShuffleMask.push_back(NElts + i);
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	63	}
				64
				65	void DecodeMOVSLDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				66	unsigned NumElts = VT.getVectorNumElements();
				67	for (int i = 0, e = NumElts / 2; i < e; ++i) {
				68	ShuffleMask.push_back(2 * i);
				69	ShuffleMask.push_back(2 * i);
				70	}
				71	}
				72
				73	void DecodeMOVSHDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				74	unsigned NumElts = VT.getVectorNumElements();
				75	for (int i = 0, e = NumElts / 2; i < e; ++i) {
				76	ShuffleMask.push_back(2 * i + 1);
				77	ShuffleMask.push_back(2 * i + 1);
				78	}
				79	}
				80
				81	void DecodeMOVDDUPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				82	unsigned VectorSizeInBits = VT.getSizeInBits();
				83	unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
				84	unsigned NumElts = VT.getVectorNumElements();
				85	unsigned NumLanes = VectorSizeInBits / 128;
				86	unsigned NumLaneElts = NumElts / NumLanes;
				87	unsigned NumLaneSubElts = 64 / ScalarSizeInBits;
				88
				89	for (unsigned l = 0; l < NumElts; l += NumLaneElts)
				90	for (unsigned i = 0; i < NumLaneElts; i += NumLaneSubElts)
				91	for (unsigned s = 0; s != NumLaneSubElts; s++)
				92	ShuffleMask.push_back(l + s);
				93	}
				94
				95	void DecodePSLLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				96	unsigned VectorSizeInBits = VT.getSizeInBits();
				97	unsigned NumElts = VectorSizeInBits / 8;
				98	unsigned NumLanes = VectorSizeInBits / 128;
				99	unsigned NumLaneElts = NumElts / NumLanes;
				100
				101	for (unsigned l = 0; l < NumElts; l += NumLaneElts)
				102	for (unsigned i = 0; i < NumLaneElts; ++i) {
				103	int M = SM_SentinelZero;
				104	if (i >= Imm) M = i - Imm + l;
				105	ShuffleMask.push_back(M);
				106	}
				107	}
				108
				109	void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				110	unsigned VectorSizeInBits = VT.getSizeInBits();
				111	unsigned NumElts = VectorSizeInBits / 8;
				112	unsigned NumLanes = VectorSizeInBits / 128;
				113	unsigned NumLaneElts = NumElts / NumLanes;
				114
				115	for (unsigned l = 0; l < NumElts; l += NumLaneElts)
				116	for (unsigned i = 0; i < NumLaneElts; ++i) {
				117	unsigned Base = i + Imm;
				118	int M = Base + l;
				119	if (Base >= NumLaneElts) M = SM_SentinelZero;
				120	ShuffleMask.push_back(M);
				121	}
				122	}
				123
				124	void DecodePALIGNRMask(MVT VT, unsigned Imm,
				125	SmallVectorImpl<int> &ShuffleMask) {
				126	unsigned NumElts = VT.getVectorNumElements();
				127	unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
				128
				129	unsigned NumLanes = VT.getSizeInBits() / 128;
				130	unsigned NumLaneElts = NumElts / NumLanes;
				131
				132	for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
				133	for (unsigned i = 0; i != NumLaneElts; ++i) {
				134	unsigned Base = i + Offset;
				135	// if i+offset is out of this lane then we actually need the other source
				136	if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
				137	ShuffleMask.push_back(Base + l);
				138	}
				139	}
				140	}
				141
Simon Pilgrim	f8f86ab	2015-09-13 11:28:45 +0000	[diff] [blame]	142	/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	143	/// VT indicates the type of the vector allowing it to handle different
				144	/// datatypes and vector widths.
				145	void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				146	unsigned NumElts = VT.getVectorNumElements();
				147
				148	unsigned NumLanes = VT.getSizeInBits() / 128;
Simon Pilgrim	f8f86ab	2015-09-13 11:28:45 +0000	[diff] [blame]	149	if (NumLanes == 0) NumLanes = 1; // Handle MMX
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	150	unsigned NumLaneElts = NumElts / NumLanes;
				151
				152	unsigned NewImm = Imm;
				153	for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
				154	for (unsigned i = 0; i != NumLaneElts; ++i) {
				155	ShuffleMask.push_back(NewImm % NumLaneElts + l);
				156	NewImm /= NumLaneElts;
				157	}
				158	if (NumLaneElts == 4) NewImm = Imm; // reload imm
				159	}
				160	}
				161
				162	void DecodePSHUFHWMask(MVT VT, unsigned Imm,
				163	SmallVectorImpl<int> &ShuffleMask) {
				164	unsigned NumElts = VT.getVectorNumElements();
				165
				166	for (unsigned l = 0; l != NumElts; l += 8) {
				167	unsigned NewImm = Imm;
				168	for (unsigned i = 0, e = 4; i != e; ++i) {
				169	ShuffleMask.push_back(l + i);
				170	}
				171	for (unsigned i = 4, e = 8; i != e; ++i) {
				172	ShuffleMask.push_back(l + 4 + (NewImm & 3));
				173	NewImm >>= 2;
				174	}
				175	}
				176	}
				177
				178	void DecodePSHUFLWMask(MVT VT, unsigned Imm,
				179	SmallVectorImpl<int> &ShuffleMask) {
				180	unsigned NumElts = VT.getVectorNumElements();
				181
				182	for (unsigned l = 0; l != NumElts; l += 8) {
				183	unsigned NewImm = Imm;
				184	for (unsigned i = 0, e = 4; i != e; ++i) {
				185	ShuffleMask.push_back(l + (NewImm & 3));
				186	NewImm >>= 2;
				187	}
				188	for (unsigned i = 4, e = 8; i != e; ++i) {
				189	ShuffleMask.push_back(l + i);
				190	}
				191	}
				192	}
				193
Simon Pilgrim	f8f86ab	2015-09-13 11:28:45 +0000	[diff] [blame]	194	void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				195	unsigned NumElts = VT.getVectorNumElements();
				196	unsigned NumHalfElts = NumElts / 2;
				197
				198	for (unsigned l = 0; l != NumHalfElts; ++l)
				199	ShuffleMask.push_back(l + NumHalfElts);
				200	for (unsigned h = 0; h != NumHalfElts; ++h)
				201	ShuffleMask.push_back(h);
				202	}
				203
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	204	/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
				205	/// the type of the vector allowing it to handle different datatypes and vector
				206	/// widths.
				207	void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				208	unsigned NumElts = VT.getVectorNumElements();
				209
				210	unsigned NumLanes = VT.getSizeInBits() / 128;
				211	unsigned NumLaneElts = NumElts / NumLanes;
				212
				213	unsigned NewImm = Imm;
				214	for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
				215	// each half of a lane comes from different source
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	216	for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
				217	for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	218	ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
				219	NewImm /= NumLaneElts;
				220	}
				221	}
				222	if (NumLaneElts == 4) NewImm = Imm; // reload imm
				223	}
				224	}
				225
				226	/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
				227	/// and punpckh*. VT indicates the type of the vector allowing it to handle
				228	/// different datatypes and vector widths.
				229	void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				230	unsigned NumElts = VT.getVectorNumElements();
				231
				232	// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
				233	// independently on 128-bit lanes.
				234	unsigned NumLanes = VT.getSizeInBits() / 128;
Simon Pilgrim	f8f86ab	2015-09-13 11:28:45 +0000	[diff] [blame]	235	if (NumLanes == 0) NumLanes = 1; // Handle MMX
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	236	unsigned NumLaneElts = NumElts / NumLanes;
				237
				238	for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	239	for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
				240	ShuffleMask.push_back(i); // Reads from dest/src1
				241	ShuffleMask.push_back(i + NumElts); // Reads from src/src2
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	242	}
				243	}
				244	}
				245
				246	/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
				247	/// and punpckl*. VT indicates the type of the vector allowing it to handle
				248	/// different datatypes and vector widths.
				249	void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				250	unsigned NumElts = VT.getVectorNumElements();
				251
				252	// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
				253	// independently on 128-bit lanes.
				254	unsigned NumLanes = VT.getSizeInBits() / 128;
				255	if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
				256	unsigned NumLaneElts = NumElts / NumLanes;
				257
				258	for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	259	for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
				260	ShuffleMask.push_back(i); // Reads from dest/src1
				261	ShuffleMask.push_back(i + NumElts); // Reads from src/src2
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	262	}
				263	}
				264	}
				265
Igor Breger	d7bae45	2015-10-15 13:29:07 +0000	[diff] [blame]	266	/// \brief Decode a shuffle packed values at 128-bit granularity
				267	/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
				268	/// immediate mask into a shuffle mask.
				269	void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
				270	SmallVectorImpl<int> &ShuffleMask) {
				271	unsigned NumLanes = VT.getSizeInBits() / 128;
				272	unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
				273	unsigned ControlBitsMask = NumLanes - 1;
				274	unsigned NumControlBits = NumLanes / 2;
				275
				276	for (unsigned l = 0; l != NumLanes; ++l) {
				277	unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
				278	// We actually need the other source.
				279	if (l >= NumLanes / 2)
				280	LaneMask += NumLanes;
				281	for (unsigned i = 0; i != NumElementsInLane; ++i)
				282	ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
				283	}
				284	}
				285
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	286	void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
				287	SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	288	unsigned HalfSize = VT.getVectorNumElements() / 2;
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	289
				290	for (unsigned l = 0; l != 2; ++l) {
Simon Pilgrim	40343e6	2015-07-06 22:46:46 +0000	[diff] [blame]	291	unsigned HalfMask = Imm >> (l * 4);
				292	unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	293	for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
Denis Protivensky	b612902	2015-07-07 07:48:48 +0000	[diff] [blame]	294	ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	295	}
				296	}
				297
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	298	void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
				299	SmallVectorImpl<int> &ShuffleMask) {
				300	for (int i = 0, e = RawMask.size(); i < e; ++i) {
				301	uint64_t M = RawMask[i];
				302	if (M == (uint64_t)SM_SentinelUndef) {
				303	ShuffleMask.push_back(M);
				304	continue;
				305	}
				306	// For AVX vectors with 32 bytes the base of the shuffle is the half of
				307	// the vector we're inside.
				308	int Base = i < 16 ? 0 : 16;
				309	// If the high bit (7) of the byte is set, the element is zeroed.
				310	if (M & (1 << 7))
				311	ShuffleMask.push_back(SM_SentinelZero);
				312	else {
				313	// Only the least significant 4 bits of the byte are used.
				314	int Index = Base + (M & 0xf);
				315	ShuffleMask.push_back(Index);
				316	}
				317	}
				318	}
				319
				320	void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				321	int ElementBits = VT.getScalarSizeInBits();
				322	int NumElements = VT.getVectorNumElements();
				323	for (int i = 0; i < NumElements; ++i) {
				324	// If there are more than 8 elements in the vector, then any immediate blend
				325	// mask applies to each 128-bit lane. There can never be more than
				326	// 8 elements in a 128-bit lane with an immediate blend.
				327	int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
				328	assert(Bit < 8 &&
				329	"Immediate blends only operate over 8 elements at a time!");
				330	ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
				331	}
				332	}
				333
				334	/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
				335	/// No VT provided since it only works on 256-bit, 4 element vectors.
				336	void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
				337	for (unsigned i = 0; i != 4; ++i) {
NAKAMURA Takumi	5582a6a	2015-05-25 01:43:34 +0000	[diff] [blame]	338	ShuffleMask.push_back((Imm >> (2 * i)) & 3);
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	339	}
				340	}
				341
Simon Pilgrim	e1b6db9	2016-02-06 16:33:42 +0000	[diff] [blame]	342	void DecodeZeroExtendMask(MVT SrcScalarVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	343	unsigned NumDstElts = DstVT.getVectorNumElements();
Simon Pilgrim	e1b6db9	2016-02-06 16:33:42 +0000	[diff] [blame]	344	unsigned SrcScalarBits = SrcScalarVT.getSizeInBits();
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	345	unsigned DstScalarBits = DstVT.getScalarSizeInBits();
				346	unsigned Scale = DstScalarBits / SrcScalarBits;
				347	assert(SrcScalarBits < DstScalarBits &&
				348	"Expected zero extension mask to increase scalar size");
NAKAMURA Takumi	fb3bd71	2015-05-25 01:43:23 +0000	[diff] [blame]	349
				350	for (unsigned i = 0; i != NumDstElts; i++) {
				351	Mask.push_back(i);
				352	for (unsigned j = 1; j != Scale; j++)
				353	Mask.push_back(SM_SentinelZero);
				354	}
				355	}
				356
				357	void DecodeZeroMoveLowMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
				358	unsigned NumElts = VT.getVectorNumElements();
				359	ShuffleMask.push_back(0);
				360	for (unsigned i = 1; i < NumElts; i++)
				361	ShuffleMask.push_back(SM_SentinelZero);
				362	}
				363
				364	void DecodeScalarMoveMask(MVT VT, bool IsLoad, SmallVectorImpl<int> &Mask) {
				365	// First element comes from the first element of second source.
				366	// Remaining elements: Load zero extends / Move copies from first source.
				367	unsigned NumElts = VT.getVectorNumElements();
				368	Mask.push_back(NumElts);
				369	for (unsigned i = 1; i < NumElts; i++)
				370	Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
				371	}
Simon Pilgrim	d85cae3	2015-07-06 20:46:41 +0000	[diff] [blame]	372
				373	void DecodeEXTRQIMask(int Len, int Idx,
				374	SmallVectorImpl<int> &ShuffleMask) {
				375	// Only the bottom 6 bits are valid for each immediate.
				376	Len &= 0x3F;
				377	Idx &= 0x3F;
				378
				379	// We can only decode this bit extraction instruction as a shuffle if both the
				380	// length and index work with whole bytes.
				381	if (0 != (Len % 8) \|\| 0 != (Idx % 8))
				382	return;
				383
				384	// A length of zero is equivalent to a bit length of 64.
				385	if (Len == 0)
				386	Len = 64;
				387
				388	// If the length + index exceeds the bottom 64 bits the result is undefined.
				389	if ((Len + Idx) > 64) {
				390	ShuffleMask.append(16, SM_SentinelUndef);
				391	return;
				392	}
				393
				394	// Convert index and index to work with bytes.
				395	Len /= 8;
				396	Idx /= 8;
				397
				398	// EXTRQ: Extract Len bytes starting from Idx. Zero pad the remaining bytes
				399	// of the lower 64-bits. The upper 64-bits are undefined.
				400	for (int i = 0; i != Len; ++i)
				401	ShuffleMask.push_back(i + Idx);
				402	for (int i = Len; i != 8; ++i)
				403	ShuffleMask.push_back(SM_SentinelZero);
				404	for (int i = 8; i != 16; ++i)
				405	ShuffleMask.push_back(SM_SentinelUndef);
				406	}
				407
				408	void DecodeINSERTQIMask(int Len, int Idx,
				409	SmallVectorImpl<int> &ShuffleMask) {
				410	// Only the bottom 6 bits are valid for each immediate.
				411	Len &= 0x3F;
				412	Idx &= 0x3F;
				413
				414	// We can only decode this bit insertion instruction as a shuffle if both the
				415	// length and index work with whole bytes.
				416	if (0 != (Len % 8) \|\| 0 != (Idx % 8))
				417	return;
				418
				419	// A length of zero is equivalent to a bit length of 64.
				420	if (Len == 0)
				421	Len = 64;
				422
				423	// If the length + index exceeds the bottom 64 bits the result is undefined.
				424	if ((Len + Idx) > 64) {
				425	ShuffleMask.append(16, SM_SentinelUndef);
				426	return;
				427	}
				428
				429	// Convert index and index to work with bytes.
				430	Len /= 8;
				431	Idx /= 8;
				432
				433	// INSERTQ: Extract lowest Len bytes from lower half of second source and
				434	// insert over first source starting at Idx byte. The upper 64-bits are
				435	// undefined.
				436	for (int i = 0; i != Idx; ++i)
				437	ShuffleMask.push_back(i);
				438	for (int i = 0; i != Len; ++i)
				439	ShuffleMask.push_back(i + 16);
				440	for (int i = Idx + Len; i != 8; ++i)
				441	ShuffleMask.push_back(i);
				442	for (int i = 8; i != 16; ++i)
				443	ShuffleMask.push_back(SM_SentinelUndef);
				444	}
				445
Elena Demikhovsky	e88038f	2015-09-08 06:38:21 +0000	[diff] [blame]	446	void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
				447	SmallVectorImpl<int> &ShuffleMask) {
				448	for (int i = 0, e = RawMask.size(); i < e; ++i) {
				449	uint64_t M = RawMask[i];
				450	ShuffleMask.push_back((int)M);
				451	}
				452	}
				453
				454	void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
				455	SmallVectorImpl<int> &ShuffleMask) {
				456	for (int i = 0, e = RawMask.size(); i < e; ++i) {
				457	uint64_t M = RawMask[i];
				458	ShuffleMask.push_back((int)M);
				459	}
				460	}
				461
Alexander Kornienko	f00654e	2015-06-23 09:49:53 +0000	[diff] [blame]	462	} // llvm namespace