//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//

#include "X86ShuffleDecode.h"
#include "llvm/ADT/ArrayRef.h"

//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//
21namespace llvm {
22
23void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
24 // Defaults the copying the dest value.
25 ShuffleMask.push_back(0);
26 ShuffleMask.push_back(1);
27 ShuffleMask.push_back(2);
28 ShuffleMask.push_back(3);
29
30 // Decode the immediate.
31 unsigned ZMask = Imm & 15;
32 unsigned CountD = (Imm >> 4) & 3;
33 unsigned CountS = (Imm >> 6) & 3;
34
35 // CountS selects which input element to use.
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +000036 unsigned InVal = 4 + CountS;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000037 // CountD specifies which element of destination to update.
38 ShuffleMask[CountD] = InVal;
39 // ZMask zaps values, potentially overriding the CountD elt.
40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
44}
45
Craig Topperacaba3b2018-03-12 16:43:11 +000046void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
Simon Pilgrima3d67442016-02-07 15:39:22 +000047 SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrima3d67442016-02-07 15:39:22 +000048 assert((Idx + Len) <= NumElts && "Insertion out of range");
49
50 for (unsigned i = 0; i != NumElts; ++i)
51 ShuffleMask.push_back(i);
52 for (unsigned i = 0; i != Len; ++i)
53 ShuffleMask[Idx + i] = NumElts + i;
54}
55
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000056// <3,1> or <6,7,2,3>
57void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +000058 for (unsigned i = NElts / 2; i != NElts; ++i)
59 ShuffleMask.push_back(NElts + i);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000060
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +000061 for (unsigned i = NElts / 2; i != NElts; ++i)
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000062 ShuffleMask.push_back(i);
63}
64
65// <0,2> or <0,1,4,5>
66void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +000067 for (unsigned i = 0; i != NElts / 2; ++i)
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000068 ShuffleMask.push_back(i);
69
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +000070 for (unsigned i = 0; i != NElts / 2; ++i)
71 ShuffleMask.push_back(NElts + i);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000072}
73
Craig Topperacaba3b2018-03-12 16:43:11 +000074void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000075 for (int i = 0, e = NumElts / 2; i < e; ++i) {
76 ShuffleMask.push_back(2 * i);
77 ShuffleMask.push_back(2 * i);
78 }
79}
80
Craig Topperacaba3b2018-03-12 16:43:11 +000081void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000082 for (int i = 0, e = NumElts / 2; i < e; ++i) {
83 ShuffleMask.push_back(2 * i + 1);
84 ShuffleMask.push_back(2 * i + 1);
85 }
86}
87
Craig Topperacaba3b2018-03-12 16:43:11 +000088void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
89 const unsigned NumLaneElts = 2;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000090
91 for (unsigned l = 0; l < NumElts; l += NumLaneElts)
Craig Topperacaba3b2018-03-12 16:43:11 +000092 for (unsigned i = 0; i < NumLaneElts; ++i)
93 ShuffleMask.push_back(l);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000094}
95
Craig Topperacaba3b2018-03-12 16:43:11 +000096void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
97 SmallVectorImpl<int> &ShuffleMask) {
98 const unsigned NumLaneElts = 16;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +000099
100 for (unsigned l = 0; l < NumElts; l += NumLaneElts)
101 for (unsigned i = 0; i < NumLaneElts; ++i) {
102 int M = SM_SentinelZero;
103 if (i >= Imm) M = i - Imm + l;
104 ShuffleMask.push_back(M);
105 }
106}
107
Craig Topperacaba3b2018-03-12 16:43:11 +0000108void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
109 SmallVectorImpl<int> &ShuffleMask) {
110 const unsigned NumLaneElts = 16;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000111
112 for (unsigned l = 0; l < NumElts; l += NumLaneElts)
113 for (unsigned i = 0; i < NumLaneElts; ++i) {
114 unsigned Base = i + Imm;
115 int M = Base + l;
116 if (Base >= NumLaneElts) M = SM_SentinelZero;
117 ShuffleMask.push_back(M);
118 }
119}
120
Craig Topperacaba3b2018-03-12 16:43:11 +0000121void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000122 SmallVectorImpl<int> &ShuffleMask) {
Craig Topperacaba3b2018-03-12 16:43:11 +0000123 const unsigned NumLaneElts = 16;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000124
125 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
126 for (unsigned i = 0; i != NumLaneElts; ++i) {
Craig Topperacaba3b2018-03-12 16:43:11 +0000127 unsigned Base = i + Imm;
128 // if i+imm is out of this lane then we actually need the other source
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000129 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
130 ShuffleMask.push_back(Base + l);
131 }
132 }
133}
134
Craig Topperacaba3b2018-03-12 16:43:11 +0000135void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
Craig Topperb084c902016-10-22 06:51:56 +0000136 SmallVectorImpl<int> &ShuffleMask) {
Craig Topperb084c902016-10-22 06:51:56 +0000137 // Not all bits of the immediate are used so mask it.
138 assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
139 Imm = Imm & (NumElts - 1);
Craig Topperacaba3b2018-03-12 16:43:11 +0000140 for (unsigned i = 0; i != NumElts; ++i)
Craig Topperb084c902016-10-22 06:51:56 +0000141 ShuffleMask.push_back(i + Imm);
142}
143
Simon Pilgrimf8f86ab2015-09-13 11:28:45 +0000144/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000145/// VT indicates the type of the vector allowing it to handle different
146/// datatypes and vector widths.
Craig Topperacaba3b2018-03-12 16:43:11 +0000147void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
148 SmallVectorImpl<int> &ShuffleMask) {
149 unsigned Size = NumElts * ScalarBits;
150 unsigned NumLanes = Size / 128;
Simon Pilgrimf8f86ab2015-09-13 11:28:45 +0000151 if (NumLanes == 0) NumLanes = 1; // Handle MMX
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000152 unsigned NumLaneElts = NumElts / NumLanes;
153
Craig Topper2ed43282018-06-08 01:09:31 +0000154 uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000155 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
156 for (unsigned i = 0; i != NumLaneElts; ++i) {
Craig Topper2ed43282018-06-08 01:09:31 +0000157 ShuffleMask.push_back(SplatImm % NumLaneElts + l);
158 SplatImm /= NumLaneElts;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000159 }
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000160 }
161}
162
Craig Topperacaba3b2018-03-12 16:43:11 +0000163void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000164 SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000165 for (unsigned l = 0; l != NumElts; l += 8) {
166 unsigned NewImm = Imm;
167 for (unsigned i = 0, e = 4; i != e; ++i) {
168 ShuffleMask.push_back(l + i);
169 }
170 for (unsigned i = 4, e = 8; i != e; ++i) {
171 ShuffleMask.push_back(l + 4 + (NewImm & 3));
172 NewImm >>= 2;
173 }
174 }
175}
176
Craig Topperacaba3b2018-03-12 16:43:11 +0000177void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000178 SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000179 for (unsigned l = 0; l != NumElts; l += 8) {
180 unsigned NewImm = Imm;
181 for (unsigned i = 0, e = 4; i != e; ++i) {
182 ShuffleMask.push_back(l + (NewImm & 3));
183 NewImm >>= 2;
184 }
185 for (unsigned i = 4, e = 8; i != e; ++i) {
186 ShuffleMask.push_back(l + i);
187 }
188 }
189}
190
Craig Topperacaba3b2018-03-12 16:43:11 +0000191void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrimf8f86ab2015-09-13 11:28:45 +0000192 unsigned NumHalfElts = NumElts / 2;
193
194 for (unsigned l = 0; l != NumHalfElts; ++l)
195 ShuffleMask.push_back(l + NumHalfElts);
196 for (unsigned h = 0; h != NumHalfElts; ++h)
197 ShuffleMask.push_back(h);
198}
199
/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. The element
/// count and scalar width allow it to handle different datatypes and vector
/// widths.
void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
                     unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumLaneElts = 128 / ScalarBits;

  // The immediate is consumed log2(NumLaneElts) bits at a time, in push
  // order, so NewImm must track the remaining (unused) selector bits.
  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // each half of a lane comes from different source
    for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
      for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
        NewImm /= NumLaneElts;
      }
    }
    // With 4 elements per lane the 8-bit immediate is fully consumed by one
    // lane, and every lane decodes the same bits; reload for the next lane.
    if (NumLaneElts == 4) NewImm = Imm; // reload imm
  }
}
219
220/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
221/// and punpckh*. VT indicates the type of the vector allowing it to handle
222/// different datatypes and vector widths.
Craig Topperacaba3b2018-03-12 16:43:11 +0000223void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
224 SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000225 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
226 // independently on 128-bit lanes.
Craig Topperacaba3b2018-03-12 16:43:11 +0000227 unsigned NumLanes = (NumElts * ScalarBits) / 128;
Simon Pilgrimf8f86ab2015-09-13 11:28:45 +0000228 if (NumLanes == 0) NumLanes = 1; // Handle MMX
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000229 unsigned NumLaneElts = NumElts / NumLanes;
230
231 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +0000232 for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
233 ShuffleMask.push_back(i); // Reads from dest/src1
234 ShuffleMask.push_back(i + NumElts); // Reads from src/src2
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000235 }
236 }
237}
238
239/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
240/// and punpckl*. VT indicates the type of the vector allowing it to handle
241/// different datatypes and vector widths.
Craig Topperacaba3b2018-03-12 16:43:11 +0000242void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
243 SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000244 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
245 // independently on 128-bit lanes.
Craig Topperacaba3b2018-03-12 16:43:11 +0000246 unsigned NumLanes = (NumElts * ScalarBits) / 128;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000247 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
248 unsigned NumLaneElts = NumElts / NumLanes;
249
250 for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +0000251 for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
252 ShuffleMask.push_back(i); // Reads from dest/src1
253 ShuffleMask.push_back(i + NumElts); // Reads from src/src2
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000254 }
255 }
256}
257
Simon Pilgrimc941f6b2016-07-18 17:32:59 +0000258/// Decodes a broadcast of the first element of a vector.
Craig Topperacaba3b2018-03-12 16:43:11 +0000259void DecodeVectorBroadcast(unsigned NumElts,
260 SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrimc941f6b2016-07-18 17:32:59 +0000261 ShuffleMask.append(NumElts, 0);
262}
263
Simon Pilgrima76a8e52016-07-14 12:07:43 +0000264/// Decodes a broadcast of a subvector to a larger vector type.
Craig Topperacaba3b2018-03-12 16:43:11 +0000265void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
Simon Pilgrima76a8e52016-07-14 12:07:43 +0000266 SmallVectorImpl<int> &ShuffleMask) {
Craig Topperacaba3b2018-03-12 16:43:11 +0000267 unsigned Scale = DstNumElts / SrcNumElts;
Simon Pilgrima76a8e52016-07-14 12:07:43 +0000268
269 for (unsigned i = 0; i != Scale; ++i)
Craig Topperacaba3b2018-03-12 16:43:11 +0000270 for (unsigned j = 0; j != SrcNumElts; ++j)
Simon Pilgrima76a8e52016-07-14 12:07:43 +0000271 ShuffleMask.push_back(j);
272}
273
/// Decode a shuffle packed values at 128-bit granularity
/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
/// immediate mask into a shuffle mask.
void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
                               unsigned Imm,
                               SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumElementsInLane = 128 / ScalarSize;
  unsigned NumLanes = NumElts / NumElementsInLane;

  // Each destination lane is selected by log2(NumLanes) bits of the
  // immediate, consumed low-to-high as we walk the lanes.
  for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
    Imm /= NumLanes; // Discard the bits we just used.
    // The upper half of the destination reads lanes from the second source.
    if (l >= (NumElts / 2))
      Index += NumElts;
    for (unsigned i = 0; i != NumElementsInLane; ++i)
      ShuffleMask.push_back(Index + i);
  }
}
293
Craig Topperacaba3b2018-03-12 16:43:11 +0000294void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000295 SmallVectorImpl<int> &ShuffleMask) {
Craig Topperacaba3b2018-03-12 16:43:11 +0000296 unsigned HalfSize = NumElts / 2;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000297
298 for (unsigned l = 0; l != 2; ++l) {
Simon Pilgrim40343e62015-07-06 22:46:46 +0000299 unsigned HalfMask = Imm >> (l * 4);
300 unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
NAKAMURA Takumi5582a6a2015-05-25 01:43:34 +0000301 for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
Denis Protivenskyb6129022015-07-07 07:48:48 +0000302 ShuffleMask.push_back(HalfMask & 8 ? SM_SentinelZero : (int)i);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000303 }
304}
305
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000306void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000307 SmallVectorImpl<int> &ShuffleMask) {
308 for (int i = 0, e = RawMask.size(); i < e; ++i) {
309 uint64_t M = RawMask[i];
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000310 if (UndefElts[i]) {
311 ShuffleMask.push_back(SM_SentinelUndef);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000312 continue;
313 }
Simon Pilgrimf33cb612016-03-03 21:55:01 +0000314 // For 256/512-bit vectors the base of the shuffle is the 128-bit
315 // subvector we're inside.
316 int Base = (i / 16) * 16;
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000317 // If the high bit (7) of the byte is set, the element is zeroed.
318 if (M & (1 << 7))
319 ShuffleMask.push_back(SM_SentinelZero);
320 else {
321 // Only the least significant 4 bits of the byte are used.
322 int Index = Base + (M & 0xf);
323 ShuffleMask.push_back(Index);
324 }
325 }
326}
327
Craig Topperacaba3b2018-03-12 16:43:11 +0000328void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
329 SmallVectorImpl<int> &ShuffleMask) {
330 for (unsigned i = 0; i < NumElts; ++i) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000331 // If there are more than 8 elements in the vector, then any immediate blend
Craig Topperacaba3b2018-03-12 16:43:11 +0000332 // mask wraps around.
333 unsigned Bit = i % 8;
334 ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i);
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000335 }
336}
337
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000338void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
Simon Pilgrimfd4b9b02016-04-16 17:52:07 +0000339 SmallVectorImpl<int> &ShuffleMask) {
340 assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");
341
342 // VPPERM Operation
343 // Bits[4:0] - Byte Index (0 - 31)
344 // Bits[7:5] - Permute Operation
345 //
346 // Permute Operation:
347 // 0 - Source byte (no logical operation).
348 // 1 - Invert source byte.
349 // 2 - Bit reverse of source byte.
350 // 3 - Bit reverse of inverted source byte.
351 // 4 - 00h (zero - fill).
352 // 5 - FFh (ones - fill).
353 // 6 - Most significant bit of source byte replicated in all bit positions.
354 // 7 - Invert most significant bit of source byte and replicate in all bit positions.
355 for (int i = 0, e = RawMask.size(); i < e; ++i) {
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000356 if (UndefElts[i]) {
357 ShuffleMask.push_back(SM_SentinelUndef);
Simon Pilgrimfd4b9b02016-04-16 17:52:07 +0000358 continue;
359 }
360
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000361 uint64_t M = RawMask[i];
Simon Pilgrimf379a6c2016-04-24 15:05:04 +0000362 uint64_t PermuteOp = (M >> 5) & 0x7;
Simon Pilgrimfd4b9b02016-04-16 17:52:07 +0000363 if (PermuteOp == 4) {
364 ShuffleMask.push_back(SM_SentinelZero);
365 continue;
366 }
367 if (PermuteOp != 0) {
368 ShuffleMask.clear();
369 return;
370 }
371
372 uint64_t Index = M & 0x1F;
373 ShuffleMask.push_back((int)Index);
374 }
375}
376
Simon Pilgrima0d73832016-07-03 18:27:37 +0000377/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
Craig Topperacaba3b2018-03-12 16:43:11 +0000378void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
379 SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrima0d73832016-07-03 18:27:37 +0000380 for (unsigned l = 0; l != NumElts; l += 4)
381 for (unsigned i = 0; i != 4; ++i)
382 ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000383}
384
Craig Topperacaba3b2018-03-12 16:43:11 +0000385void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
386 unsigned NumDstElts, SmallVectorImpl<int> &Mask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000387 unsigned Scale = DstScalarBits / SrcScalarBits;
388 assert(SrcScalarBits < DstScalarBits &&
389 "Expected zero extension mask to increase scalar size");
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000390
391 for (unsigned i = 0; i != NumDstElts; i++) {
392 Mask.push_back(i);
393 for (unsigned j = 1; j != Scale; j++)
394 Mask.push_back(SM_SentinelZero);
395 }
396}
397
Craig Topperacaba3b2018-03-12 16:43:11 +0000398void DecodeZeroMoveLowMask(unsigned NumElts,
399 SmallVectorImpl<int> &ShuffleMask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000400 ShuffleMask.push_back(0);
401 for (unsigned i = 1; i < NumElts; i++)
402 ShuffleMask.push_back(SM_SentinelZero);
403}
404
Craig Topperacaba3b2018-03-12 16:43:11 +0000405void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
406 SmallVectorImpl<int> &Mask) {
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000407 // First element comes from the first element of second source.
408 // Remaining elements: Load zero extends / Move copies from first source.
NAKAMURA Takumifb3bd712015-05-25 01:43:23 +0000409 Mask.push_back(NumElts);
410 for (unsigned i = 1; i < NumElts; i++)
411 Mask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
412}
Simon Pilgrimd85cae32015-07-06 20:46:41 +0000413
// Decodes the SSE4A EXTRQ immediates (bit length Len, bit index Idx) into a
// shuffle mask, or leaves the mask empty if the extraction isn't
// element-aligned.
void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                      SmallVectorImpl<int> &ShuffleMask) {
  // EXTRQ only ever writes the lower 64 bits of the destination.
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit extraction instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index from bits to whole elements.
  Len /= EltSize;
  Idx /= EltSize;

  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
  // elements of the lower 64-bits. The upper 64-bits are undefined.
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + Idx);
  for (int i = Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(SM_SentinelZero);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}
450
// Decodes the SSE4A INSERTQ immediates (bit length Len, bit index Idx) into a
// shuffle mask, or leaves the mask empty if the insertion isn't
// element-aligned.
void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                        SmallVectorImpl<int> &ShuffleMask) {
  // INSERTQ only ever writes the lower 64 bits of the destination.
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit insertion instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert the length and index from bits to whole elements.
  Len /= EltSize;
  Idx /= EltSize;

  // INSERTQ: Extract lowest Len elements from lower half of second source and
  // insert over first source starting at Idx element. The upper 64-bits are
  // undefined.
  for (int i = 0; i != Idx; ++i)
    ShuffleMask.push_back(i);
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + NumElts);
  for (int i = Idx + Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(i);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}
490
Craig Topperacaba3b2018-03-12 16:43:11 +0000491void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000492 ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000493 SmallVectorImpl<int> &ShuffleMask) {
Craig Topperacaba3b2018-03-12 16:43:11 +0000494 unsigned VecSize = NumElts * ScalarBits;
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000495 unsigned NumLanes = VecSize / 128;
Craig Topperacaba3b2018-03-12 16:43:11 +0000496 unsigned NumEltsPerLane = NumElts / NumLanes;
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000497 assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
498 "Unexpected vector size");
Craig Topperacaba3b2018-03-12 16:43:11 +0000499 assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000500
501 for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000502 if (UndefElts[i]) {
503 ShuffleMask.push_back(SM_SentinelUndef);
504 continue;
505 }
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000506 uint64_t M = RawMask[i];
Craig Topperacaba3b2018-03-12 16:43:11 +0000507 M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
Simon Pilgrim40e1a712016-03-05 22:53:31 +0000508 unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
509 ShuffleMask.push_back((int)(LaneOffset + M));
510 }
511}
512
// Decodes an XOP VPERMIL2PS/VPERMIL2PD control vector (plus the 2-bit M2Z
// immediate) into a shuffle mask over <src1, src2>.
void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
  assert((NumElts == RawMask.size()) && "Unexpected mask size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // Zeroing is controlled by the M2Z immediate and the match bit:
    // M2Z[0:1] MatchBit
    // 0Xb X Source selected by Selector index.
    // 10b 0 Source selected by Selector index.
    // 10b 1 Zero.
    // 11b 0 Zero.
    // 11b 1 Source selected by Selector index.
    if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    // Select within the element's own 128-bit lane (PD uses one selector
    // bit, PS uses two)...
    int Index = i & ~(NumEltsPerLane - 1);
    if (ScalarBits == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    // ...from whichever source operand selector bit 2 picks.
    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}
558
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000559void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
Elena Demikhovskye88038f2015-09-08 06:38:21 +0000560 SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrim48adedf2016-07-05 18:31:17 +0000561 uint64_t EltMaskSize = RawMask.size() - 1;
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000562 for (int i = 0, e = RawMask.size(); i != e; ++i) {
563 if (UndefElts[i]) {
564 ShuffleMask.push_back(SM_SentinelUndef);
565 continue;
566 }
567 uint64_t M = RawMask[i];
Simon Pilgrim48adedf2016-07-05 18:31:17 +0000568 M &= EltMaskSize;
Elena Demikhovskye88038f2015-09-08 06:38:21 +0000569 ShuffleMask.push_back((int)M);
570 }
571}
572
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000573void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
Elena Demikhovskye88038f2015-09-08 06:38:21 +0000574 SmallVectorImpl<int> &ShuffleMask) {
Simon Pilgrim253ca342016-03-06 21:54:52 +0000575 uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
Simon Pilgrimf85ee9f2018-10-23 11:33:38 +0000576 for (int i = 0, e = RawMask.size(); i != e; ++i) {
577 if (UndefElts[i]) {
578 ShuffleMask.push_back(SM_SentinelUndef);
579 continue;
580 }
581 uint64_t M = RawMask[i];
Simon Pilgrim253ca342016-03-06 21:54:52 +0000582 M &= EltMaskSize;
Elena Demikhovskye88038f2015-09-08 06:38:21 +0000583 ShuffleMask.push_back((int)M);
584 }
585}
586
Alexander Kornienkof00654e2015-06-23 09:49:53 +0000587} // llvm namespace