blob: bc6696a2a325ac4e8f9b43e06e45824fc5233cfa [file] [log] [blame]
Chris Lattnerec97a902010-01-05 05:36:20 +00001//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Chris Lattnerec97a902010-01-05 05:36:20 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
Chandler Carrutha9174582015-01-22 05:25:13 +000014#include "InstCombineInternal.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000015#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
JF Bastiend52c9902015-02-25 22:30:51 +000017#include "llvm/ADT/DenseMap.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000018#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
David Majnemer599ca442015-07-13 01:15:53 +000020#include "llvm/Analysis/InstructionSimplify.h"
21#include "llvm/Analysis/VectorUtils.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000022#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/Constant.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Operator.h"
Chandler Carruth820a9082014-03-04 11:08:18 +000030#include "llvm/IR/PatternMatch.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000031#include "llvm/IR/Type.h"
32#include "llvm/IR/User.h"
33#include "llvm/IR/Value.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40#include <utility>
41
Chris Lattnerec97a902010-01-05 05:36:20 +000042using namespace llvm;
Nadav Rotem7df85092013-01-15 23:43:14 +000043using namespace PatternMatch;
Chris Lattnerec97a902010-01-05 05:36:20 +000044
Chandler Carruth964daaa2014-04-22 02:55:47 +000045#define DEBUG_TYPE "instcombine"
46
Sanjay Patel6eccf482015-09-09 15:24:36 +000047/// Return true if the value is cheaper to scalarize than it is to leave as a
Sanjay Patele51d5bd2018-12-18 19:07:38 +000048/// vector operation. IsConstantExtractIndex indicates whether we are extracting
49/// one known element from a vector constant.
50///
51/// FIXME: It's possible to create more instructions than previously existed.
52static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53 // If we can pick a scalar constant value out of a vector, that is free.
54 if (auto *C = dyn_cast<Constant>(V))
55 return IsConstantExtractIndex || C->getSplatValue();
Chris Lattner8326bd82012-01-26 00:42:34 +000056
Sanjay Patele51d5bd2018-12-18 19:07:38 +000057 // An insertelement to the same constant index as our extract will simplify
58 // to the scalar inserted element. An insertelement to a different constant
59 // index is irrelevant to our extract.
60 if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61 return IsConstantExtractIndex;
Bob Wilson8ecf98b2010-10-29 22:20:43 +000062
Sanjay Patele51d5bd2018-12-18 19:07:38 +000063 if (match(V, m_OneUse(m_Load(m_Value()))))
Chris Lattnerec97a902010-01-05 05:36:20 +000064 return true;
Sanjay Patele51d5bd2018-12-18 19:07:38 +000065
Simon Molld871ef42020-03-10 16:05:31 +010066 if (match(V, m_OneUse(m_UnOp())))
67 return true;
68
Sanjay Patele51d5bd2018-12-18 19:07:38 +000069 Value *V0, *V1;
70 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
71 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
72 cheapToScalarize(V1, IsConstantExtractIndex))
Chris Lattnerec97a902010-01-05 05:36:20 +000073 return true;
Sanjay Patele51d5bd2018-12-18 19:07:38 +000074
75 CmpInst::Predicate UnusedPred;
76 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
77 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
78 cheapToScalarize(V1, IsConstantExtractIndex))
Chris Lattnerec97a902010-01-05 05:36:20 +000079 return true;
Bob Wilson8ecf98b2010-10-29 22:20:43 +000080
Chris Lattnerec97a902010-01-05 05:36:20 +000081 return false;
82}
83
Michael Kupersteina0c6ae02016-06-06 23:38:33 +000084// If we have a PHI node with a vector type that is only used to feed
Matt Arsenault38874732013-08-28 22:17:26 +000085// itself and be an operand of extractelement at a constant location,
86// try to replace the PHI of the vector type with a PHI of a scalar type.
Anat Shemer0c95efa2013-04-18 19:35:39 +000087Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
Michael Kupersteina0c6ae02016-06-06 23:38:33 +000088 SmallVector<Instruction *, 2> Extracts;
89 // The users we want the PHI to have are:
90 // 1) The EI ExtractElement (we already know this)
91 // 2) Possibly more ExtractElements with the same index.
92 // 3) Another operand, which will feed back into the PHI.
93 Instruction *PHIUser = nullptr;
94 for (auto U : PN->users()) {
95 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
96 if (EI.getIndexOperand() == EU->getIndexOperand())
97 Extracts.push_back(EU);
98 else
99 return nullptr;
100 } else if (!PHIUser) {
101 PHIUser = cast<Instruction>(U);
102 } else {
103 return nullptr;
104 }
105 }
Anat Shemer0c95efa2013-04-18 19:35:39 +0000106
Michael Kupersteina0c6ae02016-06-06 23:38:33 +0000107 if (!PHIUser)
108 return nullptr;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000109
110 // Verify that this PHI user has one use, which is the PHI itself,
111 // and that it is a binary operation which is cheap to scalarize.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +0000112 // otherwise return nullptr.
Chandler Carruthcdf47882014-03-09 03:16:01 +0000113 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
Sanjay Patel431e1142015-11-17 17:24:08 +0000114 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
Craig Topperf40110f2014-04-25 05:29:35 +0000115 return nullptr;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000116
117 // Create a scalar PHI node that will replace the vector PHI node
118 // just before the current PHI node.
Joey Goulyb34294d2013-05-24 12:33:28 +0000119 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
120 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
Anat Shemer0c95efa2013-04-18 19:35:39 +0000121 // Scalarize each PHI operand.
Joey Goulyb34294d2013-05-24 12:33:28 +0000122 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Anat Shemer0c95efa2013-04-18 19:35:39 +0000123 Value *PHIInVal = PN->getIncomingValue(i);
124 BasicBlock *inBB = PN->getIncomingBlock(i);
125 Value *Elt = EI.getIndexOperand();
126 // If the operand is the PHI induction variable:
127 if (PHIInVal == PHIUser) {
128 // Scalarize the binary operation. Its first operand is the
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000129 // scalar PHI, and the second operand is extracted from the other
Anat Shemer0c95efa2013-04-18 19:35:39 +0000130 // vector operand.
131 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
Joey Goulyb34294d2013-05-24 12:33:28 +0000132 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
Joey Gouly83699282013-05-24 12:29:54 +0000133 Value *Op = InsertNewInstWith(
134 ExtractElementInst::Create(B0->getOperand(opId), Elt,
135 B0->getOperand(opId)->getName() + ".Elt"),
136 *B0);
Anat Shemer0c95efa2013-04-18 19:35:39 +0000137 Value *newPHIUser = InsertNewInstWith(
Owen Anderson7ea02fc2016-03-01 19:35:52 +0000138 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
139 scalarPHI, Op, B0), *B0);
Anat Shemer0c95efa2013-04-18 19:35:39 +0000140 scalarPHI->addIncoming(newPHIUser, inBB);
141 } else {
142 // Scalarize PHI input:
Joey Goulyb34294d2013-05-24 12:33:28 +0000143 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
Anat Shemer0c95efa2013-04-18 19:35:39 +0000144 // Insert the new instruction into the predecessor basic block.
145 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
146 BasicBlock::iterator InsertPos;
147 if (pos && !isa<PHINode>(pos)) {
Duncan P. N. Exon Smith9f8aaf22015-10-13 16:59:33 +0000148 InsertPos = ++pos->getIterator();
Anat Shemer0c95efa2013-04-18 19:35:39 +0000149 } else {
150 InsertPos = inBB->getFirstInsertionPt();
151 }
152
153 InsertNewInstWith(newEI, *InsertPos);
154
155 scalarPHI->addIncoming(newEI, inBB);
156 }
157 }
Michael Kupersteina0c6ae02016-06-06 23:38:33 +0000158
159 for (auto E : Extracts)
160 replaceInstUsesWith(*E, scalarPHI);
161
162 return &EI;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000163}
164
Sanjay Patel4674c772018-09-24 20:41:22 +0000165static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
Sanjay Patel31b07192018-10-01 14:40:00 +0000166 InstCombiner::BuilderTy &Builder,
167 bool IsBigEndian) {
Sanjay Patel4674c772018-09-24 20:41:22 +0000168 Value *X;
169 uint64_t ExtIndexC;
170 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
171 !X->getType()->isVectorTy() ||
172 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
173 return nullptr;
174
175 // If this extractelement is using a bitcast from a vector of the same number
176 // of elements, see if we can find the source element from the source vector:
177 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
178 Type *SrcTy = X->getType();
179 Type *DestTy = Ext.getType();
180 unsigned NumSrcElts = SrcTy->getVectorNumElements();
181 unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
182 if (NumSrcElts == NumElts)
183 if (Value *Elt = findScalarElement(X, ExtIndexC))
184 return new BitCastInst(Elt, DestTy);
185
Sanjay Patel31b07192018-10-01 14:40:00 +0000186 // If the source elements are wider than the destination, try to shift and
187 // truncate a subset of scalar bits of an insert op.
Sanjay Patel3746e112018-10-04 16:25:05 +0000188 if (NumSrcElts < NumElts) {
Sanjay Patel31b07192018-10-01 14:40:00 +0000189 Value *Scalar;
190 uint64_t InsIndexC;
191 if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
192 m_ConstantInt(InsIndexC))))
193 return nullptr;
194
195 // The extract must be from the subset of vector elements that we inserted
196 // into. Example: if we inserted element 1 of a <2 x i64> and we are
197 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
198 // of elements 4-7 of the bitcasted vector.
199 unsigned NarrowingRatio = NumElts / NumSrcElts;
200 if (ExtIndexC / NarrowingRatio != InsIndexC)
201 return nullptr;
202
203 // We are extracting part of the original scalar. How that scalar is
204 // inserted into the vector depends on the endian-ness. Example:
205 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
206 // +--+--+--+--+--+--+--+--+
207 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
208 // extelt <4 x i16> V', 3: | |S2|S3|
209 // +--+--+--+--+--+--+--+--+
210 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
211 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
212 // In this example, we must right-shift little-endian. Big-endian is just a
213 // truncate.
214 unsigned Chunk = ExtIndexC % NarrowingRatio;
215 if (IsBigEndian)
216 Chunk = NarrowingRatio - 1 - Chunk;
Sanjay Patel3746e112018-10-04 16:25:05 +0000217
218 // Bail out if this is an FP vector to FP vector sequence. That would take
219 // more instructions than we started with unless there is no shift, and it
220 // may not be handled as well in the backend.
221 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
222 bool NeedDestBitcast = DestTy->isFloatingPointTy();
223 if (NeedSrcBitcast && NeedDestBitcast)
224 return nullptr;
225
226 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
227 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
228 unsigned ShAmt = Chunk * DestWidth;
229
230 // TODO: This limitation is more strict than necessary. We could sum the
231 // number of new instructions and subtract the number eliminated to know if
232 // we can proceed.
233 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
234 if (NeedSrcBitcast || NeedDestBitcast)
235 return nullptr;
236
237 if (NeedSrcBitcast) {
238 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
239 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
240 }
241
Sanjay Patel31b07192018-10-01 14:40:00 +0000242 if (ShAmt) {
243 // Bail out if we could end with more instructions than we started with.
244 if (!Ext.getVectorOperand()->hasOneUse())
245 return nullptr;
246 Scalar = Builder.CreateLShr(Scalar, ShAmt);
247 }
Sanjay Patel3746e112018-10-04 16:25:05 +0000248
249 if (NeedDestBitcast) {
250 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
251 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
252 }
Sanjay Patel31b07192018-10-01 14:40:00 +0000253 return new TruncInst(Scalar, DestTy);
254 }
255
Sanjay Patel4674c772018-09-24 20:41:22 +0000256 return nullptr;
257}
258
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000259/// Find elements of V demanded by UserInstr.
260static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
261 unsigned VWidth = V->getType()->getVectorNumElements();
262
263 // Conservatively assume that all elements are needed.
264 APInt UsedElts(APInt::getAllOnesValue(VWidth));
265
266 switch (UserInstr->getOpcode()) {
267 case Instruction::ExtractElement: {
268 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
269 assert(EEI->getVectorOperand() == V);
270 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
271 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
272 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
273 }
274 break;
275 }
276 case Instruction::ShuffleVector: {
277 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
278 unsigned MaskNumElts = UserInstr->getType()->getVectorNumElements();
279
280 UsedElts = APInt(VWidth, 0);
281 for (unsigned i = 0; i < MaskNumElts; i++) {
282 unsigned MaskVal = Shuffle->getMaskValue(i);
283 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
284 continue;
285 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
286 UsedElts.setBit(MaskVal);
287 if (Shuffle->getOperand(1) == V &&
288 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
289 UsedElts.setBit(MaskVal - VWidth);
290 }
291 break;
292 }
293 default:
294 break;
295 }
296 return UsedElts;
297}
298
299/// Find union of elements of V demanded by all its users.
300/// If it is known by querying findDemandedEltsBySingleUser that
301/// no user demands an element of V, then the corresponding bit
302/// remains unset in the returned value.
303static APInt findDemandedEltsByAllUsers(Value *V) {
304 unsigned VWidth = V->getType()->getVectorNumElements();
305
306 APInt UnionUsedElts(VWidth, 0);
307 for (const Use &U : V->uses()) {
308 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
309 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
310 } else {
311 UnionUsedElts = APInt::getAllOnesValue(VWidth);
312 break;
313 }
314
315 if (UnionUsedElts.isAllOnesValue())
316 break;
317 }
318
319 return UnionUsedElts;
320}
321
Chris Lattnerec97a902010-01-05 05:36:20 +0000322Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000323 Value *SrcVec = EI.getVectorOperand();
324 Value *Index = EI.getIndexOperand();
325 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
Craig Toppera4205622017-06-09 03:21:29 +0000326 SQ.getWithInstruction(&EI)))
Sanjay Patel4b198802016-02-01 22:23:39 +0000327 return replaceInstUsesWith(EI, V);
David Majnemer599ca442015-07-13 01:15:53 +0000328
Chris Lattnerec97a902010-01-05 05:36:20 +0000329 // If extracting a specified index from the vector, see if we can recursively
330 // find a previously computed scalar that was inserted into the vector.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000331 auto *IndexC = dyn_cast<ConstantInt>(Index);
332 if (IndexC) {
Sanjay Patel7a526262018-09-24 16:39:03 +0000333 unsigned NumElts = EI.getVectorOperandType()->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000334
Simon Pilgrime7d032f2017-12-27 12:00:18 +0000335 // InstSimplify should handle cases where the index is invalid.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000336 if (!IndexC->getValue().ule(NumElts))
Simon Pilgrime7d032f2017-12-27 12:00:18 +0000337 return nullptr;
338
Chris Lattnerec97a902010-01-05 05:36:20 +0000339 // This instruction only demands the single element from the input vector.
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000340 if (NumElts != 1) {
341 // If the input vector has a single use, simplify it based on this use
342 // property.
343 if (SrcVec->hasOneUse()) {
344 APInt UndefElts(NumElts, 0);
345 APInt DemandedElts(NumElts, 0);
346 DemandedElts.setBit(IndexC->getZExtValue());
347 if (Value *V =
348 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts)) {
349 EI.setOperand(0, V);
350 return &EI;
351 }
352 } else {
353 // If the input vector has multiple uses, simplify it based on a union
354 // of all elements used.
355 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
356 if (!DemandedElts.isAllOnesValue()) {
357 APInt UndefElts(NumElts, 0);
358 if (Value *V = SimplifyDemandedVectorElts(
359 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
360 true /* AllowMultipleUsers */)) {
361 if (V != SrcVec) {
362 SrcVec->replaceAllUsesWith(V);
363 return &EI;
364 }
365 }
366 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000367 }
368 }
Sanjay Patel31b07192018-10-01 14:40:00 +0000369 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
Sanjay Patel4674c772018-09-24 20:41:22 +0000370 return I;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000371
372 // If there's a vector PHI feeding a scalar use through this extractelement
373 // instruction, try to scalarize the PHI.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000374 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
375 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
376 return ScalarPHI;
Chris Lattnerec97a902010-01-05 05:36:20 +0000377 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000378
Simon Molld871ef42020-03-10 16:05:31 +0100379 // TODO come up with a n-ary matcher that subsumes both unary and
380 // binary matchers.
381 UnaryOperator *UO;
382 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
383 // extelt (unop X), Index --> unop (extelt X, Index)
384 Value *X = UO->getOperand(0);
385 Value *E = Builder.CreateExtractElement(X, Index);
386 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
387 }
388
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000389 BinaryOperator *BO;
390 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
391 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
392 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
393 Value *E0 = Builder.CreateExtractElement(X, Index);
394 Value *E1 = Builder.CreateExtractElement(Y, Index);
395 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
396 }
397
Matt Arsenault9ccde612018-12-10 21:50:54 +0000398 Value *X, *Y;
399 CmpInst::Predicate Pred;
400 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
401 cheapToScalarize(SrcVec, IndexC)) {
402 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
403 Value *E0 = Builder.CreateExtractElement(X, Index);
404 Value *E1 = Builder.CreateExtractElement(Y, Index);
405 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
406 }
407
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000408 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
409 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000410 // Extracting the inserted element?
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000411 if (IE->getOperand(2) == Index)
Sanjay Patel4b198802016-02-01 22:23:39 +0000412 return replaceInstUsesWith(EI, IE->getOperand(1));
Chris Lattnerec97a902010-01-05 05:36:20 +0000413 // If the inserted and extracted elements are constants, they must not
414 // be the same value, extract from the pre-inserted value instead.
Nikita Popov878cb382020-01-31 22:23:33 +0100415 if (isa<Constant>(IE->getOperand(2)) && IndexC)
416 return replaceOperand(EI, 0, IE->getOperand(0));
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000417 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000418 // If this is extracting an element from a shufflevector, figure out where
419 // it came from and extract from the appropriate input element instead.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000420 if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
Eli Friedman303c81c2011-10-21 19:11:34 +0000421 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Chris Lattnerec97a902010-01-05 05:36:20 +0000422 Value *Src;
423 unsigned LHSWidth =
Chris Lattner8326bd82012-01-26 00:42:34 +0000424 SVI->getOperand(0)->getType()->getVectorNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000425
Bob Wilson11ee4562010-10-29 22:03:05 +0000426 if (SrcIdx < 0)
Sanjay Patel4b198802016-02-01 22:23:39 +0000427 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
Bob Wilson11ee4562010-10-29 22:03:05 +0000428 if (SrcIdx < (int)LHSWidth)
Chris Lattnerec97a902010-01-05 05:36:20 +0000429 Src = SVI->getOperand(0);
Bob Wilson11ee4562010-10-29 22:03:05 +0000430 else {
Chris Lattnerec97a902010-01-05 05:36:20 +0000431 SrcIdx -= LHSWidth;
432 Src = SVI->getOperand(1);
Chris Lattnerec97a902010-01-05 05:36:20 +0000433 }
Chris Lattner229907c2011-07-18 04:54:35 +0000434 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
Chris Lattnerec97a902010-01-05 05:36:20 +0000435 return ExtractElementInst::Create(Src,
Bob Wilson9d07f392010-10-29 22:03:07 +0000436 ConstantInt::get(Int32Ty,
Chris Lattnerec97a902010-01-05 05:36:20 +0000437 SrcIdx, false));
438 }
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000439 } else if (auto *CI = dyn_cast<CastInst>(I)) {
Sanjay Patelb67076c2015-11-29 22:09:34 +0000440 // Canonicalize extractelement(cast) -> cast(extractelement).
441 // Bitcasts can change the number of vector elements, and they cost
442 // nothing.
Anat Shemer55703182013-04-18 19:56:44 +0000443 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000444 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
Nadav Rotemd74b72b2011-03-31 22:57:29 +0000445 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
446 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000447 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000448 }
Craig Topperf40110f2014-04-25 05:29:35 +0000449 return nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +0000450}
451
Sanjay Patel6eccf482015-09-09 15:24:36 +0000452/// If V is a shuffle of values that ONLY returns elements from either LHS or
453/// RHS, return the shuffle mask and true. Otherwise, return false.
Sanjay Patel431e1142015-11-17 17:24:08 +0000454static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
Chris Lattner0256be92012-01-27 03:08:05 +0000455 SmallVectorImpl<Constant*> &Mask) {
Tim Northoverfad27612014-03-07 10:24:44 +0000456 assert(LHS->getType() == RHS->getType() &&
Chris Lattnerec97a902010-01-05 05:36:20 +0000457 "Invalid CollectSingleShuffleElements");
Matt Arsenault8227b9f2013-09-06 00:37:24 +0000458 unsigned NumElts = V->getType()->getVectorNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000459
Chris Lattnerec97a902010-01-05 05:36:20 +0000460 if (isa<UndefValue>(V)) {
461 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
462 return true;
463 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000464
Chris Lattnerec97a902010-01-05 05:36:20 +0000465 if (V == LHS) {
466 for (unsigned i = 0; i != NumElts; ++i)
467 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
468 return true;
469 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000470
Chris Lattnerec97a902010-01-05 05:36:20 +0000471 if (V == RHS) {
472 for (unsigned i = 0; i != NumElts; ++i)
473 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
474 i+NumElts));
475 return true;
476 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000477
Chris Lattnerec97a902010-01-05 05:36:20 +0000478 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
479 // If this is an insert of an extract from some other vector, include it.
480 Value *VecOp = IEI->getOperand(0);
481 Value *ScalarOp = IEI->getOperand(1);
482 Value *IdxOp = IEI->getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000483
Chris Lattnerec97a902010-01-05 05:36:20 +0000484 if (!isa<ConstantInt>(IdxOp))
485 return false;
486 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000487
Chris Lattnerec97a902010-01-05 05:36:20 +0000488 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000489 // We can handle this if the vector we are inserting into is
Chris Lattnerec97a902010-01-05 05:36:20 +0000490 // transitively ok.
Sanjay Patel431e1142015-11-17 17:24:08 +0000491 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000492 // If so, update the mask to reflect the inserted undef.
493 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
494 return true;
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000495 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000496 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
Tim Northoverfad27612014-03-07 10:24:44 +0000497 if (isa<ConstantInt>(EI->getOperand(1))) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000498 unsigned ExtractedIdx =
499 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
Tim Northoverfad27612014-03-07 10:24:44 +0000500 unsigned NumLHSElts = LHS->getType()->getVectorNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000501
Chris Lattnerec97a902010-01-05 05:36:20 +0000502 // This must be extracting from either LHS or RHS.
503 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000504 // We can handle this if the vector we are inserting into is
Chris Lattnerec97a902010-01-05 05:36:20 +0000505 // transitively ok.
Sanjay Patel431e1142015-11-17 17:24:08 +0000506 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000507 // If so, update the mask to reflect the inserted value.
508 if (EI->getOperand(0) == LHS) {
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000509 Mask[InsertedIdx % NumElts] =
Chris Lattnerec97a902010-01-05 05:36:20 +0000510 ConstantInt::get(Type::getInt32Ty(V->getContext()),
511 ExtractedIdx);
512 } else {
513 assert(EI->getOperand(0) == RHS);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000514 Mask[InsertedIdx % NumElts] =
Chris Lattnerec97a902010-01-05 05:36:20 +0000515 ConstantInt::get(Type::getInt32Ty(V->getContext()),
Tim Northoverfad27612014-03-07 10:24:44 +0000516 ExtractedIdx + NumLHSElts);
Chris Lattnerec97a902010-01-05 05:36:20 +0000517 }
518 return true;
519 }
520 }
521 }
522 }
523 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000524
Chris Lattnerec97a902010-01-05 05:36:20 +0000525 return false;
526}
527
Sanjay Patelae945e72015-12-24 21:17:56 +0000528/// If we have insertion into a vector that is wider than the vector that we
529/// are extracting from, try to widen the source vector to allow a single
530/// shufflevector to replace one or more insert/extract pairs.
531static void replaceExtractElements(InsertElementInst *InsElt,
532 ExtractElementInst *ExtElt,
533 InstCombiner &IC) {
534 VectorType *InsVecType = InsElt->getType();
535 VectorType *ExtVecType = ExtElt->getVectorOperandType();
536 unsigned NumInsElts = InsVecType->getVectorNumElements();
537 unsigned NumExtElts = ExtVecType->getVectorNumElements();
538
539 // The inserted-to vector must be wider than the extracted-from vector.
540 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
541 NumExtElts >= NumInsElts)
542 return;
543
544 // Create a shuffle mask to widen the extended-from vector using undefined
545 // values. The mask selects all of the values of the original vector followed
546 // by as many undefined values as needed to create a vector of the same length
547 // as the inserted-to vector.
548 SmallVector<Constant *, 16> ExtendMask;
549 IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
550 for (unsigned i = 0; i < NumExtElts; ++i)
551 ExtendMask.push_back(ConstantInt::get(IntType, i));
552 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
553 ExtendMask.push_back(UndefValue::get(IntType));
554
555 Value *ExtVecOp = ExtElt->getVectorOperand();
Sanjay Patel66fff732016-01-29 20:21:02 +0000556 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
557 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
558 ? ExtVecOpInst->getParent()
559 : ExtElt->getParent();
560
561 // TODO: This restriction matches the basic block check below when creating
562 // new extractelement instructions. If that limitation is removed, this one
563 // could also be removed. But for now, we just bail out to ensure that we
564 // will replace the extractelement instruction that is feeding our
565 // insertelement instruction. This allows the insertelement to then be
566 // replaced by a shufflevector. If the insertelement is not replaced, we can
567 // induce infinite looping because there's an optimization for extractelement
568 // that will delete our widening shuffle. This would trigger another attempt
569 // here to create that shuffle, and we spin forever.
570 if (InsertionBlock != InsElt->getParent())
571 return;
572
Sanjay Patel4e1b5a52016-11-10 00:15:14 +0000573 // TODO: This restriction matches the check in visitInsertElementInst() and
574 // prevents an infinite loop caused by not turning the extract/insert pair
575 // into a shuffle. We really should not need either check, but we're lacking
576 // folds for shufflevectors because we're afraid to generate shuffle masks
577 // that the backend can't handle.
578 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
579 return;
580
Sanjay Patelae945e72015-12-24 21:17:56 +0000581 auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
582 ConstantVector::get(ExtendMask));
583
Sanjay Patela1c53472016-01-05 19:09:47 +0000584 // Insert the new shuffle after the vector operand of the extract is defined
Sanjay Pateld72a4582016-01-08 01:39:16 +0000585 // (as long as it's not a PHI) or at the start of the basic block of the
586 // extract, so any subsequent extracts in the same basic block can use it.
587 // TODO: Insert before the earliest ExtractElementInst that is replaced.
Sanjay Pateld72a4582016-01-08 01:39:16 +0000588 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
Sanjay Patela1c53472016-01-05 19:09:47 +0000589 WideVec->insertAfter(ExtVecOpInst);
Sanjay Pateld72a4582016-01-08 01:39:16 +0000590 else
Sanjay Patela1c53472016-01-05 19:09:47 +0000591 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
Sanjay Patela1c53472016-01-05 19:09:47 +0000592
593 // Replace extracts from the original narrow vector with extracts from the new
594 // wide vector.
Sanjay Patelae945e72015-12-24 21:17:56 +0000595 for (User *U : ExtVecOp->users()) {
Sanjay Patela1c53472016-01-05 19:09:47 +0000596 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
Sanjay Pateld72a4582016-01-08 01:39:16 +0000597 if (!OldExt || OldExt->getParent() != WideVec->getParent())
Sanjay Patela1c53472016-01-05 19:09:47 +0000598 continue;
599 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
Sven van Haastregt78819e02017-06-05 09:18:10 +0000600 NewExt->insertAfter(OldExt);
Sanjay Patel4b198802016-02-01 22:23:39 +0000601 IC.replaceInstUsesWith(*OldExt, NewExt);
Sanjay Patelae945e72015-12-24 21:17:56 +0000602 }
603}
Tim Northoverfad27612014-03-07 10:24:44 +0000604
605/// We are building a shuffle to create V, which is a sequence of insertelement,
606/// extractelement pairs. If PermittedRHS is set, then we must either use it or
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000607/// not rely on the second vector source. Return a std::pair containing the
Tim Northoverfad27612014-03-07 10:24:44 +0000608/// left and right vectors of the proposed shuffle (or 0), and set the Mask
609/// parameter as required.
610///
611/// Note: we intentionally don't try to fold earlier shuffles since they have
612/// often been chosen carefully to be efficiently implementable on the target.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +0000613using ShuffleOps = std::pair<Value *, Value *>;
Tim Northoverfad27612014-03-07 10:24:44 +0000614
Sanjay Patel431e1142015-11-17 17:24:08 +0000615static ShuffleOps collectShuffleElements(Value *V,
Tim Northoverfad27612014-03-07 10:24:44 +0000616 SmallVectorImpl<Constant *> &Mask,
Sanjay Patelae945e72015-12-24 21:17:56 +0000617 Value *PermittedRHS,
618 InstCombiner &IC) {
Tim Northoverfad27612014-03-07 10:24:44 +0000619 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
Craig Topper17b55682016-12-29 07:03:18 +0000620 unsigned NumElts = V->getType()->getVectorNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000621
Chris Lattnerec97a902010-01-05 05:36:20 +0000622 if (isa<UndefValue>(V)) {
623 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
Tim Northoverfad27612014-03-07 10:24:44 +0000624 return std::make_pair(
625 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000626 }
Craig Topper2ea22b02013-01-18 05:09:16 +0000627
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000628 if (isa<ConstantAggregateZero>(V)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000629 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
Tim Northoverfad27612014-03-07 10:24:44 +0000630 return std::make_pair(V, nullptr);
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000631 }
Craig Topper2ea22b02013-01-18 05:09:16 +0000632
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000633 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000634 // If this is an insert of an extract from some other vector, include it.
635 Value *VecOp = IEI->getOperand(0);
636 Value *ScalarOp = IEI->getOperand(1);
637 Value *IdxOp = IEI->getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000638
Chris Lattnerec97a902010-01-05 05:36:20 +0000639 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
Tim Northoverfad27612014-03-07 10:24:44 +0000640 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000641 unsigned ExtractedIdx =
Bob Wilson67a6f322010-10-29 22:20:45 +0000642 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
Chris Lattnerec97a902010-01-05 05:36:20 +0000643 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000644
Chris Lattnerec97a902010-01-05 05:36:20 +0000645 // Either the extracted from or inserted into vector must be RHSVec,
646 // otherwise we'd end up with a shuffle of three inputs.
Craig Topperf40110f2014-04-25 05:29:35 +0000647 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Tim Northoverfad27612014-03-07 10:24:44 +0000648 Value *RHS = EI->getOperand(0);
Sanjay Patelae945e72015-12-24 21:17:56 +0000649 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
Craig Toppere73658d2014-04-28 04:05:08 +0000650 assert(LR.second == nullptr || LR.second == RHS);
Tim Northoverfad27612014-03-07 10:24:44 +0000651
652 if (LR.first->getType() != RHS->getType()) {
Sanjay Patelae945e72015-12-24 21:17:56 +0000653 // Although we are giving up for now, see if we can create extracts
654 // that match the inserts for another round of combining.
655 replaceExtractElements(IEI, EI, IC);
656
Tim Northoverfad27612014-03-07 10:24:44 +0000657 // We tried our best, but we can't find anything compatible with RHS
658 // further up the chain. Return a trivial shuffle.
659 for (unsigned i = 0; i < NumElts; ++i)
660 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
661 return std::make_pair(V, nullptr);
662 }
663
664 unsigned NumLHSElts = RHS->getType()->getVectorNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000665 Mask[InsertedIdx % NumElts] =
Bob Wilson67a6f322010-10-29 22:20:45 +0000666 ConstantInt::get(Type::getInt32Ty(V->getContext()),
Tim Northoverfad27612014-03-07 10:24:44 +0000667 NumLHSElts+ExtractedIdx);
668 return std::make_pair(LR.first, RHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000669 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000670
Tim Northoverfad27612014-03-07 10:24:44 +0000671 if (VecOp == PermittedRHS) {
672 // We've gone as far as we can: anything on the other side of the
673 // extractelement will already have been converted into a shuffle.
674 unsigned NumLHSElts =
675 EI->getOperand(0)->getType()->getVectorNumElements();
676 for (unsigned i = 0; i != NumElts; ++i)
677 Mask.push_back(ConstantInt::get(
678 Type::getInt32Ty(V->getContext()),
679 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
680 return std::make_pair(EI->getOperand(0), PermittedRHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000681 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000682
Chris Lattnerec97a902010-01-05 05:36:20 +0000683 // If this insertelement is a chain that comes from exactly these two
684 // vectors, return the vector and the effective shuffle.
Tim Northoverfad27612014-03-07 10:24:44 +0000685 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
Sanjay Patel431e1142015-11-17 17:24:08 +0000686 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
Tim Northoverfad27612014-03-07 10:24:44 +0000687 Mask))
688 return std::make_pair(EI->getOperand(0), PermittedRHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000689 }
690 }
691 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000692
Sanjay Patelb67076c2015-11-29 22:09:34 +0000693 // Otherwise, we can't do anything fancy. Return an identity vector.
Chris Lattnerec97a902010-01-05 05:36:20 +0000694 for (unsigned i = 0; i != NumElts; ++i)
695 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
Tim Northoverfad27612014-03-07 10:24:44 +0000696 return std::make_pair(V, nullptr);
Chris Lattnerec97a902010-01-05 05:36:20 +0000697}
698
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000699/// Try to find redundant insertvalue instructions, like the following ones:
700/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
701/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
702/// Here the second instruction inserts values at the same indices, as the
703/// first one, making the first one redundant.
704/// It should be transformed to:
705/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
706Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
707 bool IsRedundant = false;
708 ArrayRef<unsigned int> FirstIndices = I.getIndices();
709
710 // If there is a chain of insertvalue instructions (each of them except the
711 // last one has only one use and it's another insertvalue insn from this
712 // chain), check if any of the 'children' uses the same indices as the first
713 // instruction. In this case, the first one is redundant.
714 Value *V = &I;
Michael Zolotukhin292d3ca2014-05-08 19:50:24 +0000715 unsigned Depth = 0;
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000716 while (V->hasOneUse() && Depth < 10) {
717 User *U = V->user_back();
Michael Zolotukhin292d3ca2014-05-08 19:50:24 +0000718 auto UserInsInst = dyn_cast<InsertValueInst>(U);
719 if (!UserInsInst || U->getOperand(0) != V)
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000720 break;
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000721 if (UserInsInst->getIndices() == FirstIndices) {
722 IsRedundant = true;
723 break;
724 }
725 V = UserInsInst;
726 Depth++;
727 }
728
729 if (IsRedundant)
Sanjay Patel4b198802016-02-01 22:23:39 +0000730 return replaceInstUsesWith(I, I.getOperand(0));
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000731 return nullptr;
732}
733
Sanjay Patel521f19f2016-09-02 17:05:43 +0000734static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
735 int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
736 int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
737
738 // A vector select does not change the size of the operands.
739 if (MaskSize != VecSize)
740 return false;
741
742 // Each mask element must be undefined or choose a vector element from one of
743 // the source operands without crossing vector lanes.
744 for (int i = 0; i != MaskSize; ++i) {
745 int Elt = Shuf.getMaskValue(i);
746 if (Elt != -1 && Elt != i && Elt != i + VecSize)
747 return false;
748 }
749
750 return true;
751}
752
Sanjay Patel71ad2272019-06-26 15:52:59 +0000753/// Turn a chain of inserts that splats a value into an insert + shuffle:
754/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
755/// shufflevector(insertelt(X, %k, 0), undef, zero)
756static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
757 // We are interested in the last insert in a chain. So if this insert has a
758 // single user and that user is an insert, bail.
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000759 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
760 return nullptr;
761
Sanjay Patel71ad2272019-06-26 15:52:59 +0000762 auto *VecTy = cast<VectorType>(InsElt.getType());
763 unsigned NumElements = VecTy->getNumElements();
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000764
765 // Do not try to do this for a one-element vector, since that's a nop,
766 // and will cause an inf-loop.
767 if (NumElements == 1)
768 return nullptr;
769
770 Value *SplatVal = InsElt.getOperand(1);
Fangrui Songf78650a2018-07-30 19:41:25 +0000771 InsertElementInst *CurrIE = &InsElt;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000772 SmallVector<bool, 16> ElementPresent(NumElements, false);
Florian Hahnb992fee2017-08-30 10:54:21 +0000773 InsertElementInst *FirstIE = nullptr;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000774
775 // Walk the chain backwards, keeping track of which indices we inserted into,
776 // until we hit something that isn't an insert of the splatted value.
777 while (CurrIE) {
Sanjay Patel863d4942017-11-27 18:19:32 +0000778 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000779 if (!Idx || CurrIE->getOperand(1) != SplatVal)
780 return nullptr;
781
Sanjay Patel863d4942017-11-27 18:19:32 +0000782 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
Florian Hahnb992fee2017-08-30 10:54:21 +0000783 // Check none of the intermediate steps have any additional uses, except
784 // for the root insertelement instruction, which can be re-used, if it
785 // inserts at position 0.
786 if (CurrIE != &InsElt &&
787 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000788 return nullptr;
789
790 ElementPresent[Idx->getZExtValue()] = true;
Florian Hahnb992fee2017-08-30 10:54:21 +0000791 FirstIE = CurrIE;
792 CurrIE = NextIE;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000793 }
794
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000795 // If this is just a single insertelement (not a sequence), we are done.
796 if (FirstIE == &InsElt)
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000797 return nullptr;
798
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000799 // If we are not inserting into an undef vector, make sure we've seen an
800 // insert into every element.
801 // TODO: If the base vector is not undef, it might be better to create a splat
802 // and then a select-shuffle (blend) with the base vector.
803 if (!isa<UndefValue>(FirstIE->getOperand(0)))
804 if (any_of(ElementPresent, [](bool Present) { return !Present; }))
805 return nullptr;
806
Sanjay Patel71ad2272019-06-26 15:52:59 +0000807 // Create the insert + shuffle.
808 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
809 UndefValue *UndefVec = UndefValue::get(VecTy);
810 Constant *Zero = ConstantInt::get(Int32Ty, 0);
811 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
812 FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000813
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000814 // Splat from element 0, but replace absent elements with undef in the mask.
815 SmallVector<Constant *, 16> Mask(NumElements, Zero);
816 for (unsigned i = 0; i != NumElements; ++i)
817 if (!ElementPresent[i])
818 Mask[i] = UndefValue::get(Int32Ty);
819
820 return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000821}
822
Sanjay Patel3dee1132019-07-08 19:48:52 +0000823/// Try to fold an insert element into an existing splat shuffle by changing
824/// the shuffle's mask to include the index of this insert element.
825static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
826 // Check if the vector operand of this insert is a canonical splat shuffle.
827 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
828 if (!Shuf || !Shuf->isZeroEltSplat())
829 return nullptr;
830
831 // Check for a constant insertion index.
832 uint64_t IdxC;
833 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
834 return nullptr;
835
836 // Check if the splat shuffle's input is the same as this insert's scalar op.
837 Value *X = InsElt.getOperand(1);
838 Value *Op0 = Shuf->getOperand(0);
839 if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
840 return nullptr;
841
842 // Replace the shuffle mask element at the index of this insert with a zero.
843 // For example:
844 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
845 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
846 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
847 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
848 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
849 Constant *Zero = ConstantInt::getNullValue(I32Ty);
850 for (unsigned i = 0; i != NumMaskElts; ++i)
851 NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
852
853 Constant *NewMask = ConstantVector::get(NewMaskVec);
854 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
855}
856
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000857/// Try to fold an extract+insert element into an existing identity shuffle by
858/// changing the shuffle's mask to include the index of this insert element.
859static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
860 // Check if the vector operand of this insert is an identity shuffle.
861 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
862 if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
863 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
864 return nullptr;
865
866 // Check for a constant insertion index.
867 uint64_t IdxC;
868 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
869 return nullptr;
870
871 // Check if this insert's scalar op is extracted from the identity shuffle's
872 // input vector.
873 Value *Scalar = InsElt.getOperand(1);
874 Value *X = Shuf->getOperand(0);
875 if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
876 return nullptr;
877
878 // Replace the shuffle mask element at the index of this extract+insert with
879 // that same index value.
880 // For example:
881 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
882 unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
883 SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
884 Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
885 Constant *NewMaskEltC = ConstantInt::get(I32Ty, IdxC);
886 Constant *OldMask = Shuf->getMask();
887 for (unsigned i = 0; i != NumMaskElts; ++i) {
888 if (i != IdxC) {
889 // All mask elements besides the inserted element remain the same.
890 NewMaskVec[i] = OldMask->getAggregateElement(i);
891 } else if (OldMask->getAggregateElement(i) == NewMaskEltC) {
892 // If the mask element was already set, there's nothing to do
893 // (demanded elements analysis may unset it later).
894 return nullptr;
895 } else {
896 assert(isa<UndefValue>(OldMask->getAggregateElement(i)) &&
897 "Unexpected shuffle mask element for identity shuffle");
898 NewMaskVec[i] = NewMaskEltC;
899 }
900 }
901
902 Constant *NewMask = ConstantVector::get(NewMaskVec);
903 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
904}
905
Sanjay Patel2f602ce2017-03-22 17:10:44 +0000906/// If we have an insertelement instruction feeding into another insertelement
907/// and the 2nd is inserting a constant into the vector, canonicalize that
908/// constant insertion before the insertion of a variable:
909///
910/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
911/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
912///
913/// This has the potential of eliminating the 2nd insertelement instruction
914/// via constant folding of the scalar constant into a vector constant.
915static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
916 InstCombiner::BuilderTy &Builder) {
917 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
918 if (!InsElt1 || !InsElt1->hasOneUse())
919 return nullptr;
920
921 Value *X, *Y;
922 Constant *ScalarC;
923 ConstantInt *IdxC1, *IdxC2;
924 if (match(InsElt1->getOperand(0), m_Value(X)) &&
925 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
926 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
927 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
928 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
929 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
930 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
931 }
932
933 return nullptr;
934}
935
Alexey Bataevfee90782016-09-23 09:14:08 +0000936/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
937/// --> shufflevector X, CVec', Mask'
Sanjay Patel521f19f2016-09-02 17:05:43 +0000938static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
Alexey Bataevfee90782016-09-23 09:14:08 +0000939 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
940 // Bail out if the parent has more than one use. In that case, we'd be
Sanjay Patel521f19f2016-09-02 17:05:43 +0000941 // replacing the insertelt with a shuffle, and that's not a clear win.
Alexey Bataevfee90782016-09-23 09:14:08 +0000942 if (!Inst || !Inst->hasOneUse())
Sanjay Patel521f19f2016-09-02 17:05:43 +0000943 return nullptr;
Alexey Bataevfee90782016-09-23 09:14:08 +0000944 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
945 // The shuffle must have a constant vector operand. The insertelt must have
946 // a constant scalar being inserted at a constant position in the vector.
947 Constant *ShufConstVec, *InsEltScalar;
948 uint64_t InsEltIndex;
949 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
950 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
951 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
952 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +0000953
Alexey Bataevfee90782016-09-23 09:14:08 +0000954 // Adding an element to an arbitrary shuffle could be expensive, but a
955 // shuffle that selects elements from vectors without crossing lanes is
956 // assumed cheap.
957 // If we're just adding a constant into that shuffle, it will still be
958 // cheap.
959 if (!isShuffleEquivalentToSelect(*Shuf))
960 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +0000961
Alexey Bataevfee90782016-09-23 09:14:08 +0000962 // From the above 'select' check, we know that the mask has the same number
963 // of elements as the vector input operands. We also know that each constant
964 // input element is used in its lane and can not be used more than once by
965 // the shuffle. Therefore, replace the constant in the shuffle's constant
966 // vector with the insertelt constant. Replace the constant in the shuffle's
967 // mask vector with the insertelt index plus the length of the vector
968 // (because the constant vector operand of a shuffle is always the 2nd
969 // operand).
970 Constant *Mask = Shuf->getMask();
971 unsigned NumElts = Mask->getType()->getVectorNumElements();
972 SmallVector<Constant *, 16> NewShufElts(NumElts);
973 SmallVector<Constant *, 16> NewMaskElts(NumElts);
974 for (unsigned I = 0; I != NumElts; ++I) {
975 if (I == InsEltIndex) {
976 NewShufElts[I] = InsEltScalar;
977 Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
978 NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
979 } else {
980 // Copy over the existing values.
981 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
982 NewMaskElts[I] = Mask->getAggregateElement(I);
983 }
Sanjay Patel521f19f2016-09-02 17:05:43 +0000984 }
Sanjay Patel521f19f2016-09-02 17:05:43 +0000985
Alexey Bataevfee90782016-09-23 09:14:08 +0000986 // Create new operands for a shuffle that includes the constant of the
987 // original insertelt. The old shuffle will be dead now.
988 return new ShuffleVectorInst(Shuf->getOperand(0),
989 ConstantVector::get(NewShufElts),
990 ConstantVector::get(NewMaskElts));
991 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
992 // Transform sequences of insertelements ops with constant data/indexes into
993 // a single shuffle op.
994 unsigned NumElts = InsElt.getType()->getNumElements();
995
996 uint64_t InsertIdx[2];
997 Constant *Val[2];
998 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
999 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1000 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1001 !match(IEI->getOperand(1), m_Constant(Val[1])))
1002 return nullptr;
1003 SmallVector<Constant *, 16> Values(NumElts);
1004 SmallVector<Constant *, 16> Mask(NumElts);
1005 auto ValI = std::begin(Val);
1006 // Generate new constant vector and mask.
1007 // We have 2 values/masks from the insertelements instructions. Insert them
1008 // into new value/mask vectors.
1009 for (uint64_t I : InsertIdx) {
1010 if (!Values[I]) {
1011 assert(!Mask[I]);
1012 Values[I] = *ValI;
1013 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
1014 NumElts + I);
1015 }
1016 ++ValI;
1017 }
1018 // Remaining values are filled with 'undef' values.
1019 for (unsigned I = 0; I < NumElts; ++I) {
1020 if (!Values[I]) {
1021 assert(!Mask[I]);
1022 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1023 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
1024 }
1025 }
1026 // Create new operands for a shuffle that includes the constant of the
1027 // original insertelt.
1028 return new ShuffleVectorInst(IEI->getOperand(0),
1029 ConstantVector::get(Values),
1030 ConstantVector::get(Mask));
1031 }
1032 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +00001033}
1034
Chris Lattnerec97a902010-01-05 05:36:20 +00001035Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
1036 Value *VecOp = IE.getOperand(0);
1037 Value *ScalarOp = IE.getOperand(1);
1038 Value *IdxOp = IE.getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001039
Igor Laevskye0edb662017-12-13 11:21:18 +00001040 if (auto *V = SimplifyInsertElementInst(
1041 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1042 return replaceInstUsesWith(IE, V);
1043
Sanjay Patel926e4772019-05-17 18:06:12 +00001044 // If the vector and scalar are both bitcast from the same element type, do
1045 // the insert in that source type followed by bitcast.
1046 Value *VecSrc, *ScalarSrc;
1047 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1048 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1049 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1050 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1051 VecSrc->getType()->getVectorElementType() == ScalarSrc->getType()) {
1052 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1053 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1054 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1055 return new BitCastInst(NewInsElt, IE.getType());
1056 }
1057
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001058 // If the inserted element was extracted from some other vector and both
Sanjay Patel93179632019-05-26 14:03:50 +00001059 // indexes are valid constants, try to turn this into a shuffle.
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001060 uint64_t InsertedIdx, ExtractedIdx;
1061 Value *ExtVecOp;
1062 if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1063 match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp),
Sanjay Patel93179632019-05-26 14:03:50 +00001064 m_ConstantInt(ExtractedIdx))) &&
1065 ExtractedIdx < ExtVecOp->getType()->getVectorNumElements()) {
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001066 // TODO: Looking at the user(s) to determine if this insert is a
1067 // fold-to-shuffle opportunity does not match the usual instcombine
1068 // constraints. We should decide if the transform is worthy based only
1069 // on this instruction and its operands, but that may not work currently.
1070 //
1071 // Here, we are trying to avoid creating shuffles before reaching
1072 // the end of a chain of extract-insert pairs. This is complicated because
1073 // we do not generally form arbitrary shuffle masks in instcombine
1074 // (because those may codegen poorly), but collectShuffleElements() does
1075 // exactly that.
1076 //
1077 // The rules for determining what is an acceptable target-independent
1078 // shuffle mask are fuzzy because they evolve based on the backend's
1079 // capabilities and real-world impact.
1080 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1081 if (!Insert.hasOneUse())
1082 return true;
1083 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1084 if (!InsertUser)
1085 return true;
1086 return false;
1087 };
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001088
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001089 // Try to form a shuffle from a chain of extract-insert ops.
1090 if (isShuffleRootCandidate(IE)) {
1091 SmallVector<Constant*, 16> Mask;
1092 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
Sanjay Patel729c4362018-10-20 16:25:55 +00001093
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001094 // The proposed shuffle may be trivial, in which case we shouldn't
1095 // perform the combine.
1096 if (LR.first != &IE && LR.second != &IE) {
1097 // We now have a shuffle of LHS, RHS, Mask.
1098 if (LR.second == nullptr)
1099 LR.second = UndefValue::get(LR.first->getType());
1100 return new ShuffleVectorInst(LR.first, LR.second,
1101 ConstantVector::get(Mask));
Chris Lattnerec97a902010-01-05 05:36:20 +00001102 }
1103 }
1104 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001105
Craig Topper17b55682016-12-29 07:03:18 +00001106 unsigned VWidth = VecOp->getType()->getVectorNumElements();
Chris Lattnerec97a902010-01-05 05:36:20 +00001107 APInt UndefElts(VWidth, 0);
1108 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
Eli Friedmanef200db2011-02-19 22:42:40 +00001109 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1110 if (V != &IE)
Sanjay Patel4b198802016-02-01 22:23:39 +00001111 return replaceInstUsesWith(IE, V);
Chris Lattnerec97a902010-01-05 05:36:20 +00001112 return &IE;
Eli Friedmanef200db2011-02-19 22:42:40 +00001113 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001114
Sanjay Patel521f19f2016-09-02 17:05:43 +00001115 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1116 return Shuf;
1117
Craig Topperbb4069e2017-07-07 23:16:26 +00001118 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
Sanjay Patel2f602ce2017-03-22 17:10:44 +00001119 return NewInsElt;
1120
Sanjay Patel71ad2272019-06-26 15:52:59 +00001121 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
Michael Kupersteincd7ad712016-12-28 00:18:08 +00001122 return Broadcast;
1123
Sanjay Patel3dee1132019-07-08 19:48:52 +00001124 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1125 return Splat;
1126
Sanjay Patelaff5bee2019-09-08 19:03:01 +00001127 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1128 return IdentityShuf;
1129
Craig Topperf40110f2014-04-25 05:29:35 +00001130 return nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +00001131}
1132
Nick Lewyckya2b77202013-05-31 00:59:42 +00001133/// Return true if we can evaluate the specified expression tree if the vector
1134/// elements were shuffled in a different order.
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001135static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
Nick Lewycky3f715e22013-06-01 20:51:31 +00001136 unsigned Depth = 5) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001137 // We can always reorder the elements of a constant.
1138 if (isa<Constant>(V))
1139 return true;
1140
1141 // We won't reorder vector arguments. No IPO here.
1142 Instruction *I = dyn_cast<Instruction>(V);
1143 if (!I) return false;
1144
1145 // Two users may expect different orders of the elements. Don't try it.
1146 if (!I->hasOneUse())
1147 return false;
1148
1149 if (Depth == 0) return false;
1150
1151 switch (I->getOpcode()) {
Bjorn Pettersson64562522019-10-18 07:42:02 +00001152 case Instruction::UDiv:
1153 case Instruction::SDiv:
1154 case Instruction::URem:
1155 case Instruction::SRem:
1156 // Propagating an undefined shuffle mask element to integer div/rem is not
1157 // allowed because those opcodes can create immediate undefined behavior
1158 // from an undefined element in an operand.
1159 if (llvm::any_of(Mask, [](int M){ return M == -1; }))
1160 return false;
1161 LLVM_FALLTHROUGH;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001162 case Instruction::Add:
1163 case Instruction::FAdd:
1164 case Instruction::Sub:
1165 case Instruction::FSub:
1166 case Instruction::Mul:
1167 case Instruction::FMul:
Nick Lewyckya2b77202013-05-31 00:59:42 +00001168 case Instruction::FDiv:
Nick Lewyckya2b77202013-05-31 00:59:42 +00001169 case Instruction::FRem:
1170 case Instruction::Shl:
1171 case Instruction::LShr:
1172 case Instruction::AShr:
1173 case Instruction::And:
1174 case Instruction::Or:
1175 case Instruction::Xor:
1176 case Instruction::ICmp:
1177 case Instruction::FCmp:
1178 case Instruction::Trunc:
1179 case Instruction::ZExt:
1180 case Instruction::SExt:
1181 case Instruction::FPToUI:
1182 case Instruction::FPToSI:
1183 case Instruction::UIToFP:
1184 case Instruction::SIToFP:
1185 case Instruction::FPTrunc:
1186 case Instruction::FPExt:
1187 case Instruction::GetElementPtr: {
Sanjay Patel26c119a2018-09-30 13:50:42 +00001188 // Bail out if we would create longer vector ops. We could allow creating
Bjorn Pettersson64562522019-10-18 07:42:02 +00001189 // longer vector ops, but that may result in more expensive codegen.
Sanjay Patel26c119a2018-09-30 13:50:42 +00001190 Type *ITy = I->getType();
1191 if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
1192 return false;
Sanjay Patel4e28753142015-11-16 22:16:52 +00001193 for (Value *Operand : I->operands()) {
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001194 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
Nick Lewyckya2b77202013-05-31 00:59:42 +00001195 return false;
1196 }
1197 return true;
1198 }
1199 case Instruction::InsertElement: {
1200 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1201 if (!CI) return false;
1202 int ElementNumber = CI->getLimitedValue();
1203
1204 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1205 // can't put an element into multiple indices.
1206 bool SeenOnce = false;
1207 for (int i = 0, e = Mask.size(); i != e; ++i) {
1208 if (Mask[i] == ElementNumber) {
1209 if (SeenOnce)
1210 return false;
1211 SeenOnce = true;
1212 }
1213 }
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001214 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001215 }
1216 }
1217 return false;
1218}
1219
1220/// Rebuild a new instruction just like 'I' but with the new operands given.
1221/// In the event of type mismatch, the type of the operands is correct.
Sanjay Patel431e1142015-11-17 17:24:08 +00001222static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001223 // We don't want to use the IRBuilder here because we want the replacement
1224 // instructions to appear next to 'I', not the builder's insertion point.
1225 switch (I->getOpcode()) {
1226 case Instruction::Add:
1227 case Instruction::FAdd:
1228 case Instruction::Sub:
1229 case Instruction::FSub:
1230 case Instruction::Mul:
1231 case Instruction::FMul:
1232 case Instruction::UDiv:
1233 case Instruction::SDiv:
1234 case Instruction::FDiv:
1235 case Instruction::URem:
1236 case Instruction::SRem:
1237 case Instruction::FRem:
1238 case Instruction::Shl:
1239 case Instruction::LShr:
1240 case Instruction::AShr:
1241 case Instruction::And:
1242 case Instruction::Or:
1243 case Instruction::Xor: {
1244 BinaryOperator *BO = cast<BinaryOperator>(I);
1245 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1246 BinaryOperator *New =
1247 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1248 NewOps[0], NewOps[1], "", BO);
1249 if (isa<OverflowingBinaryOperator>(BO)) {
1250 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1251 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1252 }
1253 if (isa<PossiblyExactOperator>(BO)) {
1254 New->setIsExact(BO->isExact());
1255 }
Owen Anderson48b842e2014-01-18 00:48:14 +00001256 if (isa<FPMathOperator>(BO))
1257 New->copyFastMathFlags(I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001258 return New;
1259 }
1260 case Instruction::ICmp:
1261 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1262 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1263 NewOps[0], NewOps[1]);
1264 case Instruction::FCmp:
1265 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1266 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1267 NewOps[0], NewOps[1]);
1268 case Instruction::Trunc:
1269 case Instruction::ZExt:
1270 case Instruction::SExt:
1271 case Instruction::FPToUI:
1272 case Instruction::FPToSI:
1273 case Instruction::UIToFP:
1274 case Instruction::SIToFP:
1275 case Instruction::FPTrunc:
1276 case Instruction::FPExt: {
1277 // It's possible that the mask has a different number of elements from
1278 // the original cast. We recompute the destination type to match the mask.
1279 Type *DestTy =
1280 VectorType::get(I->getType()->getScalarType(),
1281 NewOps[0]->getType()->getVectorNumElements());
1282 assert(NewOps.size() == 1 && "cast with #ops != 1");
1283 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1284 "", I);
1285 }
1286 case Instruction::GetElementPtr: {
1287 Value *Ptr = NewOps[0];
1288 ArrayRef<Value*> Idx = NewOps.slice(1);
David Blaikie22319eb2015-03-14 19:24:04 +00001289 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1290 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001291 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1292 return GEP;
1293 }
1294 }
1295 llvm_unreachable("failed to rebuild vector instructions");
1296}
1297
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001298static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001299 // Mask.size() does not need to be equal to the number of vector elements.
1300
1301 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Sanjay Patelce36b032017-10-09 17:54:46 +00001302 Type *EltTy = V->getType()->getScalarType();
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001303 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
Sanjay Patelce36b032017-10-09 17:54:46 +00001304 if (isa<UndefValue>(V))
1305 return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1306
1307 if (isa<ConstantAggregateZero>(V))
1308 return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1309
Nick Lewyckya2b77202013-05-31 00:59:42 +00001310 if (Constant *C = dyn_cast<Constant>(V)) {
1311 SmallVector<Constant *, 16> MaskValues;
1312 for (int i = 0, e = Mask.size(); i != e; ++i) {
1313 if (Mask[i] == -1)
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001314 MaskValues.push_back(UndefValue::get(I32Ty));
Nick Lewyckya2b77202013-05-31 00:59:42 +00001315 else
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001316 MaskValues.push_back(ConstantInt::get(I32Ty, Mask[i]));
Nick Lewyckya2b77202013-05-31 00:59:42 +00001317 }
1318 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1319 ConstantVector::get(MaskValues));
1320 }
1321
1322 Instruction *I = cast<Instruction>(V);
1323 switch (I->getOpcode()) {
1324 case Instruction::Add:
1325 case Instruction::FAdd:
1326 case Instruction::Sub:
1327 case Instruction::FSub:
1328 case Instruction::Mul:
1329 case Instruction::FMul:
1330 case Instruction::UDiv:
1331 case Instruction::SDiv:
1332 case Instruction::FDiv:
1333 case Instruction::URem:
1334 case Instruction::SRem:
1335 case Instruction::FRem:
1336 case Instruction::Shl:
1337 case Instruction::LShr:
1338 case Instruction::AShr:
1339 case Instruction::And:
1340 case Instruction::Or:
1341 case Instruction::Xor:
1342 case Instruction::ICmp:
1343 case Instruction::FCmp:
1344 case Instruction::Trunc:
1345 case Instruction::ZExt:
1346 case Instruction::SExt:
1347 case Instruction::FPToUI:
1348 case Instruction::FPToSI:
1349 case Instruction::UIToFP:
1350 case Instruction::SIToFP:
1351 case Instruction::FPTrunc:
1352 case Instruction::FPExt:
1353 case Instruction::Select:
1354 case Instruction::GetElementPtr: {
1355 SmallVector<Value*, 8> NewOps;
1356 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
1357 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
Mikael Holmen150a7ec2019-04-01 14:10:10 +00001358 Value *V;
1359 // Recursively call evaluateInDifferentElementOrder on vector arguments
1360 // as well. E.g. GetElementPtr may have scalar operands even if the
1361 // return value is a vector, so we need to examine the operand type.
1362 if (I->getOperand(i)->getType()->isVectorTy())
1363 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1364 else
1365 V = I->getOperand(i);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001366 NewOps.push_back(V);
1367 NeedsRebuild |= (V != I->getOperand(i));
1368 }
1369 if (NeedsRebuild) {
Sanjay Patel431e1142015-11-17 17:24:08 +00001370 return buildNew(I, NewOps);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001371 }
1372 return I;
1373 }
1374 case Instruction::InsertElement: {
1375 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
Nick Lewyckya2b77202013-05-31 00:59:42 +00001376
1377 // The insertelement was inserting at Element. Figure out which element
1378 // that becomes after shuffling. The answer is guaranteed to be unique
1379 // by CanEvaluateShuffled.
Nick Lewycky3f715e22013-06-01 20:51:31 +00001380 bool Found = false;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001381 int Index = 0;
Nick Lewycky3f715e22013-06-01 20:51:31 +00001382 for (int e = Mask.size(); Index != e; ++Index) {
1383 if (Mask[Index] == Element) {
1384 Found = true;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001385 break;
Nick Lewycky3f715e22013-06-01 20:51:31 +00001386 }
1387 }
Nick Lewyckya2b77202013-05-31 00:59:42 +00001388
Hao Liu26abebb2014-01-08 03:06:15 +00001389 // If element is not in Mask, no need to handle the operand 1 (element to
1390 // be inserted). Just evaluate values in operand 0 according to Mask.
Nick Lewycky3f715e22013-06-01 20:51:31 +00001391 if (!Found)
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001392 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
Joey Goulya3250f22013-07-12 23:08:06 +00001393
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001394 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001395 return InsertElementInst::Create(V, I->getOperand(1),
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001396 ConstantInt::get(I32Ty, Index), "", I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001397 }
1398 }
1399 llvm_unreachable("failed to reorder elements of vector instruction!");
1400}
Chris Lattnerec97a902010-01-05 05:36:20 +00001401
JF Bastiend52c9902015-02-25 22:30:51 +00001402// Returns true if the shuffle is extracting a contiguous range of values from
1403// LHS, for example:
1404// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1405// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1406// Shuffles to: |EE|FF|GG|HH|
1407// +--+--+--+--+
1408static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1409 SmallVector<int, 16> &Mask) {
Craig Topper17b55682016-12-29 07:03:18 +00001410 unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
JF Bastiend52c9902015-02-25 22:30:51 +00001411 unsigned MaskElems = Mask.size();
1412 unsigned BegIdx = Mask.front();
1413 unsigned EndIdx = Mask.back();
1414 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1415 return false;
1416 for (unsigned I = 0; I != MaskElems; ++I)
1417 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1418 return false;
1419 return true;
1420}
1421
Sanjay Patelb999d742018-07-02 17:42:29 +00001422/// These are the ingredients in an alternate form binary operator as described
1423/// below.
1424struct BinopElts {
1425 BinaryOperator::BinaryOps Opcode;
1426 Value *Op0;
1427 Value *Op1;
1428 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1429 Value *V0 = nullptr, Value *V1 = nullptr) :
1430 Opcode(Opc), Op0(V0), Op1(V1) {}
1431 operator bool() const { return Opcode != 0; }
1432};
1433
1434/// Binops may be transformed into binops with different opcodes and operands.
1435/// Reverse the usual canonicalization to enable folds with the non-canonical
1436/// form of the binop. If a transform is possible, return the elements of the
1437/// new binop. If not, return invalid elements.
1438static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1439 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1440 Type *Ty = BO->getType();
1441 switch (BO->getOpcode()) {
1442 case Instruction::Shl: {
1443 // shl X, C --> mul X, (1 << C)
1444 Constant *C;
1445 if (match(BO1, m_Constant(C))) {
1446 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1447 return { Instruction::Mul, BO0, ShlOne };
1448 }
1449 break;
1450 }
1451 case Instruction::Or: {
1452 // or X, C --> add X, C (when X and C have no common bits set)
1453 const APInt *C;
1454 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1455 return { Instruction::Add, BO0, BO1 };
1456 break;
1457 }
1458 default:
1459 break;
1460 }
1461 return {};
1462}
1463
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001464static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1465 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1466
1467 // Are we shuffling together some value and that same value after it has been
1468 // modified by a binop with a constant?
1469 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1470 Constant *C;
1471 bool Op0IsBinop;
1472 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1473 Op0IsBinop = true;
1474 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1475 Op0IsBinop = false;
1476 else
1477 return nullptr;
1478
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001479 // The identity constant for a binop leaves a variable operand unchanged. For
1480 // a vector, this is a splat of something like 0, -1, or 1.
1481 // If there's no identity constant for this binop, we're done.
Sanjay Patel509a1e72018-07-10 15:12:31 +00001482 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001483 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
Sanjay Patel509a1e72018-07-10 15:12:31 +00001484 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001485 if (!IdC)
1486 return nullptr;
1487
1488 // Shuffle identity constants into the lanes that return the original value.
1489 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1490 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1491 // The existing binop constant vector remains in the same operand position.
1492 Constant *Mask = Shuf.getMask();
1493 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1494 ConstantExpr::getShuffleVector(IdC, C, Mask);
1495
Sanjay Patel509a1e72018-07-10 15:12:31 +00001496 bool MightCreatePoisonOrUB =
1497 Mask->containsUndefElement() &&
1498 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1499 if (MightCreatePoisonOrUB)
1500 NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1501
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001502 // shuf (bop X, C), X, M --> bop X, C'
1503 // shuf X, (bop X, C), M --> bop X, C'
Sanjay Patel509a1e72018-07-10 15:12:31 +00001504 Value *X = Op0IsBinop ? Op1 : Op0;
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001505 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1506 NewBO->copyIRFlags(BO);
Sanjay Patel33331062018-07-10 14:27:55 +00001507
1508 // An undef shuffle mask element may propagate as an undef constant element in
1509 // the new binop. That would produce poison where the original code might not.
Sanjay Patel509a1e72018-07-10 15:12:31 +00001510 // If we already made a safe constant, then there's no danger.
1511 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
Sanjay Patel33331062018-07-10 14:27:55 +00001512 NewBO->dropPoisonGeneratingFlags();
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001513 return NewBO;
1514}
1515
Sanjay Patel0b591032019-07-08 16:26:48 +00001516/// If we have an insert of a scalar to a non-zero element of an undefined
1517/// vector and then shuffle that value, that's the same as inserting to the zero
1518/// element and shuffling. Splatting from the zero element is recognized as the
1519/// canonical form of splat.
1520static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1521 InstCombiner::BuilderTy &Builder) {
1522 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1523 Constant *Mask = Shuf.getMask();
1524 Value *X;
1525 uint64_t IndexC;
1526
1527 // Match a shuffle that is a splat to a non-zero element.
1528 if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1529 m_ConstantInt(IndexC)))) ||
1530 !match(Op1, m_Undef()) || match(Mask, m_ZeroInt()) || IndexC == 0)
1531 return nullptr;
1532
1533 // Insert into element 0 of an undef vector.
1534 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1535 Constant *Zero = Builder.getInt32(0);
1536 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1537
1538 // Splat from element 0. Any mask element that is undefined remains undefined.
1539 // For example:
1540 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1541 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1542 unsigned NumMaskElts = Shuf.getType()->getVectorNumElements();
1543 SmallVector<Constant *, 16> NewMask(NumMaskElts, Zero);
1544 for (unsigned i = 0; i != NumMaskElts; ++i)
1545 if (isa<UndefValue>(Mask->getAggregateElement(i)))
1546 NewMask[i] = Mask->getAggregateElement(i);
1547
1548 return new ShuffleVectorInst(NewIns, UndefVec, ConstantVector::get(NewMask));
1549}
1550
Sanjay Patelb999d742018-07-02 17:42:29 +00001551/// Try to fold shuffles that are the equivalent of a vector select.
Sanjay Patelda667532018-06-29 13:44:06 +00001552static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
Sanjay Patelb999d742018-07-02 17:42:29 +00001553 InstCombiner::BuilderTy &Builder,
1554 const DataLayout &DL) {
Sanjay Patela76b7002018-06-21 20:15:09 +00001555 if (!Shuf.isSelect())
1556 return nullptr;
1557
Sanjay Patele85d2e42019-11-25 11:55:57 -05001558 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1559 // Commuting undef to operand 0 conflicts with another canonicalization.
Sanjay Patelb276dd12019-03-31 15:01:30 +00001560 unsigned NumElts = Shuf.getType()->getVectorNumElements();
Sanjay Patele85d2e42019-11-25 11:55:57 -05001561 if (!isa<UndefValue>(Shuf.getOperand(1)) &&
1562 Shuf.getMaskValue(0) >= (int)NumElts) {
Sanjay Patelb33938d2019-04-08 13:28:29 +00001563 // TODO: Can we assert that both operands of a shuffle-select are not undef
1564 // (otherwise, it would have been folded by instsimplify?
Sanjay Patelb276dd12019-03-31 15:01:30 +00001565 Shuf.commute();
1566 return &Shuf;
1567 }
1568
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001569 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1570 return I;
1571
Sanjay Patela76b7002018-06-21 20:15:09 +00001572 BinaryOperator *B0, *B1;
1573 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1574 !match(Shuf.getOperand(1), m_BinOp(B1)))
1575 return nullptr;
1576
Sanjay Patelda667532018-06-29 13:44:06 +00001577 Value *X, *Y;
Sanjay Patela76b7002018-06-21 20:15:09 +00001578 Constant *C0, *C1;
Sanjay Patela52963b2018-06-22 12:46:16 +00001579 bool ConstantsAreOp1;
1580 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
Sanjay Patelda667532018-06-29 13:44:06 +00001581 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
Sanjay Patela52963b2018-06-22 12:46:16 +00001582 ConstantsAreOp1 = true;
1583 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
Sanjay Patelda667532018-06-29 13:44:06 +00001584 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
Sanjay Patela52963b2018-06-22 12:46:16 +00001585 ConstantsAreOp1 = false;
1586 else
Sanjay Patela76b7002018-06-21 20:15:09 +00001587 return nullptr;
1588
Sanjay Patel57bda362018-06-28 17:48:04 +00001589 // We need matching binops to fold the lanes together.
1590 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1591 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1592 bool DropNSW = false;
1593 if (ConstantsAreOp1 && Opc0 != Opc1) {
Sanjay Patel57bda362018-06-28 17:48:04 +00001594 // TODO: We drop "nsw" if shift is converted into multiply because it may
1595 // not be correct when the shift amount is BitWidth - 1. We could examine
1596 // each vector element to determine if it is safe to keep that flag.
Sanjay Patelb999d742018-07-02 17:42:29 +00001597 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
Sanjay Patel57bda362018-06-28 17:48:04 +00001598 DropNSW = true;
Sanjay Patelb999d742018-07-02 17:42:29 +00001599 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1600 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1601 Opc0 = AltB0.Opcode;
1602 C0 = cast<Constant>(AltB0.Op1);
1603 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1604 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1605 Opc1 = AltB1.Opcode;
1606 C1 = cast<Constant>(AltB1.Op1);
Sanjay Patel57bda362018-06-28 17:48:04 +00001607 }
1608 }
1609
1610 if (Opc0 != Opc1)
Sanjay Patel4784e152018-06-21 23:56:59 +00001611 return nullptr;
1612
Sanjay Patel57bda362018-06-28 17:48:04 +00001613 // The opcodes must be the same. Use a new name to make that clear.
1614 BinaryOperator::BinaryOps BOpc = Opc0;
1615
Sanjay Patel06ea4202018-07-10 13:33:26 +00001616 // Select the constant elements needed for the single binop.
1617 Constant *Mask = Shuf.getMask();
1618 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1619
Sanjay Patel5bd36642018-07-09 13:21:46 +00001620 // We are moving a binop after a shuffle. When a shuffle has an undefined
1621 // mask element, the result is undefined, but it is not poison or undefined
1622 // behavior. That is not necessarily true for div/rem/shift.
Sanjay Patel5bd36642018-07-09 13:21:46 +00001623 bool MightCreatePoisonOrUB =
1624 Mask->containsUndefElement() &&
1625 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
Sanjay Patel06ea4202018-07-10 13:33:26 +00001626 if (MightCreatePoisonOrUB)
1627 NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
Sanjay Patel5bd36642018-07-09 13:21:46 +00001628
Sanjay Patelda667532018-06-29 13:44:06 +00001629 Value *V;
1630 if (X == Y) {
1631 // Remove a binop and the shuffle by rearranging the constant:
1632 // shuffle (op V, C0), (op V, C1), M --> op V, C'
1633 // shuffle (op C0, V), (op C1, V), M --> op C', V
1634 V = X;
Sanjay Patel5bd36642018-07-09 13:21:46 +00001635 } else {
Sanjay Patelda667532018-06-29 13:44:06 +00001636 // If there are 2 different variable operands, we must create a new shuffle
1637 // (select) first, so check uses to ensure that we don't end up with more
1638 // instructions than we started with.
Sanjay Patel5bd36642018-07-09 13:21:46 +00001639 if (!B0->hasOneUse() && !B1->hasOneUse())
1640 return nullptr;
1641
Sanjay Patel06ea4202018-07-10 13:33:26 +00001642 // If we use the original shuffle mask and op1 is *variable*, we would be
1643 // putting an undef into operand 1 of div/rem/shift. This is either UB or
1644 // poison. We do not have to guard against UB when *constants* are op1
1645 // because safe constants guarantee that we do not overflow sdiv/srem (and
1646 // there's no danger for other opcodes).
1647 // TODO: To allow this case, create a new shuffle mask with no undefs.
1648 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001649 return nullptr;
1650
Sanjay Patelda667532018-06-29 13:44:06 +00001651 // Note: In general, we do not create new shuffles in InstCombine because we
1652 // do not know if a target can lower an arbitrary shuffle optimally. In this
1653 // case, the shuffle uses the existing mask, so there is no additional risk.
Sanjay Patelda667532018-06-29 13:44:06 +00001654
1655 // Select the variable vectors first, then perform the binop:
1656 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1657 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001658 V = Builder.CreateShuffleVector(X, Y, Mask);
Sanjay Patelda667532018-06-29 13:44:06 +00001659 }
1660
Sanjay Patelda667532018-06-29 13:44:06 +00001661 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1662 BinaryOperator::Create(BOpc, NewC, V);
Sanjay Patela76b7002018-06-21 20:15:09 +00001663
Sanjay Patel5bd36642018-07-09 13:21:46 +00001664 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1665 // 1. If we changed an opcode, poison conditions might have changed.
1666 // 2. If the shuffle had undef mask elements, the new binop might have undefs
Sanjay Patelc8d9d812018-07-10 16:09:49 +00001667 // where the original code did not. But if we already made a safe constant,
1668 // then there's no danger.
Sanjay Patela76b7002018-06-21 20:15:09 +00001669 NewBO->copyIRFlags(B0);
1670 NewBO->andIRFlags(B1);
Sanjay Patel57bda362018-06-28 17:48:04 +00001671 if (DropNSW)
1672 NewBO->setHasNoSignedWrap(false);
Sanjay Patelc8d9d812018-07-10 16:09:49 +00001673 if (Mask->containsUndefElement() && !MightCreatePoisonOrUB)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001674 NewBO->dropPoisonGeneratingFlags();
Sanjay Patela76b7002018-06-21 20:15:09 +00001675 return NewBO;
1676}
1677
Sanjay Patelc1416b62018-09-07 21:03:34 +00001678/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1679/// narrowing (concatenating with undef and extracting back to the original
1680/// length). This allows replacing the wide select with a narrow select.
Sanjay Patel88194df2018-10-09 15:29:26 +00001681static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1682 InstCombiner::BuilderTy &Builder) {
Sanjay Patelc1416b62018-09-07 21:03:34 +00001683 // This must be a narrowing identity shuffle. It extracts the 1st N elements
1684 // of the 1st vector operand of a shuffle.
1685 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1686 return nullptr;
1687
1688 // The vector being shuffled must be a vector select that we can eliminate.
1689 // TODO: The one-use requirement could be eased if X and/or Y are constants.
1690 Value *Cond, *X, *Y;
1691 if (!match(Shuf.getOperand(0),
1692 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1693 return nullptr;
1694
1695 // We need a narrow condition value. It must be extended with undef elements
1696 // and have the same number of elements as this shuffle.
1697 unsigned NarrowNumElts = Shuf.getType()->getVectorNumElements();
1698 Value *NarrowCond;
1699 if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef(),
1700 m_Constant()))) ||
1701 NarrowCond->getType()->getVectorNumElements() != NarrowNumElts ||
1702 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1703 return nullptr;
1704
1705 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1706 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1707 Value *Undef = UndefValue::get(X->getType());
1708 Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getMask());
1709 Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getMask());
1710 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1711}
1712
Sanjay Patel71811462018-10-14 15:25:06 +00001713/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1714static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1715 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1716 if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1717 return nullptr;
1718
1719 Value *X, *Y;
1720 Constant *Mask;
1721 if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
1722 return nullptr;
1723
Sanjay Patelcddb1e52019-02-05 22:58:45 +00001724 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1725 // then combining may result in worse codegen.
1726 if (!Op0->hasOneUse())
1727 return nullptr;
1728
Sanjay Patel71811462018-10-14 15:25:06 +00001729 // We are extracting a subvector from a shuffle. Remove excess elements from
1730 // the 1st shuffle mask to eliminate the extract.
1731 //
1732 // This transform is conservatively limited to identity extracts because we do
1733 // not allow arbitrary shuffle mask creation as a target-independent transform
1734 // (because we can't guarantee that will lower efficiently).
1735 //
1736 // If the extracting shuffle has an undef mask element, it transfers to the
1737 // new shuffle mask. Otherwise, copy the original mask element. Example:
1738 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1739 // shuf X, Y, <C0, undef, C2, undef>
1740 unsigned NumElts = Shuf.getType()->getVectorNumElements();
1741 SmallVector<Constant *, 16> NewMask(NumElts);
1742 assert(NumElts < Mask->getType()->getVectorNumElements() &&
1743 "Identity with extract must have less elements than its inputs");
1744
1745 for (unsigned i = 0; i != NumElts; ++i) {
1746 Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i);
1747 Constant *MaskElt = Mask->getAggregateElement(i);
1748 NewMask[i] = isa<UndefValue>(ExtractMaskElt) ? ExtractMaskElt : MaskElt;
1749 }
1750 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1751}
1752
Sanjay Patel396d18a2019-12-10 10:10:05 -05001753/// Try to replace a shuffle with an insertelement or try to replace a shuffle
1754/// operand with the operand of an insertelement.
Nikita Popov5a8819b2020-02-03 21:17:36 +01001755static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
1756 InstCombiner &IC) {
Sanjay Patelb12e4102018-10-30 15:26:39 +00001757 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
1758 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1759
1760 // The shuffle must not change vector sizes.
1761 // TODO: This restriction could be removed if the insert has only one use
1762 // (because the transform would require a new length-changing shuffle).
1763 int NumElts = Mask.size();
1764 if (NumElts != (int)(V0->getType()->getVectorNumElements()))
1765 return nullptr;
1766
Sanjay Patel396d18a2019-12-10 10:10:05 -05001767 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
1768 // not be able to handle it there if the insertelement has >1 use.
1769 // If the shuffle has an insertelement operand but does not choose the
1770 // inserted scalar element from that value, then we can replace that shuffle
1771 // operand with the source vector of the insertelement.
1772 Value *X;
1773 uint64_t IdxC;
1774 if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1775 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
Nikita Popov5a8819b2020-02-03 21:17:36 +01001776 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1777 return IC.replaceOperand(Shuf, 0, X);
Sanjay Patel396d18a2019-12-10 10:10:05 -05001778 }
1779 if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1780 // Offset the index constant by the vector width because we are checking for
1781 // accesses to the 2nd vector input of the shuffle.
1782 IdxC += NumElts;
1783 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
Nikita Popov5a8819b2020-02-03 21:17:36 +01001784 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1785 return IC.replaceOperand(Shuf, 1, X);
Sanjay Patel396d18a2019-12-10 10:10:05 -05001786 }
1787
Sanjay Patelb12e4102018-10-30 15:26:39 +00001788 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1789 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1790 // We need an insertelement with a constant index.
1791 if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1792 m_ConstantInt(IndexC))))
1793 return false;
1794
1795 // Test the shuffle mask to see if it splices the inserted scalar into the
1796 // operand 1 vector of the shuffle.
1797 int NewInsIndex = -1;
1798 for (int i = 0; i != NumElts; ++i) {
1799 // Ignore undef mask elements.
1800 if (Mask[i] == -1)
1801 continue;
1802
1803 // The shuffle takes elements of operand 1 without lane changes.
1804 if (Mask[i] == NumElts + i)
1805 continue;
1806
1807 // The shuffle must choose the inserted scalar exactly once.
1808 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1809 return false;
1810
1811 // The shuffle is placing the inserted scalar into element i.
1812 NewInsIndex = i;
1813 }
1814
1815 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1816
1817 // Index is updated to the potentially translated insertion lane.
1818 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1819 return true;
1820 };
1821
1822 // If the shuffle is unnecessary, insert the scalar operand directly into
1823 // operand 1 of the shuffle. Example:
1824 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1825 Value *Scalar;
1826 ConstantInt *IndexC;
1827 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1828 return InsertElementInst::Create(V1, Scalar, IndexC);
1829
1830 // Try again after commuting shuffle. Example:
1831 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1832 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1833 std::swap(V0, V1);
1834 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1835 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1836 return InsertElementInst::Create(V1, Scalar, IndexC);
1837
1838 return nullptr;
1839}
1840
Sanjay Patel6a554182019-05-22 00:32:25 +00001841static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1842 // Match the operands as identity with padding (also known as concatenation
1843 // with undef) shuffles of the same source type. The backend is expected to
1844 // recreate these concatenations from a shuffle of narrow operands.
1845 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1846 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1847 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1848 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1849 return nullptr;
1850
1851 // We limit this transform to power-of-2 types because we expect that the
1852 // backend can convert the simplified IR patterns to identical nodes as the
1853 // original IR.
Sanjay Patel3249be12019-05-23 18:46:03 +00001854 // TODO: If we can verify the same behavior for arbitrary types, the
1855 // power-of-2 checks can be removed.
Sanjay Patel6a554182019-05-22 00:32:25 +00001856 Value *X = Shuffle0->getOperand(0);
1857 Value *Y = Shuffle1->getOperand(0);
1858 if (X->getType() != Y->getType() ||
1859 !isPowerOf2_32(Shuf.getType()->getVectorNumElements()) ||
1860 !isPowerOf2_32(Shuffle0->getType()->getVectorNumElements()) ||
1861 !isPowerOf2_32(X->getType()->getVectorNumElements()) ||
1862 isa<UndefValue>(X) || isa<UndefValue>(Y))
1863 return nullptr;
1864 assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1865 isa<UndefValue>(Shuffle1->getOperand(1)) &&
1866 "Unexpected operand for identity shuffle");
1867
1868 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1869 // operands directly by adjusting the shuffle mask to account for the narrower
1870 // types:
1871 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
1872 int NarrowElts = X->getType()->getVectorNumElements();
1873 int WideElts = Shuffle0->getType()->getVectorNumElements();
1874 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1875
1876 Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
1877 SmallVector<int, 16> Mask = Shuf.getShuffleMask();
1878 SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1879 for (int i = 0, e = Mask.size(); i != e; ++i) {
1880 if (Mask[i] == -1)
1881 continue;
Sanjay Patel3249be12019-05-23 18:46:03 +00001882
1883 // If this shuffle is choosing an undef element from 1 of the sources, that
1884 // element is undef.
1885 if (Mask[i] < WideElts) {
1886 if (Shuffle0->getMaskValue(Mask[i]) == -1)
1887 continue;
1888 } else {
1889 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1890 continue;
1891 }
1892
1893 // If this shuffle is choosing from the 1st narrow op, the mask element is
1894 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1895 // element is offset down to adjust for the narrow vector widths.
1896 if (Mask[i] < WideElts) {
1897 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
Sanjay Patel6a554182019-05-22 00:32:25 +00001898 NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
Sanjay Patel3249be12019-05-23 18:46:03 +00001899 } else {
1900 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
Sanjay Patel6a554182019-05-22 00:32:25 +00001901 NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
Sanjay Patel3249be12019-05-23 18:46:03 +00001902 }
Sanjay Patel6a554182019-05-22 00:32:25 +00001903 }
1904 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1905}
1906
Chris Lattnerec97a902010-01-05 05:36:20 +00001907Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1908 Value *LHS = SVI.getOperand(0);
1909 Value *RHS = SVI.getOperand(1);
Craig Toppera4205622017-06-09 03:21:29 +00001910 if (auto *V = SimplifyShuffleVectorInst(
1911 LHS, RHS, SVI.getMask(), SVI.getType(), SQ.getWithInstruction(&SVI)))
Zvi Rackover82bf48d2017-04-04 04:47:57 +00001912 return replaceInstUsesWith(SVI, V);
1913
Sanjay Patel35827162019-11-25 13:30:45 -05001914 // shuffle x, x, mask --> shuffle x, undef, mask'
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001915 unsigned VWidth = SVI.getType()->getVectorNumElements();
1916 unsigned LHSWidth = LHS->getType()->getVectorNumElements();
1917 SmallVector<int, 16> Mask = SVI.getShuffleMask();
1918 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
Sanjay Patel35827162019-11-25 13:30:45 -05001919 if (LHS == RHS) {
Sanjay Patel847aabf2019-11-25 10:54:18 -05001920 assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
Chris Lattnerec97a902010-01-05 05:36:20 +00001921 // Remap any references to RHS to use LHS.
Chris Lattner0256be92012-01-27 03:08:05 +00001922 SmallVector<Constant*, 16> Elts;
Sanjay Patel20684092019-11-25 10:40:21 -05001923 for (unsigned i = 0; i != VWidth; ++i) {
Sanjay Patel35827162019-11-25 13:30:45 -05001924 // Propagate undef elements or force mask to LHS.
1925 if (Mask[i] < 0)
JF Bastiend52c9902015-02-25 22:30:51 +00001926 Elts.push_back(UndefValue::get(Int32Ty));
Sanjay Patelfc31b582019-11-25 11:11:12 -05001927 else
1928 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i] % LHSWidth));
Chris Lattnerec97a902010-01-05 05:36:20 +00001929 }
Nikita Popovd4627b92020-02-08 17:02:10 +01001930 return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()),
1931 ConstantVector::get(Elts));
Chris Lattnerec97a902010-01-05 05:36:20 +00001932 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001933
Sanjay Patel35827162019-11-25 13:30:45 -05001934 // shuffle undef, x, mask --> shuffle x, undef, mask'
1935 if (isa<UndefValue>(LHS)) {
1936 SVI.commute();
1937 return &SVI;
1938 }
1939
Sanjay Patel0b591032019-07-08 16:26:48 +00001940 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
1941 return I;
1942
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001943 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
1944 return I;
1945
1946 if (Instruction *I = narrowVectorSelect(SVI, Builder))
1947 return I;
1948
1949 APInt UndefElts(VWidth, 0);
1950 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1951 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
1952 if (V != &SVI)
1953 return replaceInstUsesWith(SVI, V);
1954 return &SVI;
1955 }
1956
1957 if (Instruction *I = foldIdentityExtractShuffle(SVI))
1958 return I;
1959
Sanjay Patel6a554182019-05-22 00:32:25 +00001960 // These transforms have the potential to lose undef knowledge, so they are
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001961 // intentionally placed after SimplifyDemandedVectorElts().
Nikita Popov5a8819b2020-02-03 21:17:36 +01001962 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001963 return I;
Sanjay Patel6a554182019-05-22 00:32:25 +00001964 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
1965 return I;
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001966
Sanjay Patel26c119a2018-09-30 13:50:42 +00001967 if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001968 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
Sanjay Patel4b198802016-02-01 22:23:39 +00001969 return replaceInstUsesWith(SVI, V);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001970 }
1971
JF Bastiend52c9902015-02-25 22:30:51 +00001972 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
1973 // a non-vector type. We can instead bitcast the original vector followed by
1974 // an extract of the desired element:
1975 //
1976 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
1977 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1978 // %1 = bitcast <4 x i8> %sroa to i32
1979 // Becomes:
1980 // %bc = bitcast <16 x i8> %in to <4 x i32>
1981 // %ext = extractelement <4 x i32> %bc, i32 0
1982 //
1983 // If the shuffle is extracting a contiguous range of values from the input
1984 // vector then each use which is a bitcast of the extracted size can be
1985 // replaced. This will work if the vector types are compatible, and the begin
1986 // index is aligned to a value in the casted vector type. If the begin index
1987 // isn't aligned then we can shuffle the original vector (keeping the same
1988 // vector type) before extracting.
1989 //
1990 // This code will bail out if the target type is fundamentally incompatible
1991 // with vectors of the source type.
1992 //
1993 // Example of <16 x i8>, target type i32:
1994 // Index range [4,8): v-----------v Will work.
1995 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1996 // <16 x i8>: | | | | | | | | | | | | | | | | |
1997 // <4 x i32>: | | | | |
1998 // +-----------+-----------+-----------+-----------+
1999 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2000 // Target type i40: ^--------------^ Won't work, bail.
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002001 bool MadeChange = false;
JF Bastiend52c9902015-02-25 22:30:51 +00002002 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2003 Value *V = LHS;
2004 unsigned MaskElems = Mask.size();
JF Bastiend52c9902015-02-25 22:30:51 +00002005 VectorType *SrcTy = cast<VectorType>(V->getType());
2006 unsigned VecBitWidth = SrcTy->getBitWidth();
David Majnemer98cfe2b2015-04-03 20:18:40 +00002007 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
JF Bastiend52c9902015-02-25 22:30:51 +00002008 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2009 unsigned SrcNumElems = SrcTy->getNumElements();
2010 SmallVector<BitCastInst *, 8> BCs;
2011 DenseMap<Type *, Value *> NewBCs;
2012 for (User *U : SVI.users())
2013 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2014 if (!BC->use_empty())
2015 // Only visit bitcasts that weren't previously handled.
2016 BCs.push_back(BC);
2017 for (BitCastInst *BC : BCs) {
Eugene Leviant958fcd72017-02-17 07:36:03 +00002018 unsigned BegIdx = Mask.front();
JF Bastiend52c9902015-02-25 22:30:51 +00002019 Type *TgtTy = BC->getDestTy();
David Majnemer98cfe2b2015-04-03 20:18:40 +00002020 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
JF Bastiend52c9902015-02-25 22:30:51 +00002021 if (!TgtElemBitWidth)
2022 continue;
2023 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2024 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2025 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2026 if (!VecBitWidthsEqual)
2027 continue;
2028 if (!VectorType::isValidElementType(TgtTy))
2029 continue;
2030 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
2031 if (!BegIsAligned) {
2032 // Shuffle the input so [0,NumElements) contains the output, and
2033 // [NumElems,SrcNumElems) is undef.
2034 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
2035 UndefValue::get(Int32Ty));
2036 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2037 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
Craig Topperbb4069e2017-07-07 23:16:26 +00002038 V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
2039 ConstantVector::get(ShuffleMask),
2040 SVI.getName() + ".extract");
JF Bastiend52c9902015-02-25 22:30:51 +00002041 BegIdx = 0;
2042 }
2043 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2044 assert(SrcElemsPerTgtElem);
2045 BegIdx /= SrcElemsPerTgtElem;
2046 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2047 auto *NewBC =
2048 BCAlreadyExists
2049 ? NewBCs[CastSrcTy]
Craig Topperbb4069e2017-07-07 23:16:26 +00002050 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
JF Bastiend52c9902015-02-25 22:30:51 +00002051 if (!BCAlreadyExists)
2052 NewBCs[CastSrcTy] = NewBC;
Craig Topperbb4069e2017-07-07 23:16:26 +00002053 auto *Ext = Builder.CreateExtractElement(
JF Bastiend52c9902015-02-25 22:30:51 +00002054 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2055 // The shufflevector isn't being replaced: the bitcast that used it
2056 // is. InstCombine will visit the newly-created instructions.
Sanjay Patel4b198802016-02-01 22:23:39 +00002057 replaceInstUsesWith(*BC, Ext);
JF Bastiend52c9902015-02-25 22:30:51 +00002058 MadeChange = true;
2059 }
2060 }
2061
Eric Christopher51edc7b2010-08-17 22:55:27 +00002062 // If the LHS is a shufflevector itself, see if we can combine it with this
Eli Friedmance818272011-10-21 19:06:29 +00002063 // one without producing an unusual shuffle.
2064 // Cases that might be simplified:
2065 // 1.
2066 // x1=shuffle(v1,v2,mask1)
2067 // x=shuffle(x1,undef,mask)
2068 // ==>
2069 // x=shuffle(v1,undef,newMask)
2070 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2071 // 2.
2072 // x1=shuffle(v1,undef,mask1)
2073 // x=shuffle(x1,x2,mask)
2074 // where v1.size() == mask1.size()
2075 // ==>
2076 // x=shuffle(v1,x2,newMask)
2077 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2078 // 3.
2079 // x2=shuffle(v2,undef,mask2)
2080 // x=shuffle(x1,x2,mask)
2081 // where v2.size() == mask2.size()
2082 // ==>
2083 // x=shuffle(x1,v2,newMask)
2084 // newMask[i] = (mask[i] < x1.size())
2085 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2086 // 4.
2087 // x1=shuffle(v1,undef,mask1)
2088 // x2=shuffle(v2,undef,mask2)
2089 // x=shuffle(x1,x2,mask)
2090 // where v1.size() == v2.size()
2091 // ==>
2092 // x=shuffle(v1,v2,newMask)
2093 // newMask[i] = (mask[i] < x1.size())
2094 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2095 //
2096 // Here we are really conservative:
Eric Christopher51edc7b2010-08-17 22:55:27 +00002097 // we are absolutely afraid of producing a shuffle mask not in the input
2098 // program, because the code gen may not be smart enough to turn a merged
2099 // shuffle into two specific shuffles: it may produce worse code. As such,
Jim Grosbachd11584a2013-05-01 00:25:27 +00002100 // we only merge two shuffles if the result is either a splat or one of the
2101 // input shuffle masks. In this case, merging the shuffles just removes
2102 // one instruction, which we know is safe. This is good for things like
Eli Friedmance818272011-10-21 19:06:29 +00002103 // turning: (splat(splat)) -> splat, or
2104 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2105 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2106 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2107 if (LHSShuffle)
2108 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
Craig Topperf40110f2014-04-25 05:29:35 +00002109 LHSShuffle = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002110 if (RHSShuffle)
2111 if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
Craig Topperf40110f2014-04-25 05:29:35 +00002112 RHSShuffle = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002113 if (!LHSShuffle && !RHSShuffle)
Craig Topperf40110f2014-04-25 05:29:35 +00002114 return MadeChange ? &SVI : nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002115
Craig Topperf40110f2014-04-25 05:29:35 +00002116 Value* LHSOp0 = nullptr;
2117 Value* LHSOp1 = nullptr;
2118 Value* RHSOp0 = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002119 unsigned LHSOp0Width = 0;
2120 unsigned RHSOp0Width = 0;
2121 if (LHSShuffle) {
2122 LHSOp0 = LHSShuffle->getOperand(0);
2123 LHSOp1 = LHSShuffle->getOperand(1);
Craig Topper17b55682016-12-29 07:03:18 +00002124 LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
Eli Friedmance818272011-10-21 19:06:29 +00002125 }
2126 if (RHSShuffle) {
2127 RHSOp0 = RHSShuffle->getOperand(0);
Craig Topper17b55682016-12-29 07:03:18 +00002128 RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
Eli Friedmance818272011-10-21 19:06:29 +00002129 }
2130 Value* newLHS = LHS;
2131 Value* newRHS = RHS;
2132 if (LHSShuffle) {
2133 // case 1
Eric Christopher51edc7b2010-08-17 22:55:27 +00002134 if (isa<UndefValue>(RHS)) {
Eli Friedmance818272011-10-21 19:06:29 +00002135 newLHS = LHSOp0;
2136 newRHS = LHSOp1;
2137 }
2138 // case 2 or 4
2139 else if (LHSOp0Width == LHSWidth) {
2140 newLHS = LHSOp0;
2141 }
2142 }
2143 // case 3 or 4
2144 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2145 newRHS = RHSOp0;
2146 }
2147 // case 4
2148 if (LHSOp0 == RHSOp0) {
2149 newLHS = LHSOp0;
Craig Topperf40110f2014-04-25 05:29:35 +00002150 newRHS = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002151 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002152
Eli Friedmance818272011-10-21 19:06:29 +00002153 if (newLHS == LHS && newRHS == RHS)
Craig Topperf40110f2014-04-25 05:29:35 +00002154 return MadeChange ? &SVI : nullptr;
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002155
Eli Friedmance818272011-10-21 19:06:29 +00002156 SmallVector<int, 16> LHSMask;
2157 SmallVector<int, 16> RHSMask;
Chris Lattner8326bd82012-01-26 00:42:34 +00002158 if (newLHS != LHS)
2159 LHSMask = LHSShuffle->getShuffleMask();
2160 if (RHSShuffle && newRHS != RHS)
2161 RHSMask = RHSShuffle->getShuffleMask();
2162
Eli Friedmance818272011-10-21 19:06:29 +00002163 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2164 SmallVector<int, 16> newMask;
2165 bool isSplat = true;
2166 int SplatElt = -1;
2167 // Create a new mask for the new ShuffleVectorInst so that the new
2168 // ShuffleVectorInst is equivalent to the original one.
2169 for (unsigned i = 0; i < VWidth; ++i) {
2170 int eltMask;
Craig Topper45d9f4b2013-01-18 05:30:07 +00002171 if (Mask[i] < 0) {
Eli Friedmance818272011-10-21 19:06:29 +00002172 // This element is an undef value.
2173 eltMask = -1;
2174 } else if (Mask[i] < (int)LHSWidth) {
2175 // This element is from left hand side vector operand.
Craig Topper2ea22b02013-01-18 05:09:16 +00002176 //
Eli Friedmance818272011-10-21 19:06:29 +00002177 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2178 // new mask value for the element.
2179 if (newLHS != LHS) {
2180 eltMask = LHSMask[Mask[i]];
2181 // If the value selected is an undef value, explicitly specify it
2182 // with a -1 mask value.
2183 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2184 eltMask = -1;
Craig Topper2ea22b02013-01-18 05:09:16 +00002185 } else
Eli Friedmance818272011-10-21 19:06:29 +00002186 eltMask = Mask[i];
2187 } else {
2188 // This element is from right hand side vector operand
2189 //
2190 // If the value selected is an undef value, explicitly specify it
2191 // with a -1 mask value. (case 1)
2192 if (isa<UndefValue>(RHS))
2193 eltMask = -1;
2194 // If RHS is going to be replaced (case 3 or 4), calculate the
2195 // new mask value for the element.
2196 else if (newRHS != RHS) {
2197 eltMask = RHSMask[Mask[i]-LHSWidth];
2198 // If the value selected is an undef value, explicitly specify it
2199 // with a -1 mask value.
2200 if (eltMask >= (int)RHSOp0Width) {
2201 assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2202 && "should have been check above");
2203 eltMask = -1;
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002204 }
Craig Topper2ea22b02013-01-18 05:09:16 +00002205 } else
Eli Friedmance818272011-10-21 19:06:29 +00002206 eltMask = Mask[i]-LHSWidth;
2207
2208 // If LHS's width is changed, shift the mask value accordingly.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +00002209 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
Michael Gottesman02a11412012-10-16 21:29:38 +00002210 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2211 // If newRHS == newLHS, we want to remap any references from newRHS to
2212 // newLHS so that we can properly identify splats that may occur due to
Alp Tokercb402912014-01-24 17:20:08 +00002213 // obfuscation across the two vectors.
Craig Topperf40110f2014-04-25 05:29:35 +00002214 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
Eli Friedmance818272011-10-21 19:06:29 +00002215 eltMask += newLHSWidth;
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002216 }
Eli Friedmance818272011-10-21 19:06:29 +00002217
2218 // Check if this could still be a splat.
2219 if (eltMask >= 0) {
2220 if (SplatElt >= 0 && SplatElt != eltMask)
2221 isSplat = false;
2222 SplatElt = eltMask;
2223 }
2224
2225 newMask.push_back(eltMask);
2226 }
2227
2228 // If the result mask is equal to one of the original shuffle masks,
Jim Grosbachd11584a2013-05-01 00:25:27 +00002229 // or is a splat, do the replacement.
2230 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
Eli Friedmance818272011-10-21 19:06:29 +00002231 SmallVector<Constant*, 16> Elts;
Eli Friedmance818272011-10-21 19:06:29 +00002232 for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2233 if (newMask[i] < 0) {
2234 Elts.push_back(UndefValue::get(Int32Ty));
2235 } else {
2236 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2237 }
2238 }
Craig Topperf40110f2014-04-25 05:29:35 +00002239 if (!newRHS)
Eli Friedmance818272011-10-21 19:06:29 +00002240 newRHS = UndefValue::get(newLHS->getType());
2241 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002242 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002243
Craig Topperf40110f2014-04-25 05:29:35 +00002244 return MadeChange ? &SVI : nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +00002245}