blob: f052f8e0100abb1cce0ec00365615313367fbb73 [file] [log] [blame]
Chris Lattnerec97a902010-01-05 05:36:20 +00001//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
Chandler Carruth2946cd72019-01-19 08:50:56 +00003// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
Chris Lattnerec97a902010-01-05 05:36:20 +00006//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
Chandler Carrutha9174582015-01-22 05:25:13 +000014#include "InstCombineInternal.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000015#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
JF Bastiend52c9902015-02-25 22:30:51 +000017#include "llvm/ADT/DenseMap.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000018#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/SmallVector.h"
David Majnemer599ca442015-07-13 01:15:53 +000020#include "llvm/Analysis/InstructionSimplify.h"
21#include "llvm/Analysis/VectorUtils.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000022#include "llvm/IR/BasicBlock.h"
23#include "llvm/IR/Constant.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/Instruction.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/IR/Operator.h"
Chandler Carruth820a9082014-03-04 11:08:18 +000030#include "llvm/IR/PatternMatch.h"
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +000031#include "llvm/IR/Type.h"
32#include "llvm/IR/User.h"
33#include "llvm/IR/Value.h"
34#include "llvm/Support/Casting.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40#include <utility>
41
Chris Lattnerec97a902010-01-05 05:36:20 +000042using namespace llvm;
Nadav Rotem7df85092013-01-15 23:43:14 +000043using namespace PatternMatch;
Chris Lattnerec97a902010-01-05 05:36:20 +000044
Chandler Carruth964daaa2014-04-22 02:55:47 +000045#define DEBUG_TYPE "instcombine"
46
Sanjay Patel6eccf482015-09-09 15:24:36 +000047/// Return true if the value is cheaper to scalarize than it is to leave as a
Sanjay Patele51d5bd2018-12-18 19:07:38 +000048/// vector operation. IsConstantExtractIndex indicates whether we are extracting
49/// one known element from a vector constant.
50///
51/// FIXME: It's possible to create more instructions than previously existed.
52static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
53 // If we can pick a scalar constant value out of a vector, that is free.
54 if (auto *C = dyn_cast<Constant>(V))
55 return IsConstantExtractIndex || C->getSplatValue();
Chris Lattner8326bd82012-01-26 00:42:34 +000056
Sanjay Patele51d5bd2018-12-18 19:07:38 +000057 // An insertelement to the same constant index as our extract will simplify
58 // to the scalar inserted element. An insertelement to a different constant
59 // index is irrelevant to our extract.
60 if (match(V, m_InsertElement(m_Value(), m_Value(), m_ConstantInt())))
61 return IsConstantExtractIndex;
Bob Wilson8ecf98b2010-10-29 22:20:43 +000062
Sanjay Patele51d5bd2018-12-18 19:07:38 +000063 if (match(V, m_OneUse(m_Load(m_Value()))))
Chris Lattnerec97a902010-01-05 05:36:20 +000064 return true;
Sanjay Patele51d5bd2018-12-18 19:07:38 +000065
Simon Molld871ef42020-03-10 16:05:31 +010066 if (match(V, m_OneUse(m_UnOp())))
67 return true;
68
Sanjay Patele51d5bd2018-12-18 19:07:38 +000069 Value *V0, *V1;
70 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
71 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
72 cheapToScalarize(V1, IsConstantExtractIndex))
Chris Lattnerec97a902010-01-05 05:36:20 +000073 return true;
Sanjay Patele51d5bd2018-12-18 19:07:38 +000074
75 CmpInst::Predicate UnusedPred;
76 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
77 if (cheapToScalarize(V0, IsConstantExtractIndex) ||
78 cheapToScalarize(V1, IsConstantExtractIndex))
Chris Lattnerec97a902010-01-05 05:36:20 +000079 return true;
Bob Wilson8ecf98b2010-10-29 22:20:43 +000080
Chris Lattnerec97a902010-01-05 05:36:20 +000081 return false;
82}
83
Michael Kupersteina0c6ae02016-06-06 23:38:33 +000084// If we have a PHI node with a vector type that is only used to feed
Matt Arsenault38874732013-08-28 22:17:26 +000085// itself and be an operand of extractelement at a constant location,
86// try to replace the PHI of the vector type with a PHI of a scalar type.
Anat Shemer0c95efa2013-04-18 19:35:39 +000087Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
Michael Kupersteina0c6ae02016-06-06 23:38:33 +000088 SmallVector<Instruction *, 2> Extracts;
89 // The users we want the PHI to have are:
90 // 1) The EI ExtractElement (we already know this)
91 // 2) Possibly more ExtractElements with the same index.
92 // 3) Another operand, which will feed back into the PHI.
93 Instruction *PHIUser = nullptr;
94 for (auto U : PN->users()) {
95 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
96 if (EI.getIndexOperand() == EU->getIndexOperand())
97 Extracts.push_back(EU);
98 else
99 return nullptr;
100 } else if (!PHIUser) {
101 PHIUser = cast<Instruction>(U);
102 } else {
103 return nullptr;
104 }
105 }
Anat Shemer0c95efa2013-04-18 19:35:39 +0000106
Michael Kupersteina0c6ae02016-06-06 23:38:33 +0000107 if (!PHIUser)
108 return nullptr;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000109
110 // Verify that this PHI user has one use, which is the PHI itself,
111 // and that it is a binary operation which is cheap to scalarize.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +0000112 // otherwise return nullptr.
Chandler Carruthcdf47882014-03-09 03:16:01 +0000113 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
Sanjay Patel431e1142015-11-17 17:24:08 +0000114 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
Craig Topperf40110f2014-04-25 05:29:35 +0000115 return nullptr;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000116
117 // Create a scalar PHI node that will replace the vector PHI node
118 // just before the current PHI node.
Joey Goulyb34294d2013-05-24 12:33:28 +0000119 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
120 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
Anat Shemer0c95efa2013-04-18 19:35:39 +0000121 // Scalarize each PHI operand.
Joey Goulyb34294d2013-05-24 12:33:28 +0000122 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
Anat Shemer0c95efa2013-04-18 19:35:39 +0000123 Value *PHIInVal = PN->getIncomingValue(i);
124 BasicBlock *inBB = PN->getIncomingBlock(i);
125 Value *Elt = EI.getIndexOperand();
126 // If the operand is the PHI induction variable:
127 if (PHIInVal == PHIUser) {
128 // Scalarize the binary operation. Its first operand is the
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000129 // scalar PHI, and the second operand is extracted from the other
Anat Shemer0c95efa2013-04-18 19:35:39 +0000130 // vector operand.
131 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
Joey Goulyb34294d2013-05-24 12:33:28 +0000132 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
Joey Gouly83699282013-05-24 12:29:54 +0000133 Value *Op = InsertNewInstWith(
134 ExtractElementInst::Create(B0->getOperand(opId), Elt,
135 B0->getOperand(opId)->getName() + ".Elt"),
136 *B0);
Anat Shemer0c95efa2013-04-18 19:35:39 +0000137 Value *newPHIUser = InsertNewInstWith(
Owen Anderson7ea02fc2016-03-01 19:35:52 +0000138 BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
139 scalarPHI, Op, B0), *B0);
Anat Shemer0c95efa2013-04-18 19:35:39 +0000140 scalarPHI->addIncoming(newPHIUser, inBB);
141 } else {
142 // Scalarize PHI input:
Joey Goulyb34294d2013-05-24 12:33:28 +0000143 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
Anat Shemer0c95efa2013-04-18 19:35:39 +0000144 // Insert the new instruction into the predecessor basic block.
145 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
146 BasicBlock::iterator InsertPos;
147 if (pos && !isa<PHINode>(pos)) {
Duncan P. N. Exon Smith9f8aaf22015-10-13 16:59:33 +0000148 InsertPos = ++pos->getIterator();
Anat Shemer0c95efa2013-04-18 19:35:39 +0000149 } else {
150 InsertPos = inBB->getFirstInsertionPt();
151 }
152
153 InsertNewInstWith(newEI, *InsertPos);
154
155 scalarPHI->addIncoming(newEI, inBB);
156 }
157 }
Michael Kupersteina0c6ae02016-06-06 23:38:33 +0000158
159 for (auto E : Extracts)
160 replaceInstUsesWith(*E, scalarPHI);
161
162 return &EI;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000163}
164
Sanjay Patel4674c772018-09-24 20:41:22 +0000165static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
Sanjay Patel31b07192018-10-01 14:40:00 +0000166 InstCombiner::BuilderTy &Builder,
167 bool IsBigEndian) {
Sanjay Patel4674c772018-09-24 20:41:22 +0000168 Value *X;
169 uint64_t ExtIndexC;
170 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
171 !X->getType()->isVectorTy() ||
172 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
173 return nullptr;
174
175 // If this extractelement is using a bitcast from a vector of the same number
176 // of elements, see if we can find the source element from the source vector:
177 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
Christopher Tetreault155740c2020-04-08 10:42:22 -0700178 auto *SrcTy = cast<VectorType>(X->getType());
Sanjay Patel4674c772018-09-24 20:41:22 +0000179 Type *DestTy = Ext.getType();
Christopher Tetreault155740c2020-04-08 10:42:22 -0700180 unsigned NumSrcElts = SrcTy->getNumElements();
Sanjay Patel4674c772018-09-24 20:41:22 +0000181 unsigned NumElts = Ext.getVectorOperandType()->getNumElements();
182 if (NumSrcElts == NumElts)
183 if (Value *Elt = findScalarElement(X, ExtIndexC))
184 return new BitCastInst(Elt, DestTy);
185
Sanjay Patel31b07192018-10-01 14:40:00 +0000186 // If the source elements are wider than the destination, try to shift and
187 // truncate a subset of scalar bits of an insert op.
Sanjay Patel3746e112018-10-04 16:25:05 +0000188 if (NumSrcElts < NumElts) {
Sanjay Patel31b07192018-10-01 14:40:00 +0000189 Value *Scalar;
190 uint64_t InsIndexC;
191 if (!match(X, m_InsertElement(m_Value(), m_Value(Scalar),
192 m_ConstantInt(InsIndexC))))
193 return nullptr;
194
195 // The extract must be from the subset of vector elements that we inserted
196 // into. Example: if we inserted element 1 of a <2 x i64> and we are
197 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
198 // of elements 4-7 of the bitcasted vector.
199 unsigned NarrowingRatio = NumElts / NumSrcElts;
200 if (ExtIndexC / NarrowingRatio != InsIndexC)
201 return nullptr;
202
203 // We are extracting part of the original scalar. How that scalar is
204 // inserted into the vector depends on the endian-ness. Example:
205 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
206 // +--+--+--+--+--+--+--+--+
207 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
208 // extelt <4 x i16> V', 3: | |S2|S3|
209 // +--+--+--+--+--+--+--+--+
210 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
211 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
212 // In this example, we must right-shift little-endian. Big-endian is just a
213 // truncate.
214 unsigned Chunk = ExtIndexC % NarrowingRatio;
215 if (IsBigEndian)
216 Chunk = NarrowingRatio - 1 - Chunk;
Sanjay Patel3746e112018-10-04 16:25:05 +0000217
218 // Bail out if this is an FP vector to FP vector sequence. That would take
219 // more instructions than we started with unless there is no shift, and it
220 // may not be handled as well in the backend.
221 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
222 bool NeedDestBitcast = DestTy->isFloatingPointTy();
223 if (NeedSrcBitcast && NeedDestBitcast)
224 return nullptr;
225
226 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
227 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
228 unsigned ShAmt = Chunk * DestWidth;
229
230 // TODO: This limitation is more strict than necessary. We could sum the
231 // number of new instructions and subtract the number eliminated to know if
232 // we can proceed.
233 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
234 if (NeedSrcBitcast || NeedDestBitcast)
235 return nullptr;
236
237 if (NeedSrcBitcast) {
238 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
239 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
240 }
241
Sanjay Patel31b07192018-10-01 14:40:00 +0000242 if (ShAmt) {
243 // Bail out if we could end with more instructions than we started with.
244 if (!Ext.getVectorOperand()->hasOneUse())
245 return nullptr;
246 Scalar = Builder.CreateLShr(Scalar, ShAmt);
247 }
Sanjay Patel3746e112018-10-04 16:25:05 +0000248
249 if (NeedDestBitcast) {
250 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
251 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
252 }
Sanjay Patel31b07192018-10-01 14:40:00 +0000253 return new TruncInst(Scalar, DestTy);
254 }
255
Sanjay Patel4674c772018-09-24 20:41:22 +0000256 return nullptr;
257}
258
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000259/// Find elements of V demanded by UserInstr.
260static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
Christopher Tetreault155740c2020-04-08 10:42:22 -0700261 unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000262
263 // Conservatively assume that all elements are needed.
264 APInt UsedElts(APInt::getAllOnesValue(VWidth));
265
266 switch (UserInstr->getOpcode()) {
267 case Instruction::ExtractElement: {
268 ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
269 assert(EEI->getVectorOperand() == V);
270 ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
271 if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
272 UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
273 }
274 break;
275 }
276 case Instruction::ShuffleVector: {
277 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
Christopher Tetreault155740c2020-04-08 10:42:22 -0700278 unsigned MaskNumElts =
279 cast<VectorType>(UserInstr->getType())->getNumElements();
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000280
281 UsedElts = APInt(VWidth, 0);
282 for (unsigned i = 0; i < MaskNumElts; i++) {
283 unsigned MaskVal = Shuffle->getMaskValue(i);
284 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
285 continue;
286 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
287 UsedElts.setBit(MaskVal);
288 if (Shuffle->getOperand(1) == V &&
289 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
290 UsedElts.setBit(MaskVal - VWidth);
291 }
292 break;
293 }
294 default:
295 break;
296 }
297 return UsedElts;
298}
299
300/// Find union of elements of V demanded by all its users.
301/// If it is known by querying findDemandedEltsBySingleUser that
302/// no user demands an element of V, then the corresponding bit
303/// remains unset in the returned value.
304static APInt findDemandedEltsByAllUsers(Value *V) {
Christopher Tetreault155740c2020-04-08 10:42:22 -0700305 unsigned VWidth = cast<VectorType>(V->getType())->getNumElements();
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000306
307 APInt UnionUsedElts(VWidth, 0);
308 for (const Use &U : V->uses()) {
309 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
310 UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
311 } else {
312 UnionUsedElts = APInt::getAllOnesValue(VWidth);
313 break;
314 }
315
316 if (UnionUsedElts.isAllOnesValue())
317 break;
318 }
319
320 return UnionUsedElts;
321}
322
Chris Lattnerec97a902010-01-05 05:36:20 +0000323Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000324 Value *SrcVec = EI.getVectorOperand();
325 Value *Index = EI.getIndexOperand();
326 if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
Craig Toppera4205622017-06-09 03:21:29 +0000327 SQ.getWithInstruction(&EI)))
Sanjay Patel4b198802016-02-01 22:23:39 +0000328 return replaceInstUsesWith(EI, V);
David Majnemer599ca442015-07-13 01:15:53 +0000329
Chris Lattnerec97a902010-01-05 05:36:20 +0000330 // If extracting a specified index from the vector, see if we can recursively
331 // find a previously computed scalar that was inserted into the vector.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000332 auto *IndexC = dyn_cast<ConstantInt>(Index);
333 if (IndexC) {
Sanjay Patel7a526262018-09-24 16:39:03 +0000334 unsigned NumElts = EI.getVectorOperandType()->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000335
Simon Pilgrime7d032f2017-12-27 12:00:18 +0000336 // InstSimplify should handle cases where the index is invalid.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000337 if (!IndexC->getValue().ule(NumElts))
Simon Pilgrime7d032f2017-12-27 12:00:18 +0000338 return nullptr;
339
Chris Lattnerec97a902010-01-05 05:36:20 +0000340 // This instruction only demands the single element from the input vector.
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000341 if (NumElts != 1) {
342 // If the input vector has a single use, simplify it based on this use
343 // property.
344 if (SrcVec->hasOneUse()) {
345 APInt UndefElts(NumElts, 0);
346 APInt DemandedElts(NumElts, 0);
347 DemandedElts.setBit(IndexC->getZExtValue());
348 if (Value *V =
Nikita Popov53d20902020-03-29 20:07:46 +0200349 SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
350 return replaceOperand(EI, 0, V);
Piotr Sobczaka861c9a2019-10-21 08:12:47 +0000351 } else {
352 // If the input vector has multiple uses, simplify it based on a union
353 // of all elements used.
354 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
355 if (!DemandedElts.isAllOnesValue()) {
356 APInt UndefElts(NumElts, 0);
357 if (Value *V = SimplifyDemandedVectorElts(
358 SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
359 true /* AllowMultipleUsers */)) {
360 if (V != SrcVec) {
361 SrcVec->replaceAllUsesWith(V);
362 return &EI;
363 }
364 }
365 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000366 }
367 }
Sanjay Patel31b07192018-10-01 14:40:00 +0000368 if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
Sanjay Patel4674c772018-09-24 20:41:22 +0000369 return I;
Anat Shemer0c95efa2013-04-18 19:35:39 +0000370
371 // If there's a vector PHI feeding a scalar use through this extractelement
372 // instruction, try to scalarize the PHI.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000373 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
374 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
375 return ScalarPHI;
Chris Lattnerec97a902010-01-05 05:36:20 +0000376 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000377
Simon Molld871ef42020-03-10 16:05:31 +0100378 // TODO come up with a n-ary matcher that subsumes both unary and
379 // binary matchers.
380 UnaryOperator *UO;
381 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
382 // extelt (unop X), Index --> unop (extelt X, Index)
383 Value *X = UO->getOperand(0);
384 Value *E = Builder.CreateExtractElement(X, Index);
385 return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
386 }
387
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000388 BinaryOperator *BO;
389 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
390 // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
391 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
392 Value *E0 = Builder.CreateExtractElement(X, Index);
393 Value *E1 = Builder.CreateExtractElement(Y, Index);
394 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
395 }
396
Matt Arsenault9ccde612018-12-10 21:50:54 +0000397 Value *X, *Y;
398 CmpInst::Predicate Pred;
399 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
400 cheapToScalarize(SrcVec, IndexC)) {
401 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
402 Value *E0 = Builder.CreateExtractElement(X, Index);
403 Value *E1 = Builder.CreateExtractElement(Y, Index);
404 return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
405 }
406
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000407 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
408 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000409 // Extracting the inserted element?
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000410 if (IE->getOperand(2) == Index)
Sanjay Patel4b198802016-02-01 22:23:39 +0000411 return replaceInstUsesWith(EI, IE->getOperand(1));
Chris Lattnerec97a902010-01-05 05:36:20 +0000412 // If the inserted and extracted elements are constants, they must not
413 // be the same value, extract from the pre-inserted value instead.
Nikita Popov878cb382020-01-31 22:23:33 +0100414 if (isa<Constant>(IE->getOperand(2)) && IndexC)
415 return replaceOperand(EI, 0, IE->getOperand(0));
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000416 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000417 // If this is extracting an element from a shufflevector, figure out where
418 // it came from and extract from the appropriate input element instead.
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000419 if (auto *Elt = dyn_cast<ConstantInt>(Index)) {
Eli Friedman303c81c2011-10-21 19:11:34 +0000420 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Chris Lattnerec97a902010-01-05 05:36:20 +0000421 Value *Src;
422 unsigned LHSWidth =
Christopher Tetreault155740c2020-04-08 10:42:22 -0700423 cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000424
Bob Wilson11ee4562010-10-29 22:03:05 +0000425 if (SrcIdx < 0)
Sanjay Patel4b198802016-02-01 22:23:39 +0000426 return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
Bob Wilson11ee4562010-10-29 22:03:05 +0000427 if (SrcIdx < (int)LHSWidth)
Chris Lattnerec97a902010-01-05 05:36:20 +0000428 Src = SVI->getOperand(0);
Bob Wilson11ee4562010-10-29 22:03:05 +0000429 else {
Chris Lattnerec97a902010-01-05 05:36:20 +0000430 SrcIdx -= LHSWidth;
431 Src = SVI->getOperand(1);
Chris Lattnerec97a902010-01-05 05:36:20 +0000432 }
Chris Lattner229907c2011-07-18 04:54:35 +0000433 Type *Int32Ty = Type::getInt32Ty(EI.getContext());
Chris Lattnerec97a902010-01-05 05:36:20 +0000434 return ExtractElementInst::Create(Src,
Bob Wilson9d07f392010-10-29 22:03:07 +0000435 ConstantInt::get(Int32Ty,
Chris Lattnerec97a902010-01-05 05:36:20 +0000436 SrcIdx, false));
437 }
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000438 } else if (auto *CI = dyn_cast<CastInst>(I)) {
Sanjay Patelb67076c2015-11-29 22:09:34 +0000439 // Canonicalize extractelement(cast) -> cast(extractelement).
440 // Bitcasts can change the number of vector elements, and they cost
441 // nothing.
Anat Shemer55703182013-04-18 19:56:44 +0000442 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
Sanjay Patel47b3b4b2018-12-05 21:57:51 +0000443 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
Nadav Rotemd74b72b2011-03-31 22:57:29 +0000444 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
445 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000446 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000447 }
Craig Topperf40110f2014-04-25 05:29:35 +0000448 return nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +0000449}
450
Sanjay Patel6eccf482015-09-09 15:24:36 +0000451/// If V is a shuffle of values that ONLY returns elements from either LHS or
452/// RHS, return the shuffle mask and true. Otherwise, return false.
Sanjay Patel431e1142015-11-17 17:24:08 +0000453static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
Chris Lattner0256be92012-01-27 03:08:05 +0000454 SmallVectorImpl<Constant*> &Mask) {
Tim Northoverfad27612014-03-07 10:24:44 +0000455 assert(LHS->getType() == RHS->getType() &&
Chris Lattnerec97a902010-01-05 05:36:20 +0000456 "Invalid CollectSingleShuffleElements");
Christopher Tetreault155740c2020-04-08 10:42:22 -0700457 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000458
Chris Lattnerec97a902010-01-05 05:36:20 +0000459 if (isa<UndefValue>(V)) {
460 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
461 return true;
462 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000463
Chris Lattnerec97a902010-01-05 05:36:20 +0000464 if (V == LHS) {
465 for (unsigned i = 0; i != NumElts; ++i)
466 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
467 return true;
468 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000469
Chris Lattnerec97a902010-01-05 05:36:20 +0000470 if (V == RHS) {
471 for (unsigned i = 0; i != NumElts; ++i)
472 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
473 i+NumElts));
474 return true;
475 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000476
Chris Lattnerec97a902010-01-05 05:36:20 +0000477 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
478 // If this is an insert of an extract from some other vector, include it.
479 Value *VecOp = IEI->getOperand(0);
480 Value *ScalarOp = IEI->getOperand(1);
481 Value *IdxOp = IEI->getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000482
Chris Lattnerec97a902010-01-05 05:36:20 +0000483 if (!isa<ConstantInt>(IdxOp))
484 return false;
485 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000486
Chris Lattnerec97a902010-01-05 05:36:20 +0000487 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000488 // We can handle this if the vector we are inserting into is
Chris Lattnerec97a902010-01-05 05:36:20 +0000489 // transitively ok.
Sanjay Patel431e1142015-11-17 17:24:08 +0000490 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000491 // If so, update the mask to reflect the inserted undef.
492 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
493 return true;
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000494 }
Chris Lattnerec97a902010-01-05 05:36:20 +0000495 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
Tim Northoverfad27612014-03-07 10:24:44 +0000496 if (isa<ConstantInt>(EI->getOperand(1))) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000497 unsigned ExtractedIdx =
498 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
Christopher Tetreault155740c2020-04-08 10:42:22 -0700499 unsigned NumLHSElts =
500 cast<VectorType>(LHS->getType())->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000501
Chris Lattnerec97a902010-01-05 05:36:20 +0000502 // This must be extracting from either LHS or RHS.
503 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000504 // We can handle this if the vector we are inserting into is
Chris Lattnerec97a902010-01-05 05:36:20 +0000505 // transitively ok.
Sanjay Patel431e1142015-11-17 17:24:08 +0000506 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000507 // If so, update the mask to reflect the inserted value.
508 if (EI->getOperand(0) == LHS) {
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000509 Mask[InsertedIdx % NumElts] =
Chris Lattnerec97a902010-01-05 05:36:20 +0000510 ConstantInt::get(Type::getInt32Ty(V->getContext()),
511 ExtractedIdx);
512 } else {
513 assert(EI->getOperand(0) == RHS);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000514 Mask[InsertedIdx % NumElts] =
Chris Lattnerec97a902010-01-05 05:36:20 +0000515 ConstantInt::get(Type::getInt32Ty(V->getContext()),
Tim Northoverfad27612014-03-07 10:24:44 +0000516 ExtractedIdx + NumLHSElts);
Chris Lattnerec97a902010-01-05 05:36:20 +0000517 }
518 return true;
519 }
520 }
521 }
522 }
523 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000524
Chris Lattnerec97a902010-01-05 05:36:20 +0000525 return false;
526}
527
Sanjay Patelae945e72015-12-24 21:17:56 +0000528/// If we have insertion into a vector that is wider than the vector that we
529/// are extracting from, try to widen the source vector to allow a single
530/// shufflevector to replace one or more insert/extract pairs.
531static void replaceExtractElements(InsertElementInst *InsElt,
532 ExtractElementInst *ExtElt,
533 InstCombiner &IC) {
534 VectorType *InsVecType = InsElt->getType();
535 VectorType *ExtVecType = ExtElt->getVectorOperandType();
Christopher Tetreault155740c2020-04-08 10:42:22 -0700536 unsigned NumInsElts = InsVecType->getNumElements();
537 unsigned NumExtElts = ExtVecType->getNumElements();
Sanjay Patelae945e72015-12-24 21:17:56 +0000538
539 // The inserted-to vector must be wider than the extracted-from vector.
540 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
541 NumExtElts >= NumInsElts)
542 return;
543
544 // Create a shuffle mask to widen the extended-from vector using undefined
545 // values. The mask selects all of the values of the original vector followed
546 // by as many undefined values as needed to create a vector of the same length
547 // as the inserted-to vector.
548 SmallVector<Constant *, 16> ExtendMask;
549 IntegerType *IntType = Type::getInt32Ty(InsElt->getContext());
550 for (unsigned i = 0; i < NumExtElts; ++i)
551 ExtendMask.push_back(ConstantInt::get(IntType, i));
552 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
553 ExtendMask.push_back(UndefValue::get(IntType));
554
555 Value *ExtVecOp = ExtElt->getVectorOperand();
Sanjay Patel66fff732016-01-29 20:21:02 +0000556 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
557 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
558 ? ExtVecOpInst->getParent()
559 : ExtElt->getParent();
560
561 // TODO: This restriction matches the basic block check below when creating
562 // new extractelement instructions. If that limitation is removed, this one
563 // could also be removed. But for now, we just bail out to ensure that we
564 // will replace the extractelement instruction that is feeding our
565 // insertelement instruction. This allows the insertelement to then be
566 // replaced by a shufflevector. If the insertelement is not replaced, we can
567 // induce infinite looping because there's an optimization for extractelement
568 // that will delete our widening shuffle. This would trigger another attempt
569 // here to create that shuffle, and we spin forever.
570 if (InsertionBlock != InsElt->getParent())
571 return;
572
Sanjay Patel4e1b5a52016-11-10 00:15:14 +0000573 // TODO: This restriction matches the check in visitInsertElementInst() and
574 // prevents an infinite loop caused by not turning the extract/insert pair
575 // into a shuffle. We really should not need either check, but we're lacking
576 // folds for shufflevectors because we're afraid to generate shuffle masks
577 // that the backend can't handle.
578 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
579 return;
580
Sanjay Patelae945e72015-12-24 21:17:56 +0000581 auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
582 ConstantVector::get(ExtendMask));
583
Sanjay Patela1c53472016-01-05 19:09:47 +0000584 // Insert the new shuffle after the vector operand of the extract is defined
Sanjay Pateld72a4582016-01-08 01:39:16 +0000585 // (as long as it's not a PHI) or at the start of the basic block of the
586 // extract, so any subsequent extracts in the same basic block can use it.
587 // TODO: Insert before the earliest ExtractElementInst that is replaced.
Sanjay Pateld72a4582016-01-08 01:39:16 +0000588 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
Sanjay Patela1c53472016-01-05 19:09:47 +0000589 WideVec->insertAfter(ExtVecOpInst);
Sanjay Pateld72a4582016-01-08 01:39:16 +0000590 else
Sanjay Patela1c53472016-01-05 19:09:47 +0000591 IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
Sanjay Patela1c53472016-01-05 19:09:47 +0000592
593 // Replace extracts from the original narrow vector with extracts from the new
594 // wide vector.
Sanjay Patelae945e72015-12-24 21:17:56 +0000595 for (User *U : ExtVecOp->users()) {
Sanjay Patela1c53472016-01-05 19:09:47 +0000596 ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
Sanjay Pateld72a4582016-01-08 01:39:16 +0000597 if (!OldExt || OldExt->getParent() != WideVec->getParent())
Sanjay Patela1c53472016-01-05 19:09:47 +0000598 continue;
599 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
Sven van Haastregt78819e02017-06-05 09:18:10 +0000600 NewExt->insertAfter(OldExt);
Sanjay Patel4b198802016-02-01 22:23:39 +0000601 IC.replaceInstUsesWith(*OldExt, NewExt);
Sanjay Patelae945e72015-12-24 21:17:56 +0000602 }
603}
Tim Northoverfad27612014-03-07 10:24:44 +0000604
605/// We are building a shuffle to create V, which is a sequence of insertelement,
606/// extractelement pairs. If PermittedRHS is set, then we must either use it or
Sanjay Patel70af1fd2014-07-07 22:13:58 +0000607/// not rely on the second vector source. Return a std::pair containing the
Tim Northoverfad27612014-03-07 10:24:44 +0000608/// left and right vectors of the proposed shuffle (or 0), and set the Mask
609/// parameter as required.
610///
611/// Note: we intentionally don't try to fold earlier shuffles since they have
612/// often been chosen carefully to be efficiently implementable on the target.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +0000613using ShuffleOps = std::pair<Value *, Value *>;
Tim Northoverfad27612014-03-07 10:24:44 +0000614
Sanjay Patel431e1142015-11-17 17:24:08 +0000615static ShuffleOps collectShuffleElements(Value *V,
Tim Northoverfad27612014-03-07 10:24:44 +0000616 SmallVectorImpl<Constant *> &Mask,
Sanjay Patelae945e72015-12-24 21:17:56 +0000617 Value *PermittedRHS,
618 InstCombiner &IC) {
Tim Northoverfad27612014-03-07 10:24:44 +0000619 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
Christopher Tetreault155740c2020-04-08 10:42:22 -0700620 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000621
Chris Lattnerec97a902010-01-05 05:36:20 +0000622 if (isa<UndefValue>(V)) {
623 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
Tim Northoverfad27612014-03-07 10:24:44 +0000624 return std::make_pair(
625 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000626 }
Craig Topper2ea22b02013-01-18 05:09:16 +0000627
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000628 if (isa<ConstantAggregateZero>(V)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000629 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
Tim Northoverfad27612014-03-07 10:24:44 +0000630 return std::make_pair(V, nullptr);
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000631 }
Craig Topper2ea22b02013-01-18 05:09:16 +0000632
Chris Lattnera0d01ff2012-01-24 14:31:22 +0000633 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000634 // If this is an insert of an extract from some other vector, include it.
635 Value *VecOp = IEI->getOperand(0);
636 Value *ScalarOp = IEI->getOperand(1);
637 Value *IdxOp = IEI->getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000638
Chris Lattnerec97a902010-01-05 05:36:20 +0000639 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
Tim Northoverfad27612014-03-07 10:24:44 +0000640 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
Chris Lattnerec97a902010-01-05 05:36:20 +0000641 unsigned ExtractedIdx =
Bob Wilson67a6f322010-10-29 22:20:45 +0000642 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
Chris Lattnerec97a902010-01-05 05:36:20 +0000643 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000644
Chris Lattnerec97a902010-01-05 05:36:20 +0000645 // Either the extracted from or inserted into vector must be RHSVec,
646 // otherwise we'd end up with a shuffle of three inputs.
Craig Topperf40110f2014-04-25 05:29:35 +0000647 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Tim Northoverfad27612014-03-07 10:24:44 +0000648 Value *RHS = EI->getOperand(0);
Sanjay Patelae945e72015-12-24 21:17:56 +0000649 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
Craig Toppere73658d2014-04-28 04:05:08 +0000650 assert(LR.second == nullptr || LR.second == RHS);
Tim Northoverfad27612014-03-07 10:24:44 +0000651
652 if (LR.first->getType() != RHS->getType()) {
Sanjay Patelae945e72015-12-24 21:17:56 +0000653 // Although we are giving up for now, see if we can create extracts
654 // that match the inserts for another round of combining.
655 replaceExtractElements(IEI, EI, IC);
656
Tim Northoverfad27612014-03-07 10:24:44 +0000657 // We tried our best, but we can't find anything compatible with RHS
658 // further up the chain. Return a trivial shuffle.
659 for (unsigned i = 0; i < NumElts; ++i)
660 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i);
661 return std::make_pair(V, nullptr);
662 }
663
Christopher Tetreault155740c2020-04-08 10:42:22 -0700664 unsigned NumLHSElts =
665 cast<VectorType>(RHS->getType())->getNumElements();
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000666 Mask[InsertedIdx % NumElts] =
Bob Wilson67a6f322010-10-29 22:20:45 +0000667 ConstantInt::get(Type::getInt32Ty(V->getContext()),
Tim Northoverfad27612014-03-07 10:24:44 +0000668 NumLHSElts+ExtractedIdx);
669 return std::make_pair(LR.first, RHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000670 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000671
Tim Northoverfad27612014-03-07 10:24:44 +0000672 if (VecOp == PermittedRHS) {
673 // We've gone as far as we can: anything on the other side of the
674 // extractelement will already have been converted into a shuffle.
675 unsigned NumLHSElts =
Christopher Tetreault155740c2020-04-08 10:42:22 -0700676 cast<VectorType>(EI->getOperand(0)->getType())->getNumElements();
Tim Northoverfad27612014-03-07 10:24:44 +0000677 for (unsigned i = 0; i != NumElts; ++i)
678 Mask.push_back(ConstantInt::get(
679 Type::getInt32Ty(V->getContext()),
680 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i));
681 return std::make_pair(EI->getOperand(0), PermittedRHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000682 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000683
Chris Lattnerec97a902010-01-05 05:36:20 +0000684 // If this insertelement is a chain that comes from exactly these two
685 // vectors, return the vector and the effective shuffle.
Tim Northoverfad27612014-03-07 10:24:44 +0000686 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
Sanjay Patel431e1142015-11-17 17:24:08 +0000687 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
Tim Northoverfad27612014-03-07 10:24:44 +0000688 Mask))
689 return std::make_pair(EI->getOperand(0), PermittedRHS);
Chris Lattnerec97a902010-01-05 05:36:20 +0000690 }
691 }
692 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +0000693
Sanjay Patelb67076c2015-11-29 22:09:34 +0000694 // Otherwise, we can't do anything fancy. Return an identity vector.
Chris Lattnerec97a902010-01-05 05:36:20 +0000695 for (unsigned i = 0; i != NumElts; ++i)
696 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
Tim Northoverfad27612014-03-07 10:24:44 +0000697 return std::make_pair(V, nullptr);
Chris Lattnerec97a902010-01-05 05:36:20 +0000698}
699
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000700/// Try to find redundant insertvalue instructions, like the following ones:
701/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
702/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
703/// Here the second instruction inserts values at the same indices, as the
704/// first one, making the first one redundant.
705/// It should be transformed to:
706/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
707Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
708 bool IsRedundant = false;
709 ArrayRef<unsigned int> FirstIndices = I.getIndices();
710
711 // If there is a chain of insertvalue instructions (each of them except the
712 // last one has only one use and it's another insertvalue insn from this
713 // chain), check if any of the 'children' uses the same indices as the first
714 // instruction. In this case, the first one is redundant.
715 Value *V = &I;
Michael Zolotukhin292d3ca2014-05-08 19:50:24 +0000716 unsigned Depth = 0;
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000717 while (V->hasOneUse() && Depth < 10) {
718 User *U = V->user_back();
Michael Zolotukhin292d3ca2014-05-08 19:50:24 +0000719 auto UserInsInst = dyn_cast<InsertValueInst>(U);
720 if (!UserInsInst || U->getOperand(0) != V)
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000721 break;
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000722 if (UserInsInst->getIndices() == FirstIndices) {
723 IsRedundant = true;
724 break;
725 }
726 V = UserInsInst;
727 Depth++;
728 }
729
730 if (IsRedundant)
Sanjay Patel4b198802016-02-01 22:23:39 +0000731 return replaceInstUsesWith(I, I.getOperand(0));
Michael Zolotukhin7d6293a2014-05-07 14:30:18 +0000732 return nullptr;
733}
734
Sanjay Patel521f19f2016-09-02 17:05:43 +0000735static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700736 int MaskSize = Shuf.getShuffleMask().size();
Christopher Tetreault155740c2020-04-08 10:42:22 -0700737 int VecSize =
738 cast<VectorType>(Shuf.getOperand(0)->getType())->getNumElements();
Sanjay Patel521f19f2016-09-02 17:05:43 +0000739
740 // A vector select does not change the size of the operands.
741 if (MaskSize != VecSize)
742 return false;
743
744 // Each mask element must be undefined or choose a vector element from one of
745 // the source operands without crossing vector lanes.
746 for (int i = 0; i != MaskSize; ++i) {
747 int Elt = Shuf.getMaskValue(i);
748 if (Elt != -1 && Elt != i && Elt != i + VecSize)
749 return false;
750 }
751
752 return true;
753}
754
Sanjay Patel71ad2272019-06-26 15:52:59 +0000755/// Turn a chain of inserts that splats a value into an insert + shuffle:
756/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
757/// shufflevector(insertelt(X, %k, 0), undef, zero)
758static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
759 // We are interested in the last insert in a chain. So if this insert has a
760 // single user and that user is an insert, bail.
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000761 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
762 return nullptr;
763
Sanjay Patel71ad2272019-06-26 15:52:59 +0000764 auto *VecTy = cast<VectorType>(InsElt.getType());
765 unsigned NumElements = VecTy->getNumElements();
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000766
767 // Do not try to do this for a one-element vector, since that's a nop,
768 // and will cause an inf-loop.
769 if (NumElements == 1)
770 return nullptr;
771
772 Value *SplatVal = InsElt.getOperand(1);
Fangrui Songf78650a2018-07-30 19:41:25 +0000773 InsertElementInst *CurrIE = &InsElt;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000774 SmallVector<bool, 16> ElementPresent(NumElements, false);
Florian Hahnb992fee2017-08-30 10:54:21 +0000775 InsertElementInst *FirstIE = nullptr;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000776
777 // Walk the chain backwards, keeping track of which indices we inserted into,
778 // until we hit something that isn't an insert of the splatted value.
779 while (CurrIE) {
Sanjay Patel863d4942017-11-27 18:19:32 +0000780 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000781 if (!Idx || CurrIE->getOperand(1) != SplatVal)
782 return nullptr;
783
Sanjay Patel863d4942017-11-27 18:19:32 +0000784 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
Florian Hahnb992fee2017-08-30 10:54:21 +0000785 // Check none of the intermediate steps have any additional uses, except
786 // for the root insertelement instruction, which can be re-used, if it
787 // inserts at position 0.
788 if (CurrIE != &InsElt &&
789 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000790 return nullptr;
791
792 ElementPresent[Idx->getZExtValue()] = true;
Florian Hahnb992fee2017-08-30 10:54:21 +0000793 FirstIE = CurrIE;
794 CurrIE = NextIE;
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000795 }
796
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000797 // If this is just a single insertelement (not a sequence), we are done.
798 if (FirstIE == &InsElt)
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000799 return nullptr;
800
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000801 // If we are not inserting into an undef vector, make sure we've seen an
802 // insert into every element.
803 // TODO: If the base vector is not undef, it might be better to create a splat
804 // and then a select-shuffle (blend) with the base vector.
805 if (!isa<UndefValue>(FirstIE->getOperand(0)))
806 if (any_of(ElementPresent, [](bool Present) { return !Present; }))
807 return nullptr;
808
Sanjay Patel71ad2272019-06-26 15:52:59 +0000809 // Create the insert + shuffle.
810 Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
811 UndefValue *UndefVec = UndefValue::get(VecTy);
812 Constant *Zero = ConstantInt::get(Int32Ty, 0);
813 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
814 FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000815
Sanjay Patel75b5edf2019-07-04 16:45:34 +0000816 // Splat from element 0, but replace absent elements with undef in the mask.
817 SmallVector<Constant *, 16> Mask(NumElements, Zero);
818 for (unsigned i = 0; i != NumElements; ++i)
819 if (!ElementPresent[i])
820 Mask[i] = UndefValue::get(Int32Ty);
821
822 return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
Michael Kupersteincd7ad712016-12-28 00:18:08 +0000823}
824
Sanjay Patel3dee1132019-07-08 19:48:52 +0000825/// Try to fold an insert element into an existing splat shuffle by changing
826/// the shuffle's mask to include the index of this insert element.
827static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
828 // Check if the vector operand of this insert is a canonical splat shuffle.
829 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
830 if (!Shuf || !Shuf->isZeroEltSplat())
831 return nullptr;
832
833 // Check for a constant insertion index.
834 uint64_t IdxC;
835 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
836 return nullptr;
837
838 // Check if the splat shuffle's input is the same as this insert's scalar op.
839 Value *X = InsElt.getOperand(1);
840 Value *Op0 = Shuf->getOperand(0);
841 if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
842 return nullptr;
843
844 // Replace the shuffle mask element at the index of this insert with a zero.
845 // For example:
846 // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
847 // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
Christopher Tetreault155740c2020-04-08 10:42:22 -0700848 unsigned NumMaskElts = Shuf->getType()->getNumElements();
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700849 SmallVector<int, 16> NewMask(NumMaskElts);
Sanjay Patel3dee1132019-07-08 19:48:52 +0000850 for (unsigned i = 0; i != NumMaskElts; ++i)
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700851 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
Sanjay Patel3dee1132019-07-08 19:48:52 +0000852
Sanjay Patel3dee1132019-07-08 19:48:52 +0000853 return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
854}
855
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000856/// Try to fold an extract+insert element into an existing identity shuffle by
857/// changing the shuffle's mask to include the index of this insert element.
858static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
859 // Check if the vector operand of this insert is an identity shuffle.
860 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
861 if (!Shuf || !isa<UndefValue>(Shuf->getOperand(1)) ||
862 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
863 return nullptr;
864
865 // Check for a constant insertion index.
866 uint64_t IdxC;
867 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
868 return nullptr;
869
870 // Check if this insert's scalar op is extracted from the identity shuffle's
871 // input vector.
872 Value *Scalar = InsElt.getOperand(1);
873 Value *X = Shuf->getOperand(0);
874 if (!match(Scalar, m_ExtractElement(m_Specific(X), m_SpecificInt(IdxC))))
875 return nullptr;
876
877 // Replace the shuffle mask element at the index of this extract+insert with
878 // that same index value.
879 // For example:
880 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
Christopher Tetreault155740c2020-04-08 10:42:22 -0700881 unsigned NumMaskElts = Shuf->getType()->getNumElements();
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700882 SmallVector<int, 16> NewMask(NumMaskElts);
883 ArrayRef<int> OldMask = Shuf->getShuffleMask();
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000884 for (unsigned i = 0; i != NumMaskElts; ++i) {
885 if (i != IdxC) {
886 // All mask elements besides the inserted element remain the same.
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700887 NewMask[i] = OldMask[i];
888 } else if (OldMask[i] == (int)IdxC) {
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000889 // If the mask element was already set, there's nothing to do
890 // (demanded elements analysis may unset it later).
891 return nullptr;
892 } else {
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700893 assert(OldMask[i] == UndefMaskElem &&
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000894 "Unexpected shuffle mask element for identity shuffle");
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700895 NewMask[i] = IdxC;
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000896 }
897 }
898
Sanjay Patelaff5bee2019-09-08 19:03:01 +0000899 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
900}
901
Sanjay Patel2f602ce2017-03-22 17:10:44 +0000902/// If we have an insertelement instruction feeding into another insertelement
903/// and the 2nd is inserting a constant into the vector, canonicalize that
904/// constant insertion before the insertion of a variable:
905///
906/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
907/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
908///
909/// This has the potential of eliminating the 2nd insertelement instruction
910/// via constant folding of the scalar constant into a vector constant.
911static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
912 InstCombiner::BuilderTy &Builder) {
913 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
914 if (!InsElt1 || !InsElt1->hasOneUse())
915 return nullptr;
916
917 Value *X, *Y;
918 Constant *ScalarC;
919 ConstantInt *IdxC1, *IdxC2;
920 if (match(InsElt1->getOperand(0), m_Value(X)) &&
921 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
922 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
923 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
924 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
925 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
926 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
927 }
928
929 return nullptr;
930}
931
Alexey Bataevfee90782016-09-23 09:14:08 +0000932/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
933/// --> shufflevector X, CVec', Mask'
Sanjay Patel521f19f2016-09-02 17:05:43 +0000934static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
Alexey Bataevfee90782016-09-23 09:14:08 +0000935 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
936 // Bail out if the parent has more than one use. In that case, we'd be
Sanjay Patel521f19f2016-09-02 17:05:43 +0000937 // replacing the insertelt with a shuffle, and that's not a clear win.
Alexey Bataevfee90782016-09-23 09:14:08 +0000938 if (!Inst || !Inst->hasOneUse())
Sanjay Patel521f19f2016-09-02 17:05:43 +0000939 return nullptr;
Alexey Bataevfee90782016-09-23 09:14:08 +0000940 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
941 // The shuffle must have a constant vector operand. The insertelt must have
942 // a constant scalar being inserted at a constant position in the vector.
943 Constant *ShufConstVec, *InsEltScalar;
944 uint64_t InsEltIndex;
945 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
946 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
947 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
948 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +0000949
Alexey Bataevfee90782016-09-23 09:14:08 +0000950 // Adding an element to an arbitrary shuffle could be expensive, but a
951 // shuffle that selects elements from vectors without crossing lanes is
952 // assumed cheap.
953 // If we're just adding a constant into that shuffle, it will still be
954 // cheap.
955 if (!isShuffleEquivalentToSelect(*Shuf))
956 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +0000957
Alexey Bataevfee90782016-09-23 09:14:08 +0000958 // From the above 'select' check, we know that the mask has the same number
959 // of elements as the vector input operands. We also know that each constant
960 // input element is used in its lane and can not be used more than once by
961 // the shuffle. Therefore, replace the constant in the shuffle's constant
962 // vector with the insertelt constant. Replace the constant in the shuffle's
963 // mask vector with the insertelt index plus the length of the vector
964 // (because the constant vector operand of a shuffle is always the 2nd
965 // operand).
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700966 ArrayRef<int> Mask = Shuf->getShuffleMask();
967 unsigned NumElts = Mask.size();
Alexey Bataevfee90782016-09-23 09:14:08 +0000968 SmallVector<Constant *, 16> NewShufElts(NumElts);
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700969 SmallVector<int, 16> NewMaskElts(NumElts);
Alexey Bataevfee90782016-09-23 09:14:08 +0000970 for (unsigned I = 0; I != NumElts; ++I) {
971 if (I == InsEltIndex) {
972 NewShufElts[I] = InsEltScalar;
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700973 NewMaskElts[I] = InsEltIndex + NumElts;
Alexey Bataevfee90782016-09-23 09:14:08 +0000974 } else {
975 // Copy over the existing values.
976 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700977 NewMaskElts[I] = Mask[I];
Alexey Bataevfee90782016-09-23 09:14:08 +0000978 }
Sanjay Patel521f19f2016-09-02 17:05:43 +0000979 }
Sanjay Patel521f19f2016-09-02 17:05:43 +0000980
Alexey Bataevfee90782016-09-23 09:14:08 +0000981 // Create new operands for a shuffle that includes the constant of the
982 // original insertelt. The old shuffle will be dead now.
983 return new ShuffleVectorInst(Shuf->getOperand(0),
Eli Friedman1ee6ec22020-03-31 13:08:59 -0700984 ConstantVector::get(NewShufElts), NewMaskElts);
Alexey Bataevfee90782016-09-23 09:14:08 +0000985 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
986 // Transform sequences of insertelements ops with constant data/indexes into
987 // a single shuffle op.
988 unsigned NumElts = InsElt.getType()->getNumElements();
989
990 uint64_t InsertIdx[2];
991 Constant *Val[2];
992 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
993 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
994 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
995 !match(IEI->getOperand(1), m_Constant(Val[1])))
996 return nullptr;
997 SmallVector<Constant *, 16> Values(NumElts);
998 SmallVector<Constant *, 16> Mask(NumElts);
999 auto ValI = std::begin(Val);
1000 // Generate new constant vector and mask.
1001 // We have 2 values/masks from the insertelements instructions. Insert them
1002 // into new value/mask vectors.
1003 for (uint64_t I : InsertIdx) {
1004 if (!Values[I]) {
1005 assert(!Mask[I]);
1006 Values[I] = *ValI;
1007 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
1008 NumElts + I);
1009 }
1010 ++ValI;
1011 }
1012 // Remaining values are filled with 'undef' values.
1013 for (unsigned I = 0; I < NumElts; ++I) {
1014 if (!Values[I]) {
1015 assert(!Mask[I]);
1016 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1017 Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
1018 }
1019 }
1020 // Create new operands for a shuffle that includes the constant of the
1021 // original insertelt.
1022 return new ShuffleVectorInst(IEI->getOperand(0),
1023 ConstantVector::get(Values),
1024 ConstantVector::get(Mask));
1025 }
1026 return nullptr;
Sanjay Patel521f19f2016-09-02 17:05:43 +00001027}
1028
Chris Lattnerec97a902010-01-05 05:36:20 +00001029Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
1030 Value *VecOp = IE.getOperand(0);
1031 Value *ScalarOp = IE.getOperand(1);
1032 Value *IdxOp = IE.getOperand(2);
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001033
Igor Laevskye0edb662017-12-13 11:21:18 +00001034 if (auto *V = SimplifyInsertElementInst(
1035 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1036 return replaceInstUsesWith(IE, V);
1037
Sanjay Patel926e4772019-05-17 18:06:12 +00001038 // If the vector and scalar are both bitcast from the same element type, do
1039 // the insert in that source type followed by bitcast.
1040 Value *VecSrc, *ScalarSrc;
1041 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1042 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1043 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1044 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
Christopher Tetreault155740c2020-04-08 10:42:22 -07001045 cast<VectorType>(VecSrc->getType())->getElementType() ==
1046 ScalarSrc->getType()) {
Sanjay Patel926e4772019-05-17 18:06:12 +00001047 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1048 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1049 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1050 return new BitCastInst(NewInsElt, IE.getType());
1051 }
1052
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001053 // If the inserted element was extracted from some other vector and both
Sanjay Patel93179632019-05-26 14:03:50 +00001054 // indexes are valid constants, try to turn this into a shuffle.
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001055 uint64_t InsertedIdx, ExtractedIdx;
1056 Value *ExtVecOp;
1057 if (match(IdxOp, m_ConstantInt(InsertedIdx)) &&
Christopher Tetreault155740c2020-04-08 10:42:22 -07001058 match(ScalarOp,
1059 m_ExtractElement(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1060 ExtractedIdx < cast<VectorType>(ExtVecOp->getType())->getNumElements()) {
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001061 // TODO: Looking at the user(s) to determine if this insert is a
1062 // fold-to-shuffle opportunity does not match the usual instcombine
1063 // constraints. We should decide if the transform is worthy based only
1064 // on this instruction and its operands, but that may not work currently.
1065 //
1066 // Here, we are trying to avoid creating shuffles before reaching
1067 // the end of a chain of extract-insert pairs. This is complicated because
1068 // we do not generally form arbitrary shuffle masks in instcombine
1069 // (because those may codegen poorly), but collectShuffleElements() does
1070 // exactly that.
1071 //
1072 // The rules for determining what is an acceptable target-independent
1073 // shuffle mask are fuzzy because they evolve based on the backend's
1074 // capabilities and real-world impact.
1075 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1076 if (!Insert.hasOneUse())
1077 return true;
1078 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1079 if (!InsertUser)
1080 return true;
1081 return false;
1082 };
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001083
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001084 // Try to form a shuffle from a chain of extract-insert ops.
1085 if (isShuffleRootCandidate(IE)) {
1086 SmallVector<Constant*, 16> Mask;
1087 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
Sanjay Patel729c4362018-10-20 16:25:55 +00001088
Sanjay Patel0522b0d2018-10-20 17:15:57 +00001089 // The proposed shuffle may be trivial, in which case we shouldn't
1090 // perform the combine.
1091 if (LR.first != &IE && LR.second != &IE) {
1092 // We now have a shuffle of LHS, RHS, Mask.
1093 if (LR.second == nullptr)
1094 LR.second = UndefValue::get(LR.first->getType());
1095 return new ShuffleVectorInst(LR.first, LR.second,
1096 ConstantVector::get(Mask));
Chris Lattnerec97a902010-01-05 05:36:20 +00001097 }
1098 }
1099 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001100
Christopher Tetreault155740c2020-04-08 10:42:22 -07001101 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
Chris Lattnerec97a902010-01-05 05:36:20 +00001102 APInt UndefElts(VWidth, 0);
1103 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
Eli Friedmanef200db2011-02-19 22:42:40 +00001104 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1105 if (V != &IE)
Sanjay Patel4b198802016-02-01 22:23:39 +00001106 return replaceInstUsesWith(IE, V);
Chris Lattnerec97a902010-01-05 05:36:20 +00001107 return &IE;
Eli Friedmanef200db2011-02-19 22:42:40 +00001108 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001109
Sanjay Patel521f19f2016-09-02 17:05:43 +00001110 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1111 return Shuf;
1112
Craig Topperbb4069e2017-07-07 23:16:26 +00001113 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
Sanjay Patel2f602ce2017-03-22 17:10:44 +00001114 return NewInsElt;
1115
Sanjay Patel71ad2272019-06-26 15:52:59 +00001116 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
Michael Kupersteincd7ad712016-12-28 00:18:08 +00001117 return Broadcast;
1118
Sanjay Patel3dee1132019-07-08 19:48:52 +00001119 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1120 return Splat;
1121
Sanjay Patelaff5bee2019-09-08 19:03:01 +00001122 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1123 return IdentityShuf;
1124
Craig Topperf40110f2014-04-25 05:29:35 +00001125 return nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +00001126}
1127
Nick Lewyckya2b77202013-05-31 00:59:42 +00001128/// Return true if we can evaluate the specified expression tree if the vector
1129/// elements were shuffled in a different order.
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001130static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
Nick Lewycky3f715e22013-06-01 20:51:31 +00001131 unsigned Depth = 5) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001132 // We can always reorder the elements of a constant.
1133 if (isa<Constant>(V))
1134 return true;
1135
1136 // We won't reorder vector arguments. No IPO here.
1137 Instruction *I = dyn_cast<Instruction>(V);
1138 if (!I) return false;
1139
1140 // Two users may expect different orders of the elements. Don't try it.
1141 if (!I->hasOneUse())
1142 return false;
1143
1144 if (Depth == 0) return false;
1145
1146 switch (I->getOpcode()) {
Bjorn Pettersson64562522019-10-18 07:42:02 +00001147 case Instruction::UDiv:
1148 case Instruction::SDiv:
1149 case Instruction::URem:
1150 case Instruction::SRem:
1151 // Propagating an undefined shuffle mask element to integer div/rem is not
1152 // allowed because those opcodes can create immediate undefined behavior
1153 // from an undefined element in an operand.
1154 if (llvm::any_of(Mask, [](int M){ return M == -1; }))
1155 return false;
1156 LLVM_FALLTHROUGH;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001157 case Instruction::Add:
1158 case Instruction::FAdd:
1159 case Instruction::Sub:
1160 case Instruction::FSub:
1161 case Instruction::Mul:
1162 case Instruction::FMul:
Nick Lewyckya2b77202013-05-31 00:59:42 +00001163 case Instruction::FDiv:
Nick Lewyckya2b77202013-05-31 00:59:42 +00001164 case Instruction::FRem:
1165 case Instruction::Shl:
1166 case Instruction::LShr:
1167 case Instruction::AShr:
1168 case Instruction::And:
1169 case Instruction::Or:
1170 case Instruction::Xor:
1171 case Instruction::ICmp:
1172 case Instruction::FCmp:
1173 case Instruction::Trunc:
1174 case Instruction::ZExt:
1175 case Instruction::SExt:
1176 case Instruction::FPToUI:
1177 case Instruction::FPToSI:
1178 case Instruction::UIToFP:
1179 case Instruction::SIToFP:
1180 case Instruction::FPTrunc:
1181 case Instruction::FPExt:
1182 case Instruction::GetElementPtr: {
Sanjay Patel26c119a2018-09-30 13:50:42 +00001183 // Bail out if we would create longer vector ops. We could allow creating
Bjorn Pettersson64562522019-10-18 07:42:02 +00001184 // longer vector ops, but that may result in more expensive codegen.
Sanjay Patel26c119a2018-09-30 13:50:42 +00001185 Type *ITy = I->getType();
Christopher Tetreault155740c2020-04-08 10:42:22 -07001186 if (ITy->isVectorTy() &&
1187 Mask.size() > cast<VectorType>(ITy)->getNumElements())
Sanjay Patel26c119a2018-09-30 13:50:42 +00001188 return false;
Sanjay Patel4e28753142015-11-16 22:16:52 +00001189 for (Value *Operand : I->operands()) {
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001190 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
Nick Lewyckya2b77202013-05-31 00:59:42 +00001191 return false;
1192 }
1193 return true;
1194 }
1195 case Instruction::InsertElement: {
1196 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1197 if (!CI) return false;
1198 int ElementNumber = CI->getLimitedValue();
1199
1200 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1201 // can't put an element into multiple indices.
1202 bool SeenOnce = false;
1203 for (int i = 0, e = Mask.size(); i != e; ++i) {
1204 if (Mask[i] == ElementNumber) {
1205 if (SeenOnce)
1206 return false;
1207 SeenOnce = true;
1208 }
1209 }
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001210 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001211 }
1212 }
1213 return false;
1214}
1215
1216/// Rebuild a new instruction just like 'I' but with the new operands given.
1217/// In the event of type mismatch, the type of the operands is correct.
Sanjay Patel431e1142015-11-17 17:24:08 +00001218static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001219 // We don't want to use the IRBuilder here because we want the replacement
1220 // instructions to appear next to 'I', not the builder's insertion point.
1221 switch (I->getOpcode()) {
1222 case Instruction::Add:
1223 case Instruction::FAdd:
1224 case Instruction::Sub:
1225 case Instruction::FSub:
1226 case Instruction::Mul:
1227 case Instruction::FMul:
1228 case Instruction::UDiv:
1229 case Instruction::SDiv:
1230 case Instruction::FDiv:
1231 case Instruction::URem:
1232 case Instruction::SRem:
1233 case Instruction::FRem:
1234 case Instruction::Shl:
1235 case Instruction::LShr:
1236 case Instruction::AShr:
1237 case Instruction::And:
1238 case Instruction::Or:
1239 case Instruction::Xor: {
1240 BinaryOperator *BO = cast<BinaryOperator>(I);
1241 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1242 BinaryOperator *New =
1243 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1244 NewOps[0], NewOps[1], "", BO);
1245 if (isa<OverflowingBinaryOperator>(BO)) {
1246 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1247 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1248 }
1249 if (isa<PossiblyExactOperator>(BO)) {
1250 New->setIsExact(BO->isExact());
1251 }
Owen Anderson48b842e2014-01-18 00:48:14 +00001252 if (isa<FPMathOperator>(BO))
1253 New->copyFastMathFlags(I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001254 return New;
1255 }
1256 case Instruction::ICmp:
1257 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1258 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1259 NewOps[0], NewOps[1]);
1260 case Instruction::FCmp:
1261 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1262 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1263 NewOps[0], NewOps[1]);
1264 case Instruction::Trunc:
1265 case Instruction::ZExt:
1266 case Instruction::SExt:
1267 case Instruction::FPToUI:
1268 case Instruction::FPToSI:
1269 case Instruction::UIToFP:
1270 case Instruction::SIToFP:
1271 case Instruction::FPTrunc:
1272 case Instruction::FPExt: {
1273 // It's possible that the mask has a different number of elements from
1274 // the original cast. We recompute the destination type to match the mask.
Christopher Tetreault155740c2020-04-08 10:42:22 -07001275 Type *DestTy = VectorType::get(
1276 I->getType()->getScalarType(),
1277 cast<VectorType>(NewOps[0]->getType())->getElementCount());
Nick Lewyckya2b77202013-05-31 00:59:42 +00001278 assert(NewOps.size() == 1 && "cast with #ops != 1");
1279 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1280 "", I);
1281 }
1282 case Instruction::GetElementPtr: {
1283 Value *Ptr = NewOps[0];
1284 ArrayRef<Value*> Idx = NewOps.slice(1);
David Blaikie22319eb2015-03-14 19:24:04 +00001285 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1286 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001287 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1288 return GEP;
1289 }
1290 }
1291 llvm_unreachable("failed to rebuild vector instructions");
1292}
1293
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001294static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
Nick Lewyckya2b77202013-05-31 00:59:42 +00001295 // Mask.size() does not need to be equal to the number of vector elements.
1296
1297 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Sanjay Patelce36b032017-10-09 17:54:46 +00001298 Type *EltTy = V->getType()->getScalarType();
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001299 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
Sanjay Patelce36b032017-10-09 17:54:46 +00001300 if (isa<UndefValue>(V))
1301 return UndefValue::get(VectorType::get(EltTy, Mask.size()));
1302
1303 if (isa<ConstantAggregateZero>(V))
1304 return ConstantAggregateZero::get(VectorType::get(EltTy, Mask.size()));
1305
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001306 if (Constant *C = dyn_cast<Constant>(V))
Nick Lewyckya2b77202013-05-31 00:59:42 +00001307 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001308 Mask);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001309
1310 Instruction *I = cast<Instruction>(V);
1311 switch (I->getOpcode()) {
1312 case Instruction::Add:
1313 case Instruction::FAdd:
1314 case Instruction::Sub:
1315 case Instruction::FSub:
1316 case Instruction::Mul:
1317 case Instruction::FMul:
1318 case Instruction::UDiv:
1319 case Instruction::SDiv:
1320 case Instruction::FDiv:
1321 case Instruction::URem:
1322 case Instruction::SRem:
1323 case Instruction::FRem:
1324 case Instruction::Shl:
1325 case Instruction::LShr:
1326 case Instruction::AShr:
1327 case Instruction::And:
1328 case Instruction::Or:
1329 case Instruction::Xor:
1330 case Instruction::ICmp:
1331 case Instruction::FCmp:
1332 case Instruction::Trunc:
1333 case Instruction::ZExt:
1334 case Instruction::SExt:
1335 case Instruction::FPToUI:
1336 case Instruction::FPToSI:
1337 case Instruction::UIToFP:
1338 case Instruction::SIToFP:
1339 case Instruction::FPTrunc:
1340 case Instruction::FPExt:
1341 case Instruction::Select:
1342 case Instruction::GetElementPtr: {
1343 SmallVector<Value*, 8> NewOps;
Christopher Tetreault155740c2020-04-08 10:42:22 -07001344 bool NeedsRebuild =
1345 (Mask.size() != cast<VectorType>(I->getType())->getNumElements());
Nick Lewyckya2b77202013-05-31 00:59:42 +00001346 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
Mikael Holmen150a7ec2019-04-01 14:10:10 +00001347 Value *V;
1348 // Recursively call evaluateInDifferentElementOrder on vector arguments
1349 // as well. E.g. GetElementPtr may have scalar operands even if the
1350 // return value is a vector, so we need to examine the operand type.
1351 if (I->getOperand(i)->getType()->isVectorTy())
1352 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1353 else
1354 V = I->getOperand(i);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001355 NewOps.push_back(V);
1356 NeedsRebuild |= (V != I->getOperand(i));
1357 }
1358 if (NeedsRebuild) {
Sanjay Patel431e1142015-11-17 17:24:08 +00001359 return buildNew(I, NewOps);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001360 }
1361 return I;
1362 }
1363 case Instruction::InsertElement: {
1364 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
Nick Lewyckya2b77202013-05-31 00:59:42 +00001365
1366 // The insertelement was inserting at Element. Figure out which element
1367 // that becomes after shuffling. The answer is guaranteed to be unique
1368 // by CanEvaluateShuffled.
Nick Lewycky3f715e22013-06-01 20:51:31 +00001369 bool Found = false;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001370 int Index = 0;
Nick Lewycky3f715e22013-06-01 20:51:31 +00001371 for (int e = Mask.size(); Index != e; ++Index) {
1372 if (Mask[Index] == Element) {
1373 Found = true;
Nick Lewyckya2b77202013-05-31 00:59:42 +00001374 break;
Nick Lewycky3f715e22013-06-01 20:51:31 +00001375 }
1376 }
Nick Lewyckya2b77202013-05-31 00:59:42 +00001377
Hao Liu26abebb2014-01-08 03:06:15 +00001378 // If element is not in Mask, no need to handle the operand 1 (element to
1379 // be inserted). Just evaluate values in operand 0 according to Mask.
Nick Lewycky3f715e22013-06-01 20:51:31 +00001380 if (!Found)
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001381 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
Joey Goulya3250f22013-07-12 23:08:06 +00001382
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001383 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001384 return InsertElementInst::Create(V, I->getOperand(1),
Sanjay Patel54d31ef2018-09-29 15:05:24 +00001385 ConstantInt::get(I32Ty, Index), "", I);
Nick Lewyckya2b77202013-05-31 00:59:42 +00001386 }
1387 }
1388 llvm_unreachable("failed to reorder elements of vector instruction!");
1389}
Chris Lattnerec97a902010-01-05 05:36:20 +00001390
JF Bastiend52c9902015-02-25 22:30:51 +00001391// Returns true if the shuffle is extracting a contiguous range of values from
1392// LHS, for example:
1393// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1394// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1395// Shuffles to: |EE|FF|GG|HH|
1396// +--+--+--+--+
1397static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001398 ArrayRef<int> Mask) {
Christopher Tetreault155740c2020-04-08 10:42:22 -07001399 unsigned LHSElems =
1400 cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
JF Bastiend52c9902015-02-25 22:30:51 +00001401 unsigned MaskElems = Mask.size();
1402 unsigned BegIdx = Mask.front();
1403 unsigned EndIdx = Mask.back();
1404 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1405 return false;
1406 for (unsigned I = 0; I != MaskElems; ++I)
1407 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1408 return false;
1409 return true;
1410}
1411
Sanjay Patelb999d742018-07-02 17:42:29 +00001412/// These are the ingredients in an alternate form binary operator as described
1413/// below.
1414struct BinopElts {
1415 BinaryOperator::BinaryOps Opcode;
1416 Value *Op0;
1417 Value *Op1;
1418 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1419 Value *V0 = nullptr, Value *V1 = nullptr) :
1420 Opcode(Opc), Op0(V0), Op1(V1) {}
1421 operator bool() const { return Opcode != 0; }
1422};
1423
1424/// Binops may be transformed into binops with different opcodes and operands.
1425/// Reverse the usual canonicalization to enable folds with the non-canonical
1426/// form of the binop. If a transform is possible, return the elements of the
1427/// new binop. If not, return invalid elements.
1428static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1429 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1430 Type *Ty = BO->getType();
1431 switch (BO->getOpcode()) {
1432 case Instruction::Shl: {
1433 // shl X, C --> mul X, (1 << C)
1434 Constant *C;
1435 if (match(BO1, m_Constant(C))) {
1436 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1437 return { Instruction::Mul, BO0, ShlOne };
1438 }
1439 break;
1440 }
1441 case Instruction::Or: {
1442 // or X, C --> add X, C (when X and C have no common bits set)
1443 const APInt *C;
1444 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1445 return { Instruction::Add, BO0, BO1 };
1446 break;
1447 }
1448 default:
1449 break;
1450 }
1451 return {};
1452}
1453
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001454static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1455 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1456
1457 // Are we shuffling together some value and that same value after it has been
1458 // modified by a binop with a constant?
1459 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1460 Constant *C;
1461 bool Op0IsBinop;
1462 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1463 Op0IsBinop = true;
1464 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1465 Op0IsBinop = false;
1466 else
1467 return nullptr;
1468
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001469 // The identity constant for a binop leaves a variable operand unchanged. For
1470 // a vector, this is a splat of something like 0, -1, or 1.
1471 // If there's no identity constant for this binop, we're done.
Sanjay Patel509a1e72018-07-10 15:12:31 +00001472 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001473 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
Sanjay Patel509a1e72018-07-10 15:12:31 +00001474 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001475 if (!IdC)
1476 return nullptr;
1477
1478 // Shuffle identity constants into the lanes that return the original value.
1479 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1480 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1481 // The existing binop constant vector remains in the same operand position.
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001482 ArrayRef<int> Mask = Shuf.getShuffleMask();
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001483 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1484 ConstantExpr::getShuffleVector(IdC, C, Mask);
1485
Sanjay Patel509a1e72018-07-10 15:12:31 +00001486 bool MightCreatePoisonOrUB =
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001487 is_contained(Mask, UndefMaskElem) &&
Sanjay Patel509a1e72018-07-10 15:12:31 +00001488 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1489 if (MightCreatePoisonOrUB)
1490 NewC = getSafeVectorConstantForBinop(BOpcode, NewC, true);
1491
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001492 // shuf (bop X, C), X, M --> bop X, C'
1493 // shuf X, (bop X, C), M --> bop X, C'
Sanjay Patel509a1e72018-07-10 15:12:31 +00001494 Value *X = Op0IsBinop ? Op1 : Op0;
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001495 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1496 NewBO->copyIRFlags(BO);
Sanjay Patel33331062018-07-10 14:27:55 +00001497
1498 // An undef shuffle mask element may propagate as an undef constant element in
1499 // the new binop. That would produce poison where the original code might not.
Sanjay Patel509a1e72018-07-10 15:12:31 +00001500 // If we already made a safe constant, then there's no danger.
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001501 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
Sanjay Patel33331062018-07-10 14:27:55 +00001502 NewBO->dropPoisonGeneratingFlags();
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001503 return NewBO;
1504}
1505
Sanjay Patel0b591032019-07-08 16:26:48 +00001506/// If we have an insert of a scalar to a non-zero element of an undefined
1507/// vector and then shuffle that value, that's the same as inserting to the zero
1508/// element and shuffling. Splatting from the zero element is recognized as the
1509/// canonical form of splat.
1510static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
1511 InstCombiner::BuilderTy &Builder) {
1512 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001513 ArrayRef<int> Mask = Shuf.getShuffleMask();
Sanjay Patel0b591032019-07-08 16:26:48 +00001514 Value *X;
1515 uint64_t IndexC;
1516
1517 // Match a shuffle that is a splat to a non-zero element.
1518 if (!match(Op0, m_OneUse(m_InsertElement(m_Undef(), m_Value(X),
1519 m_ConstantInt(IndexC)))) ||
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001520 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
Sanjay Patel0b591032019-07-08 16:26:48 +00001521 return nullptr;
1522
1523 // Insert into element 0 of an undef vector.
1524 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1525 Constant *Zero = Builder.getInt32(0);
1526 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1527
1528 // Splat from element 0. Any mask element that is undefined remains undefined.
1529 // For example:
1530 // shuf (inselt undef, X, 2), undef, <2,2,undef>
1531 // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
Christopher Tetreault155740c2020-04-08 10:42:22 -07001532 unsigned NumMaskElts = Shuf.getType()->getNumElements();
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001533 SmallVector<int, 16> NewMask(NumMaskElts, 0);
Sanjay Patel0b591032019-07-08 16:26:48 +00001534 for (unsigned i = 0; i != NumMaskElts; ++i)
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001535 if (Mask[i] == UndefMaskElem)
1536 NewMask[i] = Mask[i];
Sanjay Patel0b591032019-07-08 16:26:48 +00001537
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001538 return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
Sanjay Patel0b591032019-07-08 16:26:48 +00001539}
1540
Sanjay Patelb999d742018-07-02 17:42:29 +00001541/// Try to fold shuffles that are the equivalent of a vector select.
Sanjay Patelda667532018-06-29 13:44:06 +00001542static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
Sanjay Patelb999d742018-07-02 17:42:29 +00001543 InstCombiner::BuilderTy &Builder,
1544 const DataLayout &DL) {
Sanjay Patela76b7002018-06-21 20:15:09 +00001545 if (!Shuf.isSelect())
1546 return nullptr;
1547
Sanjay Patele85d2e42019-11-25 11:55:57 -05001548 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1549 // Commuting undef to operand 0 conflicts with another canonicalization.
Christopher Tetreault155740c2020-04-08 10:42:22 -07001550 unsigned NumElts = Shuf.getType()->getNumElements();
Sanjay Patele85d2e42019-11-25 11:55:57 -05001551 if (!isa<UndefValue>(Shuf.getOperand(1)) &&
1552 Shuf.getMaskValue(0) >= (int)NumElts) {
Sanjay Patelb33938d2019-04-08 13:28:29 +00001553 // TODO: Can we assert that both operands of a shuffle-select are not undef
1554 // (otherwise, it would have been folded by instsimplify?
Sanjay Patelb276dd12019-03-31 15:01:30 +00001555 Shuf.commute();
1556 return &Shuf;
1557 }
1558
Sanjay Patel3074b9e2018-07-03 13:44:22 +00001559 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
1560 return I;
1561
Sanjay Patela76b7002018-06-21 20:15:09 +00001562 BinaryOperator *B0, *B1;
1563 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1564 !match(Shuf.getOperand(1), m_BinOp(B1)))
1565 return nullptr;
1566
Sanjay Patelda667532018-06-29 13:44:06 +00001567 Value *X, *Y;
Sanjay Patela76b7002018-06-21 20:15:09 +00001568 Constant *C0, *C1;
Sanjay Patela52963b2018-06-22 12:46:16 +00001569 bool ConstantsAreOp1;
1570 if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
Sanjay Patelda667532018-06-29 13:44:06 +00001571 match(B1, m_BinOp(m_Value(Y), m_Constant(C1))))
Sanjay Patela52963b2018-06-22 12:46:16 +00001572 ConstantsAreOp1 = true;
1573 else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
Sanjay Patelda667532018-06-29 13:44:06 +00001574 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
Sanjay Patela52963b2018-06-22 12:46:16 +00001575 ConstantsAreOp1 = false;
1576 else
Sanjay Patela76b7002018-06-21 20:15:09 +00001577 return nullptr;
1578
Sanjay Patel57bda362018-06-28 17:48:04 +00001579 // We need matching binops to fold the lanes together.
1580 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1581 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1582 bool DropNSW = false;
1583 if (ConstantsAreOp1 && Opc0 != Opc1) {
Sanjay Patel57bda362018-06-28 17:48:04 +00001584 // TODO: We drop "nsw" if shift is converted into multiply because it may
1585 // not be correct when the shift amount is BitWidth - 1. We could examine
1586 // each vector element to determine if it is safe to keep that flag.
Sanjay Patelb999d742018-07-02 17:42:29 +00001587 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
Sanjay Patel57bda362018-06-28 17:48:04 +00001588 DropNSW = true;
Sanjay Patelb999d742018-07-02 17:42:29 +00001589 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1590 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1591 Opc0 = AltB0.Opcode;
1592 C0 = cast<Constant>(AltB0.Op1);
1593 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1594 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1595 Opc1 = AltB1.Opcode;
1596 C1 = cast<Constant>(AltB1.Op1);
Sanjay Patel57bda362018-06-28 17:48:04 +00001597 }
1598 }
1599
1600 if (Opc0 != Opc1)
Sanjay Patel4784e152018-06-21 23:56:59 +00001601 return nullptr;
1602
Sanjay Patel57bda362018-06-28 17:48:04 +00001603 // The opcodes must be the same. Use a new name to make that clear.
1604 BinaryOperator::BinaryOps BOpc = Opc0;
1605
Sanjay Patel06ea4202018-07-10 13:33:26 +00001606 // Select the constant elements needed for the single binop.
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001607 ArrayRef<int> Mask = Shuf.getShuffleMask();
Sanjay Patel06ea4202018-07-10 13:33:26 +00001608 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
1609
Sanjay Patel5bd36642018-07-09 13:21:46 +00001610 // We are moving a binop after a shuffle. When a shuffle has an undefined
1611 // mask element, the result is undefined, but it is not poison or undefined
1612 // behavior. That is not necessarily true for div/rem/shift.
Sanjay Patel5bd36642018-07-09 13:21:46 +00001613 bool MightCreatePoisonOrUB =
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001614 is_contained(Mask, UndefMaskElem) &&
Sanjay Patel5bd36642018-07-09 13:21:46 +00001615 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
Sanjay Patel06ea4202018-07-10 13:33:26 +00001616 if (MightCreatePoisonOrUB)
1617 NewC = getSafeVectorConstantForBinop(BOpc, NewC, ConstantsAreOp1);
Sanjay Patel5bd36642018-07-09 13:21:46 +00001618
Sanjay Patelda667532018-06-29 13:44:06 +00001619 Value *V;
1620 if (X == Y) {
1621 // Remove a binop and the shuffle by rearranging the constant:
1622 // shuffle (op V, C0), (op V, C1), M --> op V, C'
1623 // shuffle (op C0, V), (op C1, V), M --> op C', V
1624 V = X;
Sanjay Patel5bd36642018-07-09 13:21:46 +00001625 } else {
Sanjay Patelda667532018-06-29 13:44:06 +00001626 // If there are 2 different variable operands, we must create a new shuffle
1627 // (select) first, so check uses to ensure that we don't end up with more
1628 // instructions than we started with.
Sanjay Patel5bd36642018-07-09 13:21:46 +00001629 if (!B0->hasOneUse() && !B1->hasOneUse())
1630 return nullptr;
1631
Sanjay Patel06ea4202018-07-10 13:33:26 +00001632 // If we use the original shuffle mask and op1 is *variable*, we would be
1633 // putting an undef into operand 1 of div/rem/shift. This is either UB or
1634 // poison. We do not have to guard against UB when *constants* are op1
1635 // because safe constants guarantee that we do not overflow sdiv/srem (and
1636 // there's no danger for other opcodes).
1637 // TODO: To allow this case, create a new shuffle mask with no undefs.
1638 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001639 return nullptr;
1640
Sanjay Patelda667532018-06-29 13:44:06 +00001641 // Note: In general, we do not create new shuffles in InstCombine because we
1642 // do not know if a target can lower an arbitrary shuffle optimally. In this
1643 // case, the shuffle uses the existing mask, so there is no additional risk.
Sanjay Patelda667532018-06-29 13:44:06 +00001644
1645 // Select the variable vectors first, then perform the binop:
1646 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1647 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001648 V = Builder.CreateShuffleVector(X, Y, Mask);
Sanjay Patelda667532018-06-29 13:44:06 +00001649 }
1650
Sanjay Patelda667532018-06-29 13:44:06 +00001651 Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1652 BinaryOperator::Create(BOpc, NewC, V);
Sanjay Patela76b7002018-06-21 20:15:09 +00001653
Sanjay Patel5bd36642018-07-09 13:21:46 +00001654 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1655 // 1. If we changed an opcode, poison conditions might have changed.
1656 // 2. If the shuffle had undef mask elements, the new binop might have undefs
Sanjay Patelc8d9d812018-07-10 16:09:49 +00001657 // where the original code did not. But if we already made a safe constant,
1658 // then there's no danger.
Sanjay Patela76b7002018-06-21 20:15:09 +00001659 NewBO->copyIRFlags(B0);
1660 NewBO->andIRFlags(B1);
Sanjay Patel57bda362018-06-28 17:48:04 +00001661 if (DropNSW)
1662 NewBO->setHasNoSignedWrap(false);
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001663 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
Sanjay Patel5bd36642018-07-09 13:21:46 +00001664 NewBO->dropPoisonGeneratingFlags();
Sanjay Patela76b7002018-06-21 20:15:09 +00001665 return NewBO;
1666}
1667
Sanjay Patel538a8f02020-04-05 09:46:22 -04001668/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
1669/// Example (little endian):
1670/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
1671static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
1672 bool IsBigEndian) {
1673 // This must be a bitcasted shuffle of 1 vector integer operand.
1674 Type *DestType = Shuf.getType();
1675 Value *X;
1676 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
1677 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
1678 return nullptr;
1679
1680 // The source type must have the same number of elements as the shuffle,
1681 // and the source element type must be larger than the shuffle element type.
1682 Type *SrcType = X->getType();
1683 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
Christopher Tetreault155740c2020-04-08 10:42:22 -07001684 cast<VectorType>(SrcType)->getNumElements() !=
1685 cast<VectorType>(DestType)->getNumElements() ||
Sanjay Patel538a8f02020-04-05 09:46:22 -04001686 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
1687 return nullptr;
1688
1689 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
1690 "Expected a shuffle that decreases length");
1691
1692 // Last, check that the mask chooses the correct low bits for each narrow
1693 // element in the result.
1694 uint64_t TruncRatio =
1695 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
1696 ArrayRef<int> Mask = Shuf.getShuffleMask();
1697 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
1698 if (Mask[i] == UndefMaskElem)
1699 continue;
1700 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
1701 assert(LSBIndex <= std::numeric_limits<int32_t>::max() &&
1702 "Overflowed 32-bits");
1703 if (Mask[i] != (int)LSBIndex)
1704 return nullptr;
1705 }
1706
1707 return new TruncInst(X, DestType);
1708}
1709
Sanjay Patelc1416b62018-09-07 21:03:34 +00001710/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
1711/// narrowing (concatenating with undef and extracting back to the original
1712/// length). This allows replacing the wide select with a narrow select.
Sanjay Patel88194df2018-10-09 15:29:26 +00001713static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
1714 InstCombiner::BuilderTy &Builder) {
Sanjay Patelc1416b62018-09-07 21:03:34 +00001715 // This must be a narrowing identity shuffle. It extracts the 1st N elements
1716 // of the 1st vector operand of a shuffle.
1717 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
1718 return nullptr;
1719
1720 // The vector being shuffled must be a vector select that we can eliminate.
1721 // TODO: The one-use requirement could be eased if X and/or Y are constants.
1722 Value *Cond, *X, *Y;
1723 if (!match(Shuf.getOperand(0),
1724 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
1725 return nullptr;
1726
1727 // We need a narrow condition value. It must be extended with undef elements
1728 // and have the same number of elements as this shuffle.
Christopher Tetreault155740c2020-04-08 10:42:22 -07001729 unsigned NarrowNumElts = Shuf.getType()->getNumElements();
Sanjay Patelc1416b62018-09-07 21:03:34 +00001730 Value *NarrowCond;
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001731 if (!match(Cond, m_OneUse(m_ShuffleVector(m_Value(NarrowCond), m_Undef()))) ||
Christopher Tetreault155740c2020-04-08 10:42:22 -07001732 cast<VectorType>(NarrowCond->getType())->getNumElements() !=
1733 NarrowNumElts ||
Sanjay Patelc1416b62018-09-07 21:03:34 +00001734 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
1735 return nullptr;
1736
1737 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
1738 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
1739 Value *Undef = UndefValue::get(X->getType());
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001740 Value *NarrowX = Builder.CreateShuffleVector(X, Undef, Shuf.getShuffleMask());
1741 Value *NarrowY = Builder.CreateShuffleVector(Y, Undef, Shuf.getShuffleMask());
Sanjay Patelc1416b62018-09-07 21:03:34 +00001742 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
1743}
1744
Sanjay Patel71811462018-10-14 15:25:06 +00001745/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
1746static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
1747 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1748 if (!Shuf.isIdentityWithExtract() || !isa<UndefValue>(Op1))
1749 return nullptr;
1750
1751 Value *X, *Y;
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001752 ArrayRef<int> Mask;
1753 if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Mask(Mask))))
Sanjay Patel71811462018-10-14 15:25:06 +00001754 return nullptr;
1755
Sanjay Patelcddb1e52019-02-05 22:58:45 +00001756 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
1757 // then combining may result in worse codegen.
1758 if (!Op0->hasOneUse())
1759 return nullptr;
1760
Sanjay Patel71811462018-10-14 15:25:06 +00001761 // We are extracting a subvector from a shuffle. Remove excess elements from
1762 // the 1st shuffle mask to eliminate the extract.
1763 //
1764 // This transform is conservatively limited to identity extracts because we do
1765 // not allow arbitrary shuffle mask creation as a target-independent transform
1766 // (because we can't guarantee that will lower efficiently).
1767 //
1768 // If the extracting shuffle has an undef mask element, it transfers to the
1769 // new shuffle mask. Otherwise, copy the original mask element. Example:
1770 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
1771 // shuf X, Y, <C0, undef, C2, undef>
Christopher Tetreault155740c2020-04-08 10:42:22 -07001772 unsigned NumElts = Shuf.getType()->getNumElements();
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001773 SmallVector<int, 16> NewMask(NumElts);
1774 assert(NumElts < Mask.size() &&
Sanjay Patel71811462018-10-14 15:25:06 +00001775 "Identity with extract must have less elements than its inputs");
1776
1777 for (unsigned i = 0; i != NumElts; ++i) {
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001778 int ExtractMaskElt = Shuf.getMaskValue(i);
1779 int MaskElt = Mask[i];
1780 NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
Sanjay Patel71811462018-10-14 15:25:06 +00001781 }
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001782 return new ShuffleVectorInst(X, Y, NewMask);
Sanjay Patel71811462018-10-14 15:25:06 +00001783}
1784
Sanjay Patel396d18a2019-12-10 10:10:05 -05001785/// Try to replace a shuffle with an insertelement or try to replace a shuffle
1786/// operand with the operand of an insertelement.
Nikita Popov5a8819b2020-02-03 21:17:36 +01001787static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
1788 InstCombiner &IC) {
Sanjay Patelb12e4102018-10-30 15:26:39 +00001789 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001790 SmallVector<int, 16> Mask;
1791 Shuf.getShuffleMask(Mask);
Sanjay Patelb12e4102018-10-30 15:26:39 +00001792
1793 // The shuffle must not change vector sizes.
1794 // TODO: This restriction could be removed if the insert has only one use
1795 // (because the transform would require a new length-changing shuffle).
1796 int NumElts = Mask.size();
Christopher Tetreault155740c2020-04-08 10:42:22 -07001797 if (NumElts != (int)(cast<VectorType>(V0->getType())->getNumElements()))
Sanjay Patelb12e4102018-10-30 15:26:39 +00001798 return nullptr;
1799
Sanjay Patel396d18a2019-12-10 10:10:05 -05001800 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
1801 // not be able to handle it there if the insertelement has >1 use.
1802 // If the shuffle has an insertelement operand but does not choose the
1803 // inserted scalar element from that value, then we can replace that shuffle
1804 // operand with the source vector of the insertelement.
1805 Value *X;
1806 uint64_t IdxC;
1807 if (match(V0, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1808 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
Nikita Popov5a8819b2020-02-03 21:17:36 +01001809 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1810 return IC.replaceOperand(Shuf, 0, X);
Sanjay Patel396d18a2019-12-10 10:10:05 -05001811 }
1812 if (match(V1, m_InsertElement(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
1813 // Offset the index constant by the vector width because we are checking for
1814 // accesses to the 2nd vector input of the shuffle.
1815 IdxC += NumElts;
1816 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
Nikita Popov5a8819b2020-02-03 21:17:36 +01001817 if (none_of(Mask, [IdxC](int MaskElt) { return MaskElt == (int)IdxC; }))
1818 return IC.replaceOperand(Shuf, 1, X);
Sanjay Patel396d18a2019-12-10 10:10:05 -05001819 }
1820
Sanjay Patelb12e4102018-10-30 15:26:39 +00001821 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
1822 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
1823 // We need an insertelement with a constant index.
1824 if (!match(V0, m_InsertElement(m_Value(), m_Value(Scalar),
1825 m_ConstantInt(IndexC))))
1826 return false;
1827
1828 // Test the shuffle mask to see if it splices the inserted scalar into the
1829 // operand 1 vector of the shuffle.
1830 int NewInsIndex = -1;
1831 for (int i = 0; i != NumElts; ++i) {
1832 // Ignore undef mask elements.
1833 if (Mask[i] == -1)
1834 continue;
1835
1836 // The shuffle takes elements of operand 1 without lane changes.
1837 if (Mask[i] == NumElts + i)
1838 continue;
1839
1840 // The shuffle must choose the inserted scalar exactly once.
1841 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
1842 return false;
1843
1844 // The shuffle is placing the inserted scalar into element i.
1845 NewInsIndex = i;
1846 }
1847
1848 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
1849
1850 // Index is updated to the potentially translated insertion lane.
1851 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
1852 return true;
1853 };
1854
1855 // If the shuffle is unnecessary, insert the scalar operand directly into
1856 // operand 1 of the shuffle. Example:
1857 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
1858 Value *Scalar;
1859 ConstantInt *IndexC;
1860 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1861 return InsertElementInst::Create(V1, Scalar, IndexC);
1862
1863 // Try again after commuting shuffle. Example:
1864 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
1865 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
1866 std::swap(V0, V1);
1867 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1868 if (isShufflingScalarIntoOp1(Scalar, IndexC))
1869 return InsertElementInst::Create(V1, Scalar, IndexC);
1870
1871 return nullptr;
1872}
1873
Sanjay Patel6a554182019-05-22 00:32:25 +00001874static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
1875 // Match the operands as identity with padding (also known as concatenation
1876 // with undef) shuffles of the same source type. The backend is expected to
1877 // recreate these concatenations from a shuffle of narrow operands.
1878 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
1879 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
1880 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
1881 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
1882 return nullptr;
1883
1884 // We limit this transform to power-of-2 types because we expect that the
1885 // backend can convert the simplified IR patterns to identical nodes as the
1886 // original IR.
Sanjay Patel3249be12019-05-23 18:46:03 +00001887 // TODO: If we can verify the same behavior for arbitrary types, the
1888 // power-of-2 checks can be removed.
Sanjay Patel6a554182019-05-22 00:32:25 +00001889 Value *X = Shuffle0->getOperand(0);
1890 Value *Y = Shuffle1->getOperand(0);
1891 if (X->getType() != Y->getType() ||
Christopher Tetreault155740c2020-04-08 10:42:22 -07001892 !isPowerOf2_32(Shuf.getType()->getNumElements()) ||
1893 !isPowerOf2_32(Shuffle0->getType()->getNumElements()) ||
1894 !isPowerOf2_32(cast<VectorType>(X->getType())->getNumElements()) ||
Sanjay Patel6a554182019-05-22 00:32:25 +00001895 isa<UndefValue>(X) || isa<UndefValue>(Y))
1896 return nullptr;
1897 assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
1898 isa<UndefValue>(Shuffle1->getOperand(1)) &&
1899 "Unexpected operand for identity shuffle");
1900
1901 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
1902 // operands directly by adjusting the shuffle mask to account for the narrower
1903 // types:
1904 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
Christopher Tetreault155740c2020-04-08 10:42:22 -07001905 int NarrowElts = cast<VectorType>(X->getType())->getNumElements();
1906 int WideElts = Shuffle0->getType()->getNumElements();
Sanjay Patel6a554182019-05-22 00:32:25 +00001907 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
1908
1909 Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001910 ArrayRef<int> Mask = Shuf.getShuffleMask();
Sanjay Patel6a554182019-05-22 00:32:25 +00001911 SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
1912 for (int i = 0, e = Mask.size(); i != e; ++i) {
1913 if (Mask[i] == -1)
1914 continue;
Sanjay Patel3249be12019-05-23 18:46:03 +00001915
1916 // If this shuffle is choosing an undef element from 1 of the sources, that
1917 // element is undef.
1918 if (Mask[i] < WideElts) {
1919 if (Shuffle0->getMaskValue(Mask[i]) == -1)
1920 continue;
1921 } else {
1922 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
1923 continue;
1924 }
1925
1926 // If this shuffle is choosing from the 1st narrow op, the mask element is
1927 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
1928 // element is offset down to adjust for the narrow vector widths.
1929 if (Mask[i] < WideElts) {
1930 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
Sanjay Patel6a554182019-05-22 00:32:25 +00001931 NewMask[i] = ConstantInt::get(I32Ty, Mask[i]);
Sanjay Patel3249be12019-05-23 18:46:03 +00001932 } else {
1933 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
Sanjay Patel6a554182019-05-22 00:32:25 +00001934 NewMask[i] = ConstantInt::get(I32Ty, Mask[i] - (WideElts - NarrowElts));
Sanjay Patel3249be12019-05-23 18:46:03 +00001935 }
Sanjay Patel6a554182019-05-22 00:32:25 +00001936 }
1937 return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
1938}
1939
Chris Lattnerec97a902010-01-05 05:36:20 +00001940Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
1941 Value *LHS = SVI.getOperand(0);
1942 Value *RHS = SVI.getOperand(1);
Sanjay Patelf4448062020-04-02 13:44:50 -04001943 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
1944 if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
1945 SVI.getType(), ShufQuery))
Zvi Rackover82bf48d2017-04-04 04:47:57 +00001946 return replaceInstUsesWith(SVI, V);
1947
Sanjay Patel35827162019-11-25 13:30:45 -05001948 // shuffle x, x, mask --> shuffle x, undef, mask'
Christopher Tetreault155740c2020-04-08 10:42:22 -07001949 unsigned VWidth = SVI.getType()->getNumElements();
1950 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001951 ArrayRef<int> Mask = SVI.getShuffleMask();
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00001952 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
Sanjay Patelf4448062020-04-02 13:44:50 -04001953
1954 // Peek through a bitcasted shuffle operand by scaling the mask. If the
1955 // simulated shuffle can simplify, then this shuffle is unnecessary:
1956 // shuf (bitcast X), undef, Mask --> bitcast X'
1957 // TODO: This could be extended to allow length-changing shuffles and/or casts
1958 // to narrower elements. The transform might also be obsoleted if we
1959 // allowed canonicalization of bitcasted shuffles.
1960 Value *X;
1961 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
1962 X->getType()->isVectorTy() && VWidth == LHSWidth &&
Christopher Tetreault155740c2020-04-08 10:42:22 -07001963 cast<VectorType>(X->getType())->getNumElements() >= VWidth) {
Sanjay Patelf4448062020-04-02 13:44:50 -04001964 // Create the scaled mask constant.
Christopher Tetreault155740c2020-04-08 10:42:22 -07001965 auto *XType = cast<VectorType>(X->getType());
1966 unsigned XNumElts = XType->getNumElements();
Sanjay Patelf4448062020-04-02 13:44:50 -04001967 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
1968 unsigned ScaleFactor = XNumElts / VWidth;
1969 SmallVector<int, 16> ScaledMask;
Sanjay Patel1318ddb2020-04-11 10:05:49 -04001970 narrowShuffleMaskElts(ScaleFactor, Mask, ScaledMask);
Sanjay Patelf4448062020-04-02 13:44:50 -04001971
1972 // If the shuffled source vector simplifies, cast that value to this
1973 // shuffle's type.
1974 if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
1975 ScaledMask, XType, ShufQuery))
1976 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
1977 }
1978
Sanjay Patel35827162019-11-25 13:30:45 -05001979 if (LHS == RHS) {
Sanjay Patel847aabf2019-11-25 10:54:18 -05001980 assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
Chris Lattnerec97a902010-01-05 05:36:20 +00001981 // Remap any references to RHS to use LHS.
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001982 SmallVector<int, 16> Elts;
Sanjay Patel20684092019-11-25 10:40:21 -05001983 for (unsigned i = 0; i != VWidth; ++i) {
Sanjay Patel35827162019-11-25 13:30:45 -05001984 // Propagate undef elements or force mask to LHS.
1985 if (Mask[i] < 0)
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001986 Elts.push_back(UndefMaskElem);
Sanjay Patelfc31b582019-11-25 11:11:12 -05001987 else
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001988 Elts.push_back(Mask[i] % LHSWidth);
Chris Lattnerec97a902010-01-05 05:36:20 +00001989 }
Eli Friedman1ee6ec22020-03-31 13:08:59 -07001990 return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
Chris Lattnerec97a902010-01-05 05:36:20 +00001991 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00001992
Sanjay Patel35827162019-11-25 13:30:45 -05001993 // shuffle undef, x, mask --> shuffle x, undef, mask'
1994 if (isa<UndefValue>(LHS)) {
1995 SVI.commute();
1996 return &SVI;
1997 }
1998
Sanjay Patel0b591032019-07-08 16:26:48 +00001999 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2000 return I;
2001
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002002 if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
2003 return I;
2004
Sanjay Patel538a8f02020-04-05 09:46:22 -04002005 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2006 return I;
2007
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002008 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2009 return I;
2010
2011 APInt UndefElts(VWidth, 0);
2012 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2013 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2014 if (V != &SVI)
2015 return replaceInstUsesWith(SVI, V);
2016 return &SVI;
2017 }
2018
2019 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2020 return I;
2021
Sanjay Patel6a554182019-05-22 00:32:25 +00002022 // These transforms have the potential to lose undef knowledge, so they are
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002023 // intentionally placed after SimplifyDemandedVectorElts().
Nikita Popov5a8819b2020-02-03 21:17:36 +01002024 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002025 return I;
Sanjay Patel6a554182019-05-22 00:32:25 +00002026 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2027 return I;
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002028
Sanjay Patel26c119a2018-09-30 13:50:42 +00002029 if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
Sanjay Patel54d31ef2018-09-29 15:05:24 +00002030 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
Sanjay Patel4b198802016-02-01 22:23:39 +00002031 return replaceInstUsesWith(SVI, V);
Nick Lewyckya2b77202013-05-31 00:59:42 +00002032 }
2033
JF Bastiend52c9902015-02-25 22:30:51 +00002034 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2035 // a non-vector type. We can instead bitcast the original vector followed by
2036 // an extract of the desired element:
2037 //
2038 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2039 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2040 // %1 = bitcast <4 x i8> %sroa to i32
2041 // Becomes:
2042 // %bc = bitcast <16 x i8> %in to <4 x i32>
2043 // %ext = extractelement <4 x i32> %bc, i32 0
2044 //
2045 // If the shuffle is extracting a contiguous range of values from the input
2046 // vector then each use which is a bitcast of the extracted size can be
2047 // replaced. This will work if the vector types are compatible, and the begin
2048 // index is aligned to a value in the casted vector type. If the begin index
2049 // isn't aligned then we can shuffle the original vector (keeping the same
2050 // vector type) before extracting.
2051 //
2052 // This code will bail out if the target type is fundamentally incompatible
2053 // with vectors of the source type.
2054 //
2055 // Example of <16 x i8>, target type i32:
2056 // Index range [4,8): v-----------v Will work.
2057 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2058 // <16 x i8>: | | | | | | | | | | | | | | | | |
2059 // <4 x i32>: | | | | |
2060 // +-----------+-----------+-----------+-----------+
2061 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2062 // Target type i40: ^--------------^ Won't work, bail.
Sanjay Patel3f4d1b42019-03-29 16:49:38 +00002063 bool MadeChange = false;
JF Bastiend52c9902015-02-25 22:30:51 +00002064 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2065 Value *V = LHS;
2066 unsigned MaskElems = Mask.size();
JF Bastiend52c9902015-02-25 22:30:51 +00002067 VectorType *SrcTy = cast<VectorType>(V->getType());
2068 unsigned VecBitWidth = SrcTy->getBitWidth();
David Majnemer98cfe2b2015-04-03 20:18:40 +00002069 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
JF Bastiend52c9902015-02-25 22:30:51 +00002070 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2071 unsigned SrcNumElems = SrcTy->getNumElements();
2072 SmallVector<BitCastInst *, 8> BCs;
2073 DenseMap<Type *, Value *> NewBCs;
2074 for (User *U : SVI.users())
2075 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2076 if (!BC->use_empty())
2077 // Only visit bitcasts that weren't previously handled.
2078 BCs.push_back(BC);
2079 for (BitCastInst *BC : BCs) {
Eugene Leviant958fcd72017-02-17 07:36:03 +00002080 unsigned BegIdx = Mask.front();
JF Bastiend52c9902015-02-25 22:30:51 +00002081 Type *TgtTy = BC->getDestTy();
David Majnemer98cfe2b2015-04-03 20:18:40 +00002082 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
JF Bastiend52c9902015-02-25 22:30:51 +00002083 if (!TgtElemBitWidth)
2084 continue;
2085 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2086 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2087 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2088 if (!VecBitWidthsEqual)
2089 continue;
2090 if (!VectorType::isValidElementType(TgtTy))
2091 continue;
2092 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems);
2093 if (!BegIsAligned) {
2094 // Shuffle the input so [0,NumElements) contains the output, and
2095 // [NumElems,SrcNumElems) is undef.
2096 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems,
2097 UndefValue::get(Int32Ty));
2098 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2099 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx);
Craig Topperbb4069e2017-07-07 23:16:26 +00002100 V = Builder.CreateShuffleVector(V, UndefValue::get(V->getType()),
2101 ConstantVector::get(ShuffleMask),
2102 SVI.getName() + ".extract");
JF Bastiend52c9902015-02-25 22:30:51 +00002103 BegIdx = 0;
2104 }
2105 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2106 assert(SrcElemsPerTgtElem);
2107 BegIdx /= SrcElemsPerTgtElem;
2108 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2109 auto *NewBC =
2110 BCAlreadyExists
2111 ? NewBCs[CastSrcTy]
Craig Topperbb4069e2017-07-07 23:16:26 +00002112 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
JF Bastiend52c9902015-02-25 22:30:51 +00002113 if (!BCAlreadyExists)
2114 NewBCs[CastSrcTy] = NewBC;
Craig Topperbb4069e2017-07-07 23:16:26 +00002115 auto *Ext = Builder.CreateExtractElement(
JF Bastiend52c9902015-02-25 22:30:51 +00002116 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2117 // The shufflevector isn't being replaced: the bitcast that used it
2118 // is. InstCombine will visit the newly-created instructions.
Sanjay Patel4b198802016-02-01 22:23:39 +00002119 replaceInstUsesWith(*BC, Ext);
JF Bastiend52c9902015-02-25 22:30:51 +00002120 MadeChange = true;
2121 }
2122 }
2123
Eric Christopher51edc7b2010-08-17 22:55:27 +00002124 // If the LHS is a shufflevector itself, see if we can combine it with this
Eli Friedmance818272011-10-21 19:06:29 +00002125 // one without producing an unusual shuffle.
2126 // Cases that might be simplified:
2127 // 1.
2128 // x1=shuffle(v1,v2,mask1)
2129 // x=shuffle(x1,undef,mask)
2130 // ==>
2131 // x=shuffle(v1,undef,newMask)
2132 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2133 // 2.
2134 // x1=shuffle(v1,undef,mask1)
2135 // x=shuffle(x1,x2,mask)
2136 // where v1.size() == mask1.size()
2137 // ==>
2138 // x=shuffle(v1,x2,newMask)
2139 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2140 // 3.
2141 // x2=shuffle(v2,undef,mask2)
2142 // x=shuffle(x1,x2,mask)
2143 // where v2.size() == mask2.size()
2144 // ==>
2145 // x=shuffle(x1,v2,newMask)
2146 // newMask[i] = (mask[i] < x1.size())
2147 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2148 // 4.
2149 // x1=shuffle(v1,undef,mask1)
2150 // x2=shuffle(v2,undef,mask2)
2151 // x=shuffle(x1,x2,mask)
2152 // where v1.size() == v2.size()
2153 // ==>
2154 // x=shuffle(v1,v2,newMask)
2155 // newMask[i] = (mask[i] < x1.size())
2156 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2157 //
2158 // Here we are really conservative:
Eric Christopher51edc7b2010-08-17 22:55:27 +00002159 // we are absolutely afraid of producing a shuffle mask not in the input
2160 // program, because the code gen may not be smart enough to turn a merged
2161 // shuffle into two specific shuffles: it may produce worse code. As such,
Jim Grosbachd11584a2013-05-01 00:25:27 +00002162 // we only merge two shuffles if the result is either a splat or one of the
2163 // input shuffle masks. In this case, merging the shuffles just removes
2164 // one instruction, which we know is safe. This is good for things like
Eli Friedmance818272011-10-21 19:06:29 +00002165 // turning: (splat(splat)) -> splat, or
2166 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2167 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2168 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2169 if (LHSShuffle)
2170 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
Craig Topperf40110f2014-04-25 05:29:35 +00002171 LHSShuffle = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002172 if (RHSShuffle)
2173 if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
Craig Topperf40110f2014-04-25 05:29:35 +00002174 RHSShuffle = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002175 if (!LHSShuffle && !RHSShuffle)
Craig Topperf40110f2014-04-25 05:29:35 +00002176 return MadeChange ? &SVI : nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002177
Craig Topperf40110f2014-04-25 05:29:35 +00002178 Value* LHSOp0 = nullptr;
2179 Value* LHSOp1 = nullptr;
2180 Value* RHSOp0 = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002181 unsigned LHSOp0Width = 0;
2182 unsigned RHSOp0Width = 0;
2183 if (LHSShuffle) {
2184 LHSOp0 = LHSShuffle->getOperand(0);
2185 LHSOp1 = LHSShuffle->getOperand(1);
Christopher Tetreault155740c2020-04-08 10:42:22 -07002186 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
Eli Friedmance818272011-10-21 19:06:29 +00002187 }
2188 if (RHSShuffle) {
2189 RHSOp0 = RHSShuffle->getOperand(0);
Christopher Tetreault155740c2020-04-08 10:42:22 -07002190 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
Eli Friedmance818272011-10-21 19:06:29 +00002191 }
2192 Value* newLHS = LHS;
2193 Value* newRHS = RHS;
2194 if (LHSShuffle) {
2195 // case 1
Eric Christopher51edc7b2010-08-17 22:55:27 +00002196 if (isa<UndefValue>(RHS)) {
Eli Friedmance818272011-10-21 19:06:29 +00002197 newLHS = LHSOp0;
2198 newRHS = LHSOp1;
2199 }
2200 // case 2 or 4
2201 else if (LHSOp0Width == LHSWidth) {
2202 newLHS = LHSOp0;
2203 }
2204 }
2205 // case 3 or 4
2206 if (RHSShuffle && RHSOp0Width == LHSWidth) {
2207 newRHS = RHSOp0;
2208 }
2209 // case 4
2210 if (LHSOp0 == RHSOp0) {
2211 newLHS = LHSOp0;
Craig Topperf40110f2014-04-25 05:29:35 +00002212 newRHS = nullptr;
Eli Friedmance818272011-10-21 19:06:29 +00002213 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002214
Eli Friedmance818272011-10-21 19:06:29 +00002215 if (newLHS == LHS && newRHS == RHS)
Craig Topperf40110f2014-04-25 05:29:35 +00002216 return MadeChange ? &SVI : nullptr;
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002217
Eli Friedman1ee6ec22020-03-31 13:08:59 -07002218 ArrayRef<int> LHSMask;
2219 ArrayRef<int> RHSMask;
Chris Lattner8326bd82012-01-26 00:42:34 +00002220 if (newLHS != LHS)
2221 LHSMask = LHSShuffle->getShuffleMask();
2222 if (RHSShuffle && newRHS != RHS)
2223 RHSMask = RHSShuffle->getShuffleMask();
2224
Eli Friedmance818272011-10-21 19:06:29 +00002225 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2226 SmallVector<int, 16> newMask;
2227 bool isSplat = true;
2228 int SplatElt = -1;
2229 // Create a new mask for the new ShuffleVectorInst so that the new
2230 // ShuffleVectorInst is equivalent to the original one.
2231 for (unsigned i = 0; i < VWidth; ++i) {
2232 int eltMask;
Craig Topper45d9f4b2013-01-18 05:30:07 +00002233 if (Mask[i] < 0) {
Eli Friedmance818272011-10-21 19:06:29 +00002234 // This element is an undef value.
2235 eltMask = -1;
2236 } else if (Mask[i] < (int)LHSWidth) {
2237 // This element is from left hand side vector operand.
Craig Topper2ea22b02013-01-18 05:09:16 +00002238 //
Eli Friedmance818272011-10-21 19:06:29 +00002239 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2240 // new mask value for the element.
2241 if (newLHS != LHS) {
2242 eltMask = LHSMask[Mask[i]];
2243 // If the value selected is an undef value, explicitly specify it
2244 // with a -1 mask value.
2245 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2246 eltMask = -1;
Craig Topper2ea22b02013-01-18 05:09:16 +00002247 } else
Eli Friedmance818272011-10-21 19:06:29 +00002248 eltMask = Mask[i];
2249 } else {
2250 // This element is from right hand side vector operand
2251 //
2252 // If the value selected is an undef value, explicitly specify it
2253 // with a -1 mask value. (case 1)
2254 if (isa<UndefValue>(RHS))
2255 eltMask = -1;
2256 // If RHS is going to be replaced (case 3 or 4), calculate the
2257 // new mask value for the element.
2258 else if (newRHS != RHS) {
2259 eltMask = RHSMask[Mask[i]-LHSWidth];
2260 // If the value selected is an undef value, explicitly specify it
2261 // with a -1 mask value.
2262 if (eltMask >= (int)RHSOp0Width) {
2263 assert(isa<UndefValue>(RHSShuffle->getOperand(1))
2264 && "should have been check above");
2265 eltMask = -1;
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002266 }
Craig Topper2ea22b02013-01-18 05:09:16 +00002267 } else
Eli Friedmance818272011-10-21 19:06:29 +00002268 eltMask = Mask[i]-LHSWidth;
2269
2270 // If LHS's width is changed, shift the mask value accordingly.
Eugene Zelenko7f0f9bc2017-10-24 21:24:53 +00002271 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
Michael Gottesman02a11412012-10-16 21:29:38 +00002272 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2273 // If newRHS == newLHS, we want to remap any references from newRHS to
2274 // newLHS so that we can properly identify splats that may occur due to
Alp Tokercb402912014-01-24 17:20:08 +00002275 // obfuscation across the two vectors.
Craig Topperf40110f2014-04-25 05:29:35 +00002276 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
Eli Friedmance818272011-10-21 19:06:29 +00002277 eltMask += newLHSWidth;
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002278 }
Eli Friedmance818272011-10-21 19:06:29 +00002279
2280 // Check if this could still be a splat.
2281 if (eltMask >= 0) {
2282 if (SplatElt >= 0 && SplatElt != eltMask)
2283 isSplat = false;
2284 SplatElt = eltMask;
2285 }
2286
2287 newMask.push_back(eltMask);
2288 }
2289
2290 // If the result mask is equal to one of the original shuffle masks,
Jim Grosbachd11584a2013-05-01 00:25:27 +00002291 // or is a splat, do the replacement.
2292 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
Eli Friedmance818272011-10-21 19:06:29 +00002293 SmallVector<Constant*, 16> Elts;
Eli Friedmance818272011-10-21 19:06:29 +00002294 for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
2295 if (newMask[i] < 0) {
2296 Elts.push_back(UndefValue::get(Int32Ty));
2297 } else {
2298 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
2299 }
2300 }
Craig Topperf40110f2014-04-25 05:29:35 +00002301 if (!newRHS)
Eli Friedmance818272011-10-21 19:06:29 +00002302 newRHS = UndefValue::get(newLHS->getType());
2303 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
Nate Begeman2a0ca3e92010-08-13 00:17:53 +00002304 }
Bob Wilson8ecf98b2010-10-29 22:20:43 +00002305
Craig Topperf40110f2014-04-25 05:29:35 +00002306 return MadeChange ? &SVI : nullptr;
Chris Lattnerec97a902010-01-05 05:36:20 +00002307}