blob: d0ff4441371a6757207dbabd9b3a06780723fc5e [file] [log] [blame]
Sam Parker3828c6f2018-07-23 12:27:47 +00001//===----- ARMCodeGenPrepare.cpp ------------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// This pass inserts intrinsics to handle small types that would otherwise be
12/// promoted during legalization. Here we can manually promote types or insert
13/// intrinsics which can handle narrow types that aren't supported by the
14/// register classes.
15//
16//===----------------------------------------------------------------------===//
17
18#include "ARM.h"
19#include "ARMSubtarget.h"
20#include "ARMTargetMachine.h"
21#include "llvm/ADT/StringRef.h"
22#include "llvm/CodeGen/Passes.h"
23#include "llvm/CodeGen/TargetPassConfig.h"
24#include "llvm/IR/Attributes.h"
25#include "llvm/IR/BasicBlock.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Constants.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Instructions.h"
31#include "llvm/IR/IntrinsicInst.h"
32#include "llvm/IR/Intrinsics.h"
33#include "llvm/IR/Type.h"
34#include "llvm/IR/Value.h"
35#include "llvm/IR/Verifier.h"
36#include "llvm/Pass.h"
37#include "llvm/Support/Casting.h"
38#include "llvm/Support/CommandLine.h"
39
40#define DEBUG_TYPE "arm-codegenprepare"
41
42using namespace llvm;
43
44static cl::opt<bool>
Reid Klecknerb32ff462018-07-31 23:09:42 +000045DisableCGP("arm-disable-cgp", cl::Hidden, cl::init(true),
Sam Parker3828c6f2018-07-23 12:27:47 +000046 cl::desc("Disable ARM specific CodeGenPrepare pass"));
47
48static cl::opt<bool>
49EnableDSP("arm-enable-scalar-dsp", cl::Hidden, cl::init(false),
50 cl::desc("Use DSP instructions for scalar operations"));
51
52static cl::opt<bool>
53EnableDSPWithImms("arm-enable-scalar-dsp-imms", cl::Hidden, cl::init(false),
54 cl::desc("Use DSP instructions for scalar operations\
55 with immediate operands"));
56
57namespace {
58
59class IRPromoter {
60 SmallPtrSet<Value*, 8> NewInsts;
61 SmallVector<Instruction*, 4> InstsToRemove;
62 Module *M = nullptr;
63 LLVMContext &Ctx;
64
65public:
66 IRPromoter(Module *M) : M(M), Ctx(M->getContext()) { }
67
68 void Cleanup() {
69 for (auto *I : InstsToRemove) {
70 LLVM_DEBUG(dbgs() << "ARM CGP: Removing " << *I << "\n");
71 I->dropAllReferences();
72 I->eraseFromParent();
73 }
74 InstsToRemove.clear();
75 NewInsts.clear();
76 }
77
78 void Mutate(Type *OrigTy,
79 SmallPtrSetImpl<Value*> &Visited,
80 SmallPtrSetImpl<Value*> &Leaves,
81 SmallPtrSetImpl<Instruction*> &Roots);
82};
83
84class ARMCodeGenPrepare : public FunctionPass {
85 const ARMSubtarget *ST = nullptr;
86 IRPromoter *Promoter = nullptr;
87 std::set<Value*> AllVisited;
Sam Parker3828c6f2018-07-23 12:27:47 +000088
Sam Parker3828c6f2018-07-23 12:27:47 +000089 bool isSupportedValue(Value *V);
90 bool isLegalToPromote(Value *V);
91 bool TryToPromote(Value *V);
92
93public:
94 static char ID;
Sam Parker8c4b9642018-08-10 13:57:13 +000095 static unsigned TypeSize;
96 Type *OrigTy = nullptr;
Sam Parker3828c6f2018-07-23 12:27:47 +000097
98 ARMCodeGenPrepare() : FunctionPass(ID) {}
99
Sam Parker3828c6f2018-07-23 12:27:47 +0000100 void getAnalysisUsage(AnalysisUsage &AU) const override {
101 AU.addRequired<TargetPassConfig>();
102 }
103
104 StringRef getPassName() const override { return "ARM IR optimizations"; }
105
106 bool doInitialization(Module &M) override;
107 bool runOnFunction(Function &F) override;
Matt Morehousea70685f2018-07-23 17:00:45 +0000108 bool doFinalization(Module &M) override;
Sam Parker3828c6f2018-07-23 12:27:47 +0000109};
110
111}
112
113/// Can the given value generate sign bits.
114static bool isSigned(Value *V) {
115 if (!isa<Instruction>(V))
116 return false;
117
118 unsigned Opc = cast<Instruction>(V)->getOpcode();
119 return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
120 Opc == Instruction::SRem;
121}
122
123/// Some instructions can use 8- and 16-bit operands, and we don't need to
124/// promote anything larger. We disallow booleans to make life easier when
125/// dealing with icmps but allow any other integer that is <= 16 bits. Void
126/// types are accepted so we can handle switches.
127static bool isSupportedType(Value *V) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000128 LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n");
129 Type *Ty = V->getType();
130 if (Ty->isVoidTy())
Sam Parker3828c6f2018-07-23 12:27:47 +0000131 return true;
132
Sam Parker8c4b9642018-08-10 13:57:13 +0000133 if (auto *Ld = dyn_cast<LoadInst>(V))
134 Ty = cast<PointerType>(Ld->getPointerOperandType())->getElementType();
135
136 const IntegerType *IntTy = dyn_cast<IntegerType>(Ty);
137 if (!IntTy) {
138 LLVM_DEBUG(dbgs() << "ARM CGP: No, not an integer.\n");
Sam Parker3828c6f2018-07-23 12:27:47 +0000139 return false;
Sam Parker8c4b9642018-08-10 13:57:13 +0000140 }
Sam Parker3828c6f2018-07-23 12:27:47 +0000141
Sam Parker8c4b9642018-08-10 13:57:13 +0000142 return IntTy->getBitWidth() == ARMCodeGenPrepare::TypeSize;
143}
Sam Parker3828c6f2018-07-23 12:27:47 +0000144
Sam Parker8c4b9642018-08-10 13:57:13 +0000145/// Return true if the given value is a leaf in the use-def chain, producing
146/// a narrow (i8, i16) value. These values will be zext to start the promotion
147/// of the tree to i32. We guarantee that these won't populate the upper bits
148/// of the register. ZExt on the loads will be free, and the same for call
149/// return values because we only accept ones that guarantee a zeroext ret val.
150/// Many arguments will have the zeroext attribute too, so those would be free
151/// too.
152static bool isSource(Value *V) {
153 // TODO Allow truncs and zext to be sources.
154 if (isa<Argument>(V))
155 return true;
156 else if (isa<LoadInst>(V))
157 return true;
158 else if (auto *Call = dyn_cast<CallInst>(V))
159 return Call->hasRetAttr(Attribute::AttrKind::ZExt);
160 return false;
Sam Parker3828c6f2018-07-23 12:27:47 +0000161}
162
163/// Return true if V will require any promoted values to be truncated for the
Sam Parker8c4b9642018-08-10 13:57:13 +0000164/// the IR to remain valid. We can't mutate the value type of these
165/// instructions.
Sam Parker3828c6f2018-07-23 12:27:47 +0000166static bool isSink(Value *V) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000167 // TODO The truncate also isn't actually necessary because we would already
168 // proved that the data value is kept within the range of the original data
169 // type.
Sam Parker3828c6f2018-07-23 12:27:47 +0000170 auto UsesNarrowValue = [](Value *V) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000171 return V->getType()->getScalarSizeInBits() == ARMCodeGenPrepare::TypeSize;
Sam Parker3828c6f2018-07-23 12:27:47 +0000172 };
173
174 if (auto *Store = dyn_cast<StoreInst>(V))
175 return UsesNarrowValue(Store->getValueOperand());
176 if (auto *Return = dyn_cast<ReturnInst>(V))
177 return UsesNarrowValue(Return->getReturnValue());
Sam Parker8c4b9642018-08-10 13:57:13 +0000178 if (auto *Trunc = dyn_cast<TruncInst>(V))
179 return UsesNarrowValue(Trunc->getOperand(0));
Sam Parker3828c6f2018-07-23 12:27:47 +0000180
181 return isa<CallInst>(V);
182}
183
Sam Parker3828c6f2018-07-23 12:27:47 +0000184/// Return whether the instruction can be promoted within any modifications to
185/// it's operands or result.
186static bool isSafeOverflow(Instruction *I) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000187 // FIXME Do we need NSW too?
Sam Parker3828c6f2018-07-23 12:27:47 +0000188 if (isa<OverflowingBinaryOperator>(I) && I->hasNoUnsignedWrap())
189 return true;
190
191 unsigned Opc = I->getOpcode();
192 if (Opc == Instruction::Add || Opc == Instruction::Sub) {
193 // We don't care if the add or sub could wrap if the value is decreasing
194 // and is only being used by an unsigned compare.
195 if (!I->hasOneUse() ||
196 !isa<ICmpInst>(*I->user_begin()) ||
197 !isa<ConstantInt>(I->getOperand(1)))
198 return false;
199
200 auto *CI = cast<ICmpInst>(*I->user_begin());
201 if (CI->isSigned())
202 return false;
203
204 bool NegImm = cast<ConstantInt>(I->getOperand(1))->isNegative();
205 bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
206 ((Opc == Instruction::Add) && NegImm);
207 if (!IsDecreasing)
208 return false;
209
210 LLVM_DEBUG(dbgs() << "ARM CGP: Allowing safe overflow for " << *I << "\n");
211 return true;
212 }
213
214 // Otherwise, if an instruction is using a negative immediate we will need
215 // to fix it up during the promotion.
216 for (auto &Op : I->operands()) {
217 if (auto *Const = dyn_cast<ConstantInt>(Op))
218 if (Const->isNegative())
219 return false;
220 }
221 return false;
222}
223
224static bool shouldPromote(Value *V) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000225 if (!isa<IntegerType>(V->getType()) || isSink(V))
226 return false;
227
228 if (isSource(V))
229 return true;
230
Sam Parker3828c6f2018-07-23 12:27:47 +0000231 auto *I = dyn_cast<Instruction>(V);
232 if (!I)
233 return false;
234
Sam Parker8c4b9642018-08-10 13:57:13 +0000235 if (isa<ICmpInst>(I))
Sam Parker3828c6f2018-07-23 12:27:47 +0000236 return false;
237
Sam Parker3828c6f2018-07-23 12:27:47 +0000238 return true;
239}
240
241/// Return whether we can safely mutate V's type to ExtTy without having to be
242/// concerned with zero extending or truncation.
243static bool isPromotedResultSafe(Value *V) {
244 if (!isa<Instruction>(V))
245 return true;
246
247 if (isSigned(V))
248 return false;
249
250 // If I is only being used by something that will require its value to be
251 // truncated, then we don't care about the promoted result.
252 auto *I = cast<Instruction>(V);
253 if (I->hasOneUse() && isSink(*I->use_begin()))
254 return true;
255
256 if (isa<OverflowingBinaryOperator>(I))
257 return isSafeOverflow(I);
258 return true;
259}
260
261/// Return the intrinsic for the instruction that can perform the same
262/// operation but on a narrow type. This is using the parallel dsp intrinsics
263/// on scalar values.
Sam Parker8c4b9642018-08-10 13:57:13 +0000264static Intrinsic::ID getNarrowIntrinsic(Instruction *I) {
Sam Parker3828c6f2018-07-23 12:27:47 +0000265 // Whether we use the signed or unsigned versions of these intrinsics
266 // doesn't matter because we're not using the GE bits that they set in
267 // the APSR.
268 switch(I->getOpcode()) {
269 default:
270 break;
271 case Instruction::Add:
Sam Parker8c4b9642018-08-10 13:57:13 +0000272 return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_uadd16 :
Sam Parker3828c6f2018-07-23 12:27:47 +0000273 Intrinsic::arm_uadd8;
274 case Instruction::Sub:
Sam Parker8c4b9642018-08-10 13:57:13 +0000275 return ARMCodeGenPrepare::TypeSize == 16 ? Intrinsic::arm_usub16 :
Sam Parker3828c6f2018-07-23 12:27:47 +0000276 Intrinsic::arm_usub8;
277 }
278 llvm_unreachable("unhandled opcode for narrow intrinsic");
279}
280
281void IRPromoter::Mutate(Type *OrigTy,
282 SmallPtrSetImpl<Value*> &Visited,
283 SmallPtrSetImpl<Value*> &Leaves,
284 SmallPtrSetImpl<Instruction*> &Roots) {
285 IRBuilder<> Builder{Ctx};
286 Type *ExtTy = Type::getInt32Ty(M->getContext());
Sam Parker3828c6f2018-07-23 12:27:47 +0000287 SmallPtrSet<Value*, 8> Promoted;
Sam Parker8c4b9642018-08-10 13:57:13 +0000288 LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
289 << ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
Sam Parker3828c6f2018-07-23 12:27:47 +0000290
291 auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
292 SmallVector<Instruction*, 4> Users;
293 Instruction *InstTo = dyn_cast<Instruction>(To);
294 for (Use &U : From->uses()) {
295 auto *User = cast<Instruction>(U.getUser());
296 if (InstTo && User->isIdenticalTo(InstTo))
297 continue;
298 Users.push_back(User);
299 }
300
301 for (auto &U : Users)
302 U->replaceUsesOfWith(From, To);
303 };
304
305 auto FixConst = [&](ConstantInt *Const, Instruction *I) {
306 Constant *NewConst = nullptr;
307 if (isSafeOverflow(I)) {
308 NewConst = (Const->isNegative()) ?
309 ConstantExpr::getSExt(Const, ExtTy) :
310 ConstantExpr::getZExt(Const, ExtTy);
311 } else {
312 uint64_t NewVal = *Const->getValue().getRawData();
313 if (Const->getType() == Type::getInt16Ty(Ctx))
314 NewVal &= 0xFFFF;
315 else
316 NewVal &= 0xFF;
317 NewConst = ConstantInt::get(ExtTy, NewVal);
318 }
319 I->replaceUsesOfWith(Const, NewConst);
320 };
321
322 auto InsertDSPIntrinsic = [&](Instruction *I) {
323 LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for "
324 << *I << "\n");
325 Function *DSPInst =
Sam Parker8c4b9642018-08-10 13:57:13 +0000326 Intrinsic::getDeclaration(M, getNarrowIntrinsic(I));
Sam Parker3828c6f2018-07-23 12:27:47 +0000327 Builder.SetInsertPoint(I);
328 Builder.SetCurrentDebugLocation(I->getDebugLoc());
329 Value *Args[] = { I->getOperand(0), I->getOperand(1) };
330 CallInst *Call = Builder.CreateCall(DSPInst, Args);
331 ReplaceAllUsersOfWith(I, Call);
332 InstsToRemove.push_back(I);
333 NewInsts.insert(Call);
334 };
335
336 auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
337 LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n");
338 Builder.SetInsertPoint(InsertPt);
339 if (auto *I = dyn_cast<Instruction>(V))
340 Builder.SetCurrentDebugLocation(I->getDebugLoc());
341 auto *ZExt = cast<Instruction>(Builder.CreateZExt(V, ExtTy));
342 if (isa<Argument>(V))
343 ZExt->moveBefore(InsertPt);
344 else
345 ZExt->moveAfter(InsertPt);
346 ReplaceAllUsersOfWith(V, ZExt);
347 NewInsts.insert(ZExt);
348 };
349
350 // First, insert extending instructions between the leaves and their users.
351 LLVM_DEBUG(dbgs() << "ARM CGP: Promoting leaves:\n");
352 for (auto V : Leaves) {
353 LLVM_DEBUG(dbgs() << " - " << *V << "\n");
Sam Parker8c4b9642018-08-10 13:57:13 +0000354 if (auto *I = dyn_cast<Instruction>(V))
Sam Parker3828c6f2018-07-23 12:27:47 +0000355 InsertZExt(I, I);
356 else if (auto *Arg = dyn_cast<Argument>(V)) {
357 BasicBlock &BB = Arg->getParent()->front();
358 InsertZExt(Arg, &*BB.getFirstInsertionPt());
359 } else {
360 llvm_unreachable("unhandled leaf that needs extending");
361 }
362 Promoted.insert(V);
363 }
364
365 LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n");
366 // Then mutate the types of the instructions within the tree. Here we handle
367 // constant operands.
368 for (auto *V : Visited) {
369 if (Leaves.count(V))
370 continue;
371
372 if (!isa<Instruction>(V))
373 continue;
374
375 auto *I = cast<Instruction>(V);
376 if (Roots.count(I))
377 continue;
378
379 for (auto &U : I->operands()) {
380 if ((U->getType() == ExtTy) || !isSupportedType(&*U))
381 continue;
382
383 if (auto *Const = dyn_cast<ConstantInt>(&*U))
384 FixConst(Const, I);
385 else if (isa<UndefValue>(&*U))
386 U->mutateType(ExtTy);
387 }
388
389 if (shouldPromote(I)) {
390 I->mutateType(ExtTy);
391 Promoted.insert(I);
392 }
393 }
394
395 // Now we need to remove any zexts that have become unnecessary, as well
396 // as insert any intrinsics.
397 for (auto *V : Visited) {
398 if (Leaves.count(V))
399 continue;
Sam Parker8c4b9642018-08-10 13:57:13 +0000400
401 if (!isa<Instruction>(V))
Sam Parker3828c6f2018-07-23 12:27:47 +0000402 continue;
Sam Parker3828c6f2018-07-23 12:27:47 +0000403
404 if (!shouldPromote(V) || isPromotedResultSafe(V))
405 continue;
406
407 // Replace unsafe instructions with appropriate intrinsic calls.
408 InsertDSPIntrinsic(cast<Instruction>(V));
409 }
410
411 LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
412 // Fix up any stores or returns that use the results of the promoted
413 // chain.
414 for (auto I : Roots) {
415 LLVM_DEBUG(dbgs() << " - " << *I << "\n");
416 Type *TruncTy = OrigTy;
417 if (auto *Store = dyn_cast<StoreInst>(I)) {
418 auto *PtrTy = cast<PointerType>(Store->getPointerOperandType());
419 TruncTy = PtrTy->getElementType();
420 } else if (isa<ReturnInst>(I)) {
421 Function *F = I->getParent()->getParent();
422 TruncTy = F->getFunctionType()->getReturnType();
423 }
424
425 for (unsigned i = 0; i < I->getNumOperands(); ++i) {
426 Value *V = I->getOperand(i);
427 if (Promoted.count(V) || NewInsts.count(V)) {
428 if (auto *Op = dyn_cast<Instruction>(V)) {
429
430 if (auto *Call = dyn_cast<CallInst>(I))
431 TruncTy = Call->getFunctionType()->getParamType(i);
432
433 if (TruncTy == ExtTy)
434 continue;
435
436 LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
437 << " Trunc for " << *Op << "\n");
438 Builder.SetInsertPoint(Op);
439 auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
440 Trunc->moveBefore(I);
441 I->setOperand(i, Trunc);
442 NewInsts.insert(Trunc);
443 }
444 }
445 }
446 }
447 LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete.\n");
448}
449
Sam Parker8c4b9642018-08-10 13:57:13 +0000450/// We accept most instructions, as well as Arguments and ConstantInsts. We
451/// Disallow casts other than zext and truncs and only allow calls if their
452/// return value is zeroext. We don't allow opcodes that can introduce sign
453/// bits.
454bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
455 LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
456
457 if (auto *ICmp = dyn_cast<ICmpInst>(V))
458 return ICmp->isEquality() || !ICmp->isSigned();
459
460 // Memory instructions
461 if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
462 return true;
463
464 // Branches and targets.
465 if( isa<BranchInst>(V) || isa<SwitchInst>(V) || isa<BasicBlock>(V))
466 return true;
467
468 // Non-instruction values that we can handle.
469 if (isa<ConstantInt>(V) || isa<Argument>(V))
470 return isSupportedType(V);
471
472 if (isa<PHINode>(V) || isa<SelectInst>(V) || isa<ReturnInst>(V) ||
473 isa<LoadInst>(V))
474 return isSupportedType(V);
475
476 // Currently, Trunc is the only cast we support.
477 if (auto *Trunc = dyn_cast<TruncInst>(V))
478 return isSupportedType(Trunc->getOperand(0));
479
480 // Special cases for calls as we need to check for zeroext
481 // TODO We should accept calls even if they don't have zeroext, as they can
482 // still be roots.
483 if (auto *Call = dyn_cast<CallInst>(V))
484 return isSupportedType(Call) &&
485 Call->hasRetAttr(Attribute::AttrKind::ZExt);
486
487 if (!isa<BinaryOperator>(V)) {
488 LLVM_DEBUG(dbgs() << "ARM CGP: No, not a binary operator.\n");
489 return false;
490 }
491 if (!isSupportedType(V))
492 return false;
493
494 bool res = !isSigned(V);
495 if (!res)
496 LLVM_DEBUG(dbgs() << "ARM CGP: No, it's a signed instruction.\n");
497 return res;
498}
499
500/// Check that the type of V would be promoted and that the original type is
501/// smaller than the targeted promoted type. Check that we're not trying to
502/// promote something larger than our base 'TypeSize' type.
503bool ARMCodeGenPrepare::isLegalToPromote(Value *V) {
504 if (isPromotedResultSafe(V))
505 return true;
506
507 auto *I = dyn_cast<Instruction>(V);
508 if (!I)
509 return false;
510
511 // If promotion is not safe, can we use a DSP instruction to natively
512 // handle the narrow type?
Sam Parker3828c6f2018-07-23 12:27:47 +0000513 if (!ST->hasDSP() || !EnableDSP || !isSupportedType(I))
514 return false;
515
516 if (ST->isThumb() && !ST->hasThumb2())
517 return false;
518
519 if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub)
520 return false;
521
522 // TODO
523 // Would it be profitable? For Thumb code, these parallel DSP instructions
524 // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For
525 // Cortex-A, specifically Cortex-A72, the latency is double and throughput is
526 // halved. They also do not take immediates as operands.
527 for (auto &Op : I->operands()) {
528 if (isa<Constant>(Op)) {
529 if (!EnableDSPWithImms)
530 return false;
531 }
532 }
533 return true;
534}
535
Sam Parker3828c6f2018-07-23 12:27:47 +0000536bool ARMCodeGenPrepare::TryToPromote(Value *V) {
537 OrigTy = V->getType();
538 TypeSize = OrigTy->getPrimitiveSizeInBits();
Sam Parker8c4b9642018-08-10 13:57:13 +0000539 if (TypeSize > 16)
540 return false;
Sam Parker3828c6f2018-07-23 12:27:47 +0000541
542 if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
543 return false;
544
Sam Parker8c4b9642018-08-10 13:57:13 +0000545 LLVM_DEBUG(dbgs() << "ARM CGP: TryToPromote: " << *V << ", TypeSize = "
546 << TypeSize << "\n");
Sam Parker3828c6f2018-07-23 12:27:47 +0000547
548 SetVector<Value*> WorkList;
549 SmallPtrSet<Value*, 8> Leaves;
550 SmallPtrSet<Instruction*, 4> Roots;
551 WorkList.insert(V);
552 SmallPtrSet<Value*, 16> CurrentVisited;
553 CurrentVisited.clear();
554
555 // Return true if the given value can, or has been, visited. Add V to the
556 // worklist if needed.
557 auto AddLegalInst = [&](Value *V) {
558 if (CurrentVisited.count(V))
559 return true;
560
Sam Parker8c4b9642018-08-10 13:57:13 +0000561 // Ignore pointer value that aren't instructions.
562 if (!isa<Instruction>(V) && isa<PointerType>(V->getType()))
563 return true;
564
Sam Parker3828c6f2018-07-23 12:27:47 +0000565 if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
566 LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n");
567 return false;
568 }
569
570 WorkList.insert(V);
571 return true;
572 };
573
574 // Iterate through, and add to, a tree of operands and users in the use-def.
575 while (!WorkList.empty()) {
576 Value *V = WorkList.back();
577 WorkList.pop_back();
578 if (CurrentVisited.count(V))
579 continue;
580
581 if (!isa<Instruction>(V) && !isSource(V))
582 continue;
583
584 // If we've already visited this value from somewhere, bail now because
585 // the tree has already been explored.
586 // TODO: This could limit the transform, ie if we try to promote something
587 // from an i8 and fail first, before trying an i16.
588 if (AllVisited.count(V)) {
589 LLVM_DEBUG(dbgs() << "ARM CGP: Already visited this: " << *V << "\n");
590 return false;
591 }
592
593 CurrentVisited.insert(V);
594 AllVisited.insert(V);
595
596 // Calls can be both sources and sinks.
597 if (isSink(V))
598 Roots.insert(cast<Instruction>(V));
599 if (isSource(V))
600 Leaves.insert(V);
601 else if (auto *I = dyn_cast<Instruction>(V)) {
602 // Visit operands of any instruction visited.
603 for (auto &U : I->operands()) {
604 if (!AddLegalInst(U))
605 return false;
606 }
607 }
608
609 // Don't visit users of a node which isn't going to be mutated unless its a
610 // source.
611 if (isSource(V) || shouldPromote(V)) {
612 for (Use &U : V->uses()) {
613 if (!AddLegalInst(U.getUser()))
614 return false;
615 }
616 }
617 }
618
Sam Parker3828c6f2018-07-23 12:27:47 +0000619 LLVM_DEBUG(dbgs() << "ARM CGP: Visited nodes:\n";
620 for (auto *I : CurrentVisited)
621 I->dump();
622 );
Sam Parker3828c6f2018-07-23 12:27:47 +0000623
624 Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots);
625 return true;
626}
627
628bool ARMCodeGenPrepare::doInitialization(Module &M) {
629 Promoter = new IRPromoter(&M);
630 return false;
631}
632
633bool ARMCodeGenPrepare::runOnFunction(Function &F) {
634 if (skipFunction(F) || DisableCGP)
635 return false;
636
637 auto *TPC = &getAnalysis<TargetPassConfig>();
638 if (!TPC)
639 return false;
640
641 const TargetMachine &TM = TPC->getTM<TargetMachine>();
642 ST = &TM.getSubtarget<ARMSubtarget>(F);
643 bool MadeChange = false;
644 LLVM_DEBUG(dbgs() << "ARM CGP: Running on " << F.getName() << "\n");
645
646 // Search up from icmps to try to promote their operands.
647 for (BasicBlock &BB : F) {
648 auto &Insts = BB.getInstList();
649 for (auto &I : Insts) {
650 if (AllVisited.count(&I))
651 continue;
652
653 if (isa<ICmpInst>(I)) {
654 auto &CI = cast<ICmpInst>(I);
655
656 // Skip signed or pointer compares
657 if (CI.isSigned() || !isa<IntegerType>(CI.getOperand(0)->getType()))
658 continue;
659
660 LLVM_DEBUG(dbgs() << "ARM CGP: Searching from: " << CI << "\n");
661 for (auto &Op : CI.operands()) {
Sam Parker8c4b9642018-08-10 13:57:13 +0000662 if (auto *I = dyn_cast<Instruction>(Op))
663 MadeChange |= TryToPromote(I);
Sam Parker3828c6f2018-07-23 12:27:47 +0000664 }
665 }
666 }
667 Promoter->Cleanup();
668 LLVM_DEBUG(if (verifyFunction(F, &dbgs())) {
669 dbgs();
670 report_fatal_error("Broken function after type promotion");
671 });
672 }
673 if (MadeChange)
674 LLVM_DEBUG(dbgs() << "After ARMCodeGenPrepare: " << F << "\n");
675
676 return MadeChange;
677}
678
Matt Morehousea70685f2018-07-23 17:00:45 +0000679bool ARMCodeGenPrepare::doFinalization(Module &M) {
680 delete Promoter;
681 return false;
682}
683
Sam Parker3828c6f2018-07-23 12:27:47 +0000684INITIALIZE_PASS_BEGIN(ARMCodeGenPrepare, DEBUG_TYPE,
685 "ARM IR optimizations", false, false)
686INITIALIZE_PASS_END(ARMCodeGenPrepare, DEBUG_TYPE, "ARM IR optimizations",
687 false, false)
688
689char ARMCodeGenPrepare::ID = 0;
Sam Parker8c4b9642018-08-10 13:57:13 +0000690unsigned ARMCodeGenPrepare::TypeSize = 0;
Sam Parker3828c6f2018-07-23 12:27:47 +0000691
692FunctionPass *llvm::createARMCodeGenPreparePass() {
693 return new ARMCodeGenPrepare();
694}