InstCombine: Restrict computeKnownBits() on all Values to OptLevel > 2 As part of r251146 InstCombine was extended to call computeKnownBits on every value in the function to determine whether it happens to be constant. This increases typical compiletime by 1-3% (5% in irgen+opt time) in my measurements. On the other hand this case did not trigger once in the whole llvm-testsuite. This patch introduces the notion of ExpensiveCombines which are only enabled for OptLevel > 2. I removed the check in InstructionSimplify as that is called from various places where the OptLevel is not known but given the rarity of the situation I think a check in InstCombine is enough. Differential Revision: http://reviews.llvm.org/D16835 llvm-svn: 263047

commit: c31032d607c25d8bc53d59b68b797285f2736ea1 [log] [tgz]
author: Matthias Braun <matze@braunis.de> Wed Mar 09 18:47:11 2016 +0000
committer: Matthias Braun <matze@braunis.de> Wed Mar 09 18:47:11 2016 +0000
tree: bb204ddd638c0789effdaa2b5e147bd51b525adb
parent: 0b5d5b881f7130e9e678d18dc97e593614b5ee3e [diff]
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index c490e06..ae4d802 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

@@ -185,6 +185,12 @@
   PM.add(createScopedNoAliasAAWrapperPass());
 }
 
+void PassManagerBuilder::addInstructionCombiningPass(
+    legacy::PassManagerBase &PM) const {
+  bool ExpensiveCombines = OptLevel > 2;
+  PM.add(createInstructionCombiningPass(ExpensiveCombines));
+}
+
 void PassManagerBuilder::populateFunctionPassManager(
     legacy::FunctionPassManager &FPM) {
   addExtensionsToPM(EP_EarlyAsPossible, FPM);
@@ -230,7 +236,8 @@
   MPM.add(createJumpThreadingPass());         // Thread jumps.
   MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
   MPM.add(createCFGSimplificationPass());     // Merge & remove BBs
-  MPM.add(createInstructionCombiningPass());  // Combine silly seq's
+  // Combine silly seq's
+  addInstructionCombiningPass(MPM);
   addExtensionsToPM(EP_Peephole, MPM);
 
   MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
@@ -238,7 +245,7 @@
   MPM.add(createReassociatePass());           // Reassociate expressions
   if (PrepareForThinLTO) {
     MPM.add(createAggressiveDCEPass());        // Delete dead instructions
-    MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+    addInstructionCombiningPass(MPM);          // Combine silly seq's
     return;
   }
   // Rotate Loop - disable header duplication at -Oz
@@ -246,7 +253,7 @@
   MPM.add(createLICMPass());                  // Hoist loop invariants
   MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
   MPM.add(createCFGSimplificationPass());
-  MPM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(MPM);
   MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
@@ -273,7 +280,7 @@
 
   // Run instcombine after redundancy elimination to exploit opportunities
   // opened up by them.
-  MPM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(MPM);
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createJumpThreadingPass());         // Thread jumps
   MPM.add(createCorrelatedValuePropagationPass());
@@ -290,7 +297,7 @@
 
     if (BBVectorize) {
       MPM.add(createBBVectorizePass());
-      MPM.add(createInstructionCombiningPass());
+      addInstructionCombiningPass(MPM);
       addExtensionsToPM(EP_Peephole, MPM);
       if (OptLevel > 1 && UseGVNAfterVectorization)
         MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
@@ -308,7 +315,8 @@
 
   MPM.add(createAggressiveDCEPass());         // Delete dead instructions
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
-  MPM.add(createInstructionCombiningPass());  // Clean up after everything.
+  // Clean up after everything.
+  addInstructionCombiningPass(MPM);
   addExtensionsToPM(EP_Peephole, MPM);
 }
 
@@ -359,7 +367,7 @@
 
     MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
 
-    MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
+    addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
     addExtensionsToPM(EP_Peephole, MPM);
     MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
   }
@@ -480,7 +488,7 @@
   // on -O1 and no #pragma is found). Would be good to have these two passes
   // as function calls, so that we can only pass them when the vectorizer
   // changed the code.
-  MPM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(MPM);
   if (OptLevel > 1 && ExtraVectorizerPasses) {
     // At higher optimization levels, try to clean up any runtime overlap and
     // alignment checks inserted by the vectorizer. We want to track correllated
@@ -490,11 +498,11 @@
     // dead (or speculatable) control flows or more combining opportunities.
     MPM.add(createEarlyCSEPass());
     MPM.add(createCorrelatedValuePropagationPass());
-    MPM.add(createInstructionCombiningPass());
+    addInstructionCombiningPass(MPM);
     MPM.add(createLICMPass());
     MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3));
     MPM.add(createCFGSimplificationPass());
-    MPM.add(createInstructionCombiningPass());
+    addInstructionCombiningPass(MPM);
   }
 
   if (RunSLPAfterLoopVectorization) {
@@ -507,7 +515,7 @@
 
     if (BBVectorize) {
       MPM.add(createBBVectorizePass());
-      MPM.add(createInstructionCombiningPass());
+      addInstructionCombiningPass(MPM);
       addExtensionsToPM(EP_Peephole, MPM);
       if (OptLevel > 1 && UseGVNAfterVectorization)
         MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
@@ -522,13 +530,13 @@
 
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createCFGSimplificationPass());
-  MPM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(MPM);
 
   if (!DisableUnrollLoops) {
     MPM.add(createLoopUnrollPass());    // Unroll small loops
 
     // LoopUnroll may generate some redundency to cleanup.
-    MPM.add(createInstructionCombiningPass());
+    addInstructionCombiningPass(MPM);
 
     // Runtime unrolling will introduce runtime check in loop prologue. If the
     // unrolled loop is a inner loop, then the prologue will be inside the
@@ -595,7 +603,7 @@
   // simplification opportunities, and both can propagate functions through
   // function pointers.  When this happens, we often have to resolve varargs
   // calls, etc, so let instcombine do this.
-  PM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(PM);
   addExtensionsToPM(EP_Peephole, PM);
 
   // Inline small functions
@@ -617,7 +625,7 @@
   PM.add(createArgumentPromotionPass());
 
   // The IPO passes may leave cruft around.  Clean up after them.
-  PM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(PM);
   addExtensionsToPM(EP_Peephole, PM);
   PM.add(createJumpThreadingPass());
 
@@ -656,10 +664,10 @@
   // Now that we've optimized loops (in particular loop induction variables),
   // we may have exposed more scalar opportunities. Run parts of the scalar
   // optimizer again at this point.
-  PM.add(createInstructionCombiningPass()); // Initial cleanup
+  addInstructionCombiningPass(PM); // Initial cleanup
   PM.add(createCFGSimplificationPass()); // if-convert
   PM.add(createSCCPPass()); // Propagate exposed constants
-  PM.add(createInstructionCombiningPass()); // Clean up again
+  addInstructionCombiningPass(PM); // Clean up again
   PM.add(createBitTrackingDCEPass());
 
   // More scalar chains could be vectorized due to more alias information
@@ -675,7 +683,7 @@
     PM.add(createLoadCombinePass());
 
   // Cleanup and simplify the code after the scalar optimizations.
-  PM.add(createInstructionCombiningPass());
+  addInstructionCombiningPass(PM);
   addExtensionsToPM(EP_Peephole, PM);
 
   PM.add(createJumpThreadingPass());

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c251683..1909cba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h

@@ -177,6 +177,8 @@
 private:
   // Mode in which we are running the combiner.
   const bool MinimizeSize;
+  /// Enable combines that trigger rarely but are costly in compiletime.
+  const bool ExpensiveCombines;
 
   AliasAnalysis *AA;
 
@@ -195,11 +197,12 @@
 
 public:
   InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
-               bool MinimizeSize, AliasAnalysis *AA,
+               bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
                AssumptionCache *AC, TargetLibraryInfo *TLI,
                DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
       : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
-        AA(AA), AC(AC), TLI(TLI), DT(DT), DL(DL), LI(LI), MadeIRChange(false) {}
+        ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
+        DL(DL), LI(LI), MadeIRChange(false) {}
 
   /// \brief Run the combiner over the entire worklist until it is empty.
   ///

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index af8d569..d728f1a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

@@ -78,6 +78,10 @@
 STATISTIC(NumFactor   , "Number of factorizations");
 STATISTIC(NumReassoc  , "Number of reassociations");
 
+static cl::opt<bool>
+EnableExpensiveCombines("expensive-combines",
+                        cl::desc("Enable expensive instruction combines"));
+
 Value *InstCombiner::EmitGEPOffset(User *GEP) {
   return llvm::EmitGEPOffset(Builder, DL, GEP);
 }
@@ -2770,9 +2774,9 @@
       }
     }
 
-    // In general, it is possible for computeKnownBits to determine all bits in a
-    // value even when the operands are not all constants.
-    if (!I->use_empty() && I->getType()->isIntegerTy()) {
+    // In general, it is possible for computeKnownBits to determine all bits in
+    // a value even when the operands are not all constants.
+    if (ExpensiveCombines && !I->use_empty() && I->getType()->isIntegerTy()) {
       unsigned BitWidth = I->getType()->getScalarSizeInBits();
       APInt KnownZero(BitWidth, 0);
       APInt KnownOne(BitWidth, 0);
@@ -3043,8 +3047,10 @@
 combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
                                 AliasAnalysis *AA, AssumptionCache &AC,
                                 TargetLibraryInfo &TLI, DominatorTree &DT,
+                                bool ExpensiveCombines = true,
                                 LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
+  ExpensiveCombines |= EnableExpensiveCombines;
 
   /// Builder - This is an IRBuilder that automatically inserts new
   /// instructions into the worklist when they are created.
@@ -3064,8 +3070,8 @@
 
     bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
-    InstCombiner IC(Worklist, &Builder, F.optForMinSize(), AA, &AC, &TLI, &DT,
-                    DL, LI);
+    InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines,
+                    AA, &AC, &TLI, &DT, DL, LI);
     Changed |= IC.run();
 
     if (!Changed)
@@ -3084,7 +3090,8 @@
   auto *LI = AM->getCachedResult<LoopAnalysis>(F);
 
   // FIXME: The AliasAnalysis is not yet supported in the new pass manager
-  if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT, LI))
+  if (!combineInstructionsOverFunction(F, Worklist, nullptr, AC, TLI, DT,
+                                       ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3121,7 +3128,8 @@
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
-  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, LI);
+  return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT,
+                                         ExpensiveCombines, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3144,6 +3152,6 @@
   initializeInstructionCombiningPassPass(*unwrap(R));
 }
 
-FunctionPass *llvm::createInstructionCombiningPass() {
-  return new InstructionCombiningPass();
+FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
+  return new InstructionCombiningPass(ExpensiveCombines);
 }
commit	c31032d607c25d8bc53d59b68b797285f2736ea1	[log] [tgz]
author	Matthias Braun <matze@braunis.de>	Wed Mar 09 18:47:11 2016 +0000
committer	Matthias Braun <matze@braunis.de>	Wed Mar 09 18:47:11 2016 +0000
tree	bb204ddd638c0789effdaa2b5e147bd51b525adb
parent	0b5d5b881f7130e9e678d18dc97e593614b5ee3e [diff]