SLPVectorizer: add support for vectorization of diamond shaped trees. We now perform a preliminary traversal of the graph to collect values with multiple users and check where the users came from. 

llvm-svn: 179414
diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.h b/llvm/lib/Transforms/Vectorize/VecUtils.h
index b1c2b0c..808fb10 100644
--- a/llvm/lib/Transforms/Vectorize/VecUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VecUtils.h
@@ -42,6 +42,14 @@
   BoUpSLP(BasicBlock *Bb, ScalarEvolution *Se, DataLayout *Dl,
          TargetTransformInfo *Tti, AliasAnalysis *Aa);
 
+  /// \brief Take the pointer operand from the Load/Store instruction.
+  /// \returns NULL if this is not a valid Load/Store instruction.
+  static Value *getPointerOperand(Value *I);
+
+  /// \brief Take the address space operand from the Load/Store instruction.
+  /// \returns -1 if this is not a valid Load/Store instruction.
+  static unsigned getAddressSpaceOperand(Value *I);
+
   /// \returns true if the memory operations A and B are consecutive.
   bool isConsecutiveAccess(Value *A, Value *B);
 
@@ -51,25 +59,31 @@
 
   /// \returns the vectorization cost of the subtree that starts at \p VL.
   /// A negative number means that this is profitable.
-  int getTreeRollCost(ValueList &VL, unsigned Depth);
-
-  /// \brief Take the pointer operand from the Load/Store instruction.
-  /// \returns NULL if this is not a valid Load/Store instruction.
-  static Value *getPointerOperand(Value *I);
-
-  /// \brief Take the address space operand from the Load/Store instruction.
-  /// \returns -1 if this is not a valid Load/Store instruction.
-  static unsigned getAddressSpaceOperand(Value *I);
+  int getTreeCost(ValueList &VL);
 
   /// \brief Attempts to order and vectorize a sequence of stores. This
   /// function does a quadratic scan of the given stores.
   /// \returns true if the basic block was modified.
   bool vectorizeStores(StoreList &Stores, int costThreshold);
 
+private:
+  /// \returns This method contains the recursive part of getTreeCost.
+  int getTreeCost_rec(ValueList &VL, unsigned Depth);
+
+  /// \returns This recursive method looks for vectorization hazards such as
+  /// values that are used by multiple users and checks that values are used
+  /// by only one vector lane. It updates the variables LaneMap, MultiUserVals.
+  void getTreeUses_rec(ValueList &VL, unsigned Depth);
+
+  /// \brief This method contains the recursive part of vectorizeTree.
+  Value *vectorizeTree_rec(ValueList &VL, int VF);
+
   /// \brief Number all of the instructions in the block.
   void numberInstructions();
 
-private:
+  ///  \brief Vectorize a sorted sequence of stores.
+  bool vectorizeStoreChain(ValueList &Chain, int CostThreshold);
+
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
   int getScalarizationCost(Type *Ty);
@@ -89,12 +103,34 @@
   /// \returns a vector from a collection of scalars in \p VL.
   Value *Scalarize(ValueList &VL, VectorType *Ty);
 
+private:
   // Maps instructions to numbers and back.
   SmallDenseMap<Value*, int> InstrIdx;
+  // Maps integers to Instructions.
   std::vector<Instruction*> InstrVec;
+
+  // -- containers that are used during getTreeCost -- //
+
+  /// Contains values that must be scalarized because they are used
+  /// by multiple lanes, or by users outside the tree.
+  /// NOTICE: The vectorization methods also use this set.
+  ValueSet MustScalarize;
+  
+  // Contains a list of values that are used outside the current tree. This
+  // set must be reset between runs.
+  ValueSet MultiUserVals;
+  // Maps values in the tree to the vector lanes that uses them. This map must
+  // be reset between runs of getCost.
+  std::map<Value*, int> LaneMap;
   // A list of instructions to ignore while sinking
-  // memory instructions.
+  // memory instructions. This map must be reset between runs of getCost.
   SmallSet<Value*, 8> MemBarrierIgnoreList;
+
+  // -- containers that are used during vectorizeTree -- //
+  // Maps between the first scalar to the vector. This map must be reset between
+  // runs.
+  DenseMap<Value*, Value*> VectorizedValues;
+
   // Analysis and block reference.
   BasicBlock *BB;
   ScalarEvolution *SE;