blockfreq: Only one mass distribution per node

Remove the concepts of "forward" and "general" mass distributions, which
was wrong.  The split might have made sense in an early version of the
algorithm, but it's definitely wrong now.

<rdar://problem/14292693>

llvm-svn: 207195
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 43484a8..f8215bf 100644
--- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -1024,17 +1024,14 @@
   /// This class collates the successor edge weights for later processing.
   ///
   /// \a DidOverflow indicates whether \a Total did overflow while adding to
-  /// the distribution.  It should never overflow twice.  There's no flag for
-  /// whether \a ForwardTotal overflows, since when \a Total exceeds 32-bits
-  /// they both get re-computed during \a normalize().
+  /// the distribution.  It should never overflow twice.
   struct Distribution {
     typedef SmallVector<Weight, 4> WeightList;
     WeightList Weights;    ///< Individual successor weights.
     uint64_t Total;        ///< Sum of all weights.
     bool DidOverflow;      ///< Whether \a Total did overflow.
-    uint32_t ForwardTotal; ///< Total excluding backedges.
 
-    Distribution() : Total(0), DidOverflow(false), ForwardTotal(0) {}
+    Distribution() : Total(0), DidOverflow(false) {}
     void addLocal(const BlockNode &Node, uint64_t Amount) {
       add(Node, Amount, Weight::Local);
     }
@@ -1079,9 +1076,8 @@
   /// \brief Add an edge to the distribution.
   ///
   /// Adds an edge to Succ to Dist.  If \c LoopHead.isValid(), then whether the
-  /// edge is forward/exit/backedge is in the context of LoopHead.  Otherwise,
-  /// every edge should be a forward edge (since all the loops are packaged
-  /// up).
+  /// edge is local/exit/backedge is in the context of LoopHead.  Otherwise,
+  /// every edge should be a local edge (since all the loops are packaged up).
   void addToDist(Distribution &Dist, const LoopData *OuterLoop,
                  const BlockNode &Pred, const BlockNode &Succ, uint64_t Weight);
 
@@ -1119,19 +1115,6 @@
   /// backedges and exits are stored in its entry in Loops.
   ///
   /// Mass is distributed in parallel from two copies of the source mass.
-  ///
-  /// The first mass (forward) represents the distribution of mass through the
-  /// local DAG.  This distribution should lose mass at loop exits and ignore
-  /// backedges.
-  ///
-  /// The second mass (general) represents the behavior of the loop in the
-  /// global context.  In a given distribution from the head, how much mass
-  /// exits, and to where?  How much mass returns to the loop head?
-  ///
-  /// The forward mass should be split up between local successors and exits,
-  /// but only actually distributed to the local successors.  The general mass
-  /// should be split up between all three types of successors, but distributed
-  /// only to exits and backedges.
   void distributeMass(const BlockNode &Source, LoopData *OuterLoop,
                       Distribution &Dist);
 
@@ -1270,23 +1253,17 @@
 ///           in \a LoopData::Exits.  Otherwise, fetch it from
 ///           BranchProbabilityInfo.
 ///
-///         - Each successor is categorized as \a Weight::Local, a normal
-///           forward edge within the current loop, \a Weight::Backedge, a
-///           backedge to the loop header, or \a Weight::Exit, any successor
-///           outside the loop.  The weight, the successor, and its category
-///           are stored in \a Distribution.  There can be multiple edges to
-///           each successor.
+///         - Each successor is categorized as \a Weight::Local, a local edge
+///           within the current loop, \a Weight::Backedge, a backedge to the
+///           loop header, or \a Weight::Exit, any successor outside the loop.
+///           The weight, the successor, and its category are stored in \a
+///           Distribution.  There can be multiple edges to each successor.
 ///
 ///         - Normalize the distribution:  scale weights down so that their sum
 ///           is 32-bits, and coalesce multiple edges to the same node.
 ///
 ///         - Distribute the mass accordingly, dithering to minimize mass loss,
-///           as described in \a distributeMass().  Mass is distributed in
-///           parallel in two ways: forward, and general.  Local successors
-///           take their mass from the forward mass, while exit and backedge
-///           successors take their mass from the general mass.  Additionally,
-///           exit edges use up (ignored) mass from the forward mass, and local
-///           edges use up (ignored) mass from the general distribution.
+///           as described in \a distributeMass().
 ///
 ///     Finally, calculate the loop scale from the accumulated backedge mass.
 ///
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index cb92e5b..744bbe2 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -389,31 +389,12 @@
 ///      2. Calculate the portion's mass as \a RemMass times P.
 ///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
 ///         the current portion's weight and mass.
-///
-/// Mass is distributed in two ways: full distribution and forward
-/// distribution.  The latter ignores backedges, and uses the parallel fields
-/// \a RemForwardWeight and \a RemForwardMass.
 struct DitheringDistributer {
   uint32_t RemWeight;
-  uint32_t RemForwardWeight;
-
   BlockMass RemMass;
-  BlockMass RemForwardMass;
 
   DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
 
-  BlockMass takeLocalMass(uint32_t Weight) {
-    (void)takeMass(Weight);
-    return takeForwardMass(Weight);
-  }
-  BlockMass takeExitMass(uint32_t Weight) {
-    (void)takeForwardMass(Weight);
-    return takeMass(Weight);
-  }
-  BlockMass takeBackedgeMass(uint32_t Weight) { return takeMass(Weight); }
-
-private:
-  BlockMass takeForwardMass(uint32_t Weight);
   BlockMass takeMass(uint32_t Weight);
 };
 }
@@ -422,22 +403,9 @@
                                            const BlockMass &Mass) {
   Dist.normalize();
   RemWeight = Dist.Total;
-  RemForwardWeight = Dist.ForwardTotal;
   RemMass = Mass;
-  RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass();
 }
 
-BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) {
-  // Compute the amount of mass to take.
-  assert(Weight && "invalid weight");
-  assert(Weight <= RemForwardWeight);
-  BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight);
-
-  // Decrement totals (dither).
-  RemForwardWeight -= Weight;
-  RemForwardMass -= Mass;
-  return Mass;
-}
 BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
   assert(Weight && "invalid weight");
   assert(Weight <= RemWeight);
@@ -468,13 +436,6 @@
   W.Amount = Amount;
   W.Type = Type;
   Weights.push_back(W);
-
-  if (Type == Weight::Backedge)
-    return;
-
-  // Update forward total.  Don't worry about overflow here, since then Total
-  // will exceed 32-bits and they'll both be recomputed in normalize().
-  ForwardTotal += Amount;
 }
 
 static void combineWeight(Weight &W, const Weight &OtherW) {
@@ -554,7 +515,6 @@
   // Early exit when combined into a single successor.
   if (Weights.size() == 1) {
     Total = 1;
-    ForwardTotal = Weights.front().Type != Weight::Backedge;
     Weights.front().Amount = 1;
     return;
   }
@@ -574,9 +534,8 @@
     return;
 
   // Recompute the total through accumulation (rather than shifting it) so that
-  // it's accurate after shifting.  ForwardTotal is dirty here anyway.
+  // it's accurate after shifting.
   Total = 0;
-  ForwardTotal = 0;
 
   // Sum the weights to each node and shift right if necessary.
   for (Weight &W : Weights) {
@@ -588,11 +547,6 @@
 
     // Update the total.
     Total += W.Amount;
-    if (W.Type == Weight::Backedge)
-      continue;
-
-    // Update the forward total.
-    ForwardTotal += W.Amount;
   }
   assert(Total <= UINT32_MAX);
 }
@@ -732,8 +686,7 @@
                                                 LoopData *OuterLoop,
                                                 Distribution &Dist) {
   BlockMass Mass = getPackageMass(*this, Source);
-  DEBUG(dbgs() << "  => mass:  " << Mass
-               << " (    general     |    forward     )\n");
+  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");
 
   // Distribute mass to successors as laid out in Dist.
   DitheringDistributer D(Dist, Mass);
@@ -741,8 +694,7 @@
 #ifndef NDEBUG
   auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
                          const char *Desc) {
-    dbgs() << "  => assign " << M << " (" << D.RemMass << "|"
-           << D.RemForwardMass << ")";
+    dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
     if (Desc)
       dbgs() << " [" << Desc << "]";
     if (T.isValid())
@@ -753,11 +705,11 @@
 #endif
 
   for (const Weight &W : Dist.Weights) {
-    // Check for a local edge (forward and non-exit).
+    // Check for a local edge (non-backedge and non-exit).
+    BlockMass Taken = D.takeMass(W.Amount);
     if (W.Type == Weight::Local) {
-      BlockMass Local = D.takeLocalMass(W.Amount);
-      getPackageMass(*this, W.TargetNode) += Local;
-      DEBUG(debugAssign(W.TargetNode, Local, nullptr));
+      getPackageMass(*this, W.TargetNode) += Taken;
+      DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
       continue;
     }
 
@@ -766,17 +718,15 @@
 
     // Check for a backedge.
     if (W.Type == Weight::Backedge) {
-      BlockMass Back = D.takeBackedgeMass(W.Amount);
-      OuterLoop->BackedgeMass += Back;
-      DEBUG(debugAssign(BlockNode(), Back, "back"));
+      OuterLoop->BackedgeMass += Taken;
+      DEBUG(debugAssign(BlockNode(), Taken, "back"));
       continue;
     }
 
     // This must be an exit.
     assert(W.Type == Weight::Exit);
-    BlockMass Exit = D.takeExitMass(W.Amount);
-    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Exit));
-    DEBUG(debugAssign(W.TargetNode, Exit, "exit"));
+    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+    DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
   }
 }
 
diff --git a/llvm/test/Analysis/BlockFrequencyInfo/double_backedge.ll b/llvm/test/Analysis/BlockFrequencyInfo/double_backedge.ll
new file mode 100644
index 0000000..df8217c
--- /dev/null
+++ b/llvm/test/Analysis/BlockFrequencyInfo/double_backedge.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+define void @double_backedge(i1 %x) {
+; CHECK-LABEL: Printing analysis {{.*}} for function 'double_backedge':
+; CHECK-NEXT: block-frequency-info: double_backedge
+entry:
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+  br label %loop
+
+loop:
+; CHECK-NEXT: loop: float = 10.0,
+  br i1 %x, label %exit, label %loop.1, !prof !0
+
+loop.1:
+; CHECK-NEXT: loop.1: float = 9.0,
+  br i1 %x, label %loop, label %loop.2, !prof !1
+
+loop.2:
+; CHECK-NEXT: loop.2: float = 5.0,
+  br label %loop
+
+exit:
+; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
+  ret void
+}
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 9}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 5}