[Sema, CodeGen] Implement [[likely]] and [[unlikely]] in SwitchStmt

This implements the likelihood attribute for the switch statement. Based on the
discussion in D85091 and D86559 it only handles the attribute when placed on
the case labels or the default labels.

It also marks the likelihood attribute as feature complete. There are more QoI
patches in the pipeline.

Differential Revision: https://reviews.llvm.org/D89210
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp
index e7024b2..0950307 100644
--- a/clang/lib/AST/Stmt.cpp
+++ b/clang/lib/AST/Stmt.cpp
@@ -130,19 +130,30 @@
   StatisticsEnabled = true;
 }
 
-static std::pair<Stmt::Likelihood, const Attr *> getLikelihood(const Stmt *S) {
-  if (const auto *AS = dyn_cast_or_null<AttributedStmt>(S))
-    for (const auto *A : AS->getAttrs()) {
-      if (isa<LikelyAttr>(A))
-        return std::make_pair(Stmt::LH_Likely, A);
+static std::pair<Stmt::Likelihood, const Attr *>
+getLikelihood(ArrayRef<const Attr *> Attrs) {
+  for (const auto *A : Attrs) {
+    if (isa<LikelyAttr>(A))
+      return std::make_pair(Stmt::LH_Likely, A);
 
-      if (isa<UnlikelyAttr>(A))
-        return std::make_pair(Stmt::LH_Unlikely, A);
-    }
+    if (isa<UnlikelyAttr>(A))
+      return std::make_pair(Stmt::LH_Unlikely, A);
+  }
 
   return std::make_pair(Stmt::LH_None, nullptr);
 }
 
+static std::pair<Stmt::Likelihood, const Attr *> getLikelihood(const Stmt *S) {
+  if (const auto *AS = dyn_cast_or_null<AttributedStmt>(S))
+    return getLikelihood(AS->getAttrs());
+
+  return std::make_pair(Stmt::LH_None, nullptr);
+}
+
+Stmt::Likelihood Stmt::getLikelihood(ArrayRef<const Attr *> Attrs) {
+  return ::getLikelihood(Attrs).first;
+}
+
 Stmt::Likelihood Stmt::getLikelihood(const Stmt *S) {
   return ::getLikelihood(S).first;
 }
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index c9e6ce2..4b813be 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -50,7 +50,7 @@
   PGO.setCurrentStmt(S);
 
   // These statements have their own debug info handling.
-  if (EmitSimpleStmt(S))
+  if (EmitSimpleStmt(S, Attrs))
     return;
 
   // Check if we are generating unreachable code.
@@ -370,23 +370,44 @@
   }
 }
 
-bool CodeGenFunction::EmitSimpleStmt(const Stmt *S) {
+bool CodeGenFunction::EmitSimpleStmt(const Stmt *S,
+                                     ArrayRef<const Attr *> Attrs) {
   switch (S->getStmtClass()) {
-  default: return false;
-  case Stmt::NullStmtClass: break;
-  case Stmt::CompoundStmtClass: EmitCompoundStmt(cast<CompoundStmt>(*S)); break;
-  case Stmt::DeclStmtClass:     EmitDeclStmt(cast<DeclStmt>(*S));         break;
-  case Stmt::LabelStmtClass:    EmitLabelStmt(cast<LabelStmt>(*S));       break;
+  default:
+    return false;
+  case Stmt::NullStmtClass:
+    break;
+  case Stmt::CompoundStmtClass:
+    EmitCompoundStmt(cast<CompoundStmt>(*S));
+    break;
+  case Stmt::DeclStmtClass:
+    EmitDeclStmt(cast<DeclStmt>(*S));
+    break;
+  case Stmt::LabelStmtClass:
+    EmitLabelStmt(cast<LabelStmt>(*S));
+    break;
   case Stmt::AttributedStmtClass:
-                            EmitAttributedStmt(cast<AttributedStmt>(*S)); break;
-  case Stmt::GotoStmtClass:     EmitGotoStmt(cast<GotoStmt>(*S));         break;
-  case Stmt::BreakStmtClass:    EmitBreakStmt(cast<BreakStmt>(*S));       break;
-  case Stmt::ContinueStmtClass: EmitContinueStmt(cast<ContinueStmt>(*S)); break;
-  case Stmt::DefaultStmtClass:  EmitDefaultStmt(cast<DefaultStmt>(*S));   break;
-  case Stmt::CaseStmtClass:     EmitCaseStmt(cast<CaseStmt>(*S));         break;
-  case Stmt::SEHLeaveStmtClass: EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); break;
+    EmitAttributedStmt(cast<AttributedStmt>(*S));
+    break;
+  case Stmt::GotoStmtClass:
+    EmitGotoStmt(cast<GotoStmt>(*S));
+    break;
+  case Stmt::BreakStmtClass:
+    EmitBreakStmt(cast<BreakStmt>(*S));
+    break;
+  case Stmt::ContinueStmtClass:
+    EmitContinueStmt(cast<ContinueStmt>(*S));
+    break;
+  case Stmt::DefaultStmtClass:
+    EmitDefaultStmt(cast<DefaultStmt>(*S), Attrs);
+    break;
+  case Stmt::CaseStmtClass:
+    EmitCaseStmt(cast<CaseStmt>(*S), Attrs);
+    break;
+  case Stmt::SEHLeaveStmtClass:
+    EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S));
+    break;
   }
-
   return true;
 }
 
@@ -1221,7 +1242,8 @@
 /// EmitCaseStmtRange - If case statement range is not too big then
 /// add multiple cases to switch instruction, one for each value within
 /// the range. If range is too big then emit "if" condition check.
-void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) {
+void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S,
+                                        ArrayRef<const Attr *> Attrs) {
   assert(S.getRHS() && "Expected RHS value in CaseStmt");
 
   llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext());
@@ -1238,6 +1260,7 @@
   if (LHS.isSigned() ? RHS.slt(LHS) : RHS.ult(LHS))
     return;
 
+  Stmt::Likelihood LH = Stmt::getLikelihood(Attrs);
   llvm::APInt Range = RHS - LHS;
   // FIXME: parameters such as this should not be hardcoded.
   if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) {
@@ -1252,6 +1275,9 @@
     for (unsigned I = 0; I != NCases; ++I) {
       if (SwitchWeights)
         SwitchWeights->push_back(Weight + (Rem ? 1 : 0));
+      else if (SwitchLikelihood)
+        SwitchLikelihood->push_back(LH);
+
       if (Rem)
         Rem--;
       SwitchInsn->addCase(Builder.getInt(LHS), CaseDest);
@@ -1289,7 +1315,9 @@
     // need to update the weight for the default, ie, the first case, to include
     // this case.
     (*SwitchWeights)[0] += ThisCount;
-  }
+  } else if (SwitchLikelihood)
+    Weights = createBranchWeights(LH);
+
   Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights);
 
   // Restore the appropriate insertion point.
@@ -1299,7 +1327,8 @@
     Builder.ClearInsertionPoint();
 }
 
-void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) {
+void CodeGenFunction::EmitCaseStmt(const CaseStmt &S,
+                                   ArrayRef<const Attr *> Attrs) {
   // If there is no enclosing switch instance that we're aware of, then this
   // case statement and its block can be elided.  This situation only happens
   // when we've constant-folded the switch, are emitting the constant case,
@@ -1312,12 +1341,14 @@
 
   // Handle case ranges.
   if (S.getRHS()) {
-    EmitCaseStmtRange(S);
+    EmitCaseStmtRange(S, Attrs);
     return;
   }
 
   llvm::ConstantInt *CaseVal =
     Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext()));
+  if (SwitchLikelihood)
+    SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs));
 
   // If the body of the case is just a 'break', try to not emit an empty block.
   // If we're profiling or we're not optimizing, leave the block in for better
@@ -1358,6 +1389,10 @@
   // that falls through to the next case which is IR intensive.  It also causes
   // deep recursion which can run into stack depth limitations.  Handle
   // sequential non-range case statements specially.
+  //
+  // TODO When the next case has a likelihood attribute the code returns to the
+  // recursive algorithm. Maybe improve this case if it becomes common practice
+  // to use a lot of attributes.
   const CaseStmt *CurCase = &S;
   const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt());
 
@@ -1373,6 +1408,10 @@
       CaseDest = createBasicBlock("sw.bb");
       EmitBlockWithFallThrough(CaseDest, &S);
     }
+    // Since this loop is only executed when the CaseStmt has no attributes
+    // use a hard-coded value.
+    if (SwitchLikelihood)
+      SwitchLikelihood->push_back(Stmt::LH_None);
 
     SwitchInsn->addCase(CaseVal, CaseDest);
     NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt());
@@ -1382,7 +1421,8 @@
   EmitStmt(CurCase->getSubStmt());
 }
 
-void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) {
+void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S,
+                                      ArrayRef<const Attr *> Attrs) {
   // If there is no enclosing switch instance that we're aware of, then this
   // default statement can be elided. This situation only happens when we've
   // constant-folded the switch.
@@ -1395,6 +1435,9 @@
   assert(DefaultBlock->empty() &&
          "EmitDefaultStmt: Default block already defined?");
 
+  if (SwitchLikelihood)
+    SwitchLikelihood->front() = Stmt::getLikelihood(Attrs);
+
   EmitBlockWithFallThrough(DefaultBlock, &S);
 
   EmitStmt(S.getSubStmt());
@@ -1632,10 +1675,67 @@
          FoundCase;
 }
 
+static Optional<SmallVector<uint64_t, 16>>
+getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) {
+  // Are there enough branches to weight them?
+  if (Likelihoods.size() <= 1)
+    return None;
+
+  uint64_t NumUnlikely = 0;
+  uint64_t NumNone = 0;
+  uint64_t NumLikely = 0;
+  for (const auto LH : Likelihoods) {
+    switch (LH) {
+    case Stmt::LH_Unlikely:
+      ++NumUnlikely;
+      break;
+    case Stmt::LH_None:
+      ++NumNone;
+      break;
+    case Stmt::LH_Likely:
+      ++NumLikely;
+      break;
+    }
+  }
+
+  // Is there a likelihood attribute used?
+  if (NumUnlikely == 0 && NumLikely == 0)
+    return None;
+
+  // When multiple cases share the same code they can be combined during
+  // optimization. In that case the weights of the branch will be the sum of
+  // the individual weights. Make sure the combined sum of all neutral cases
+  // doesn't exceed the value of a single likely attribute.
+  // The additions both avoid divisions by 0 and make sure the weights of None
+  // don't exceed the weight of Likely.
+  const uint64_t Likely = INT32_MAX / (NumLikely + 2);
+  const uint64_t None = Likely / (NumNone + 1);
+  const uint64_t Unlikely = 0;
+
+  SmallVector<uint64_t, 16> Result;
+  Result.reserve(Likelihoods.size());
+  for (const auto LH : Likelihoods) {
+    switch (LH) {
+    case Stmt::LH_Unlikely:
+      Result.push_back(Unlikely);
+      break;
+    case Stmt::LH_None:
+      Result.push_back(None);
+      break;
+    case Stmt::LH_Likely:
+      Result.push_back(Likely);
+      break;
+    }
+  }
+
+  return Result;
+}
+
 void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) {
   // Handle nested switch statements.
   llvm::SwitchInst *SavedSwitchInsn = SwitchInsn;
   SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights;
+  SmallVector<Stmt::Likelihood, 16> *SavedSwitchLikelihood = SwitchLikelihood;
   llvm::BasicBlock *SavedCRBlock = CaseRangeBlock;
 
   // See if we can constant fold the condition of the switch and therefore only
@@ -1710,7 +1810,12 @@
     // The default needs to be first. We store the edge count, so we already
     // know the right weight.
     SwitchWeights->push_back(DefaultCount);
+  } else if (CGM.getCodeGenOpts().OptimizationLevel) {
+    SwitchLikelihood = new SmallVector<Stmt::Likelihood, 16>();
+    // Initialize the default case.
+    SwitchLikelihood->push_back(Stmt::LH_None);
   }
+
   CaseRangeBlock = DefaultBlock;
 
   // Clear the insertion point to indicate we are in unreachable code.
@@ -1774,9 +1879,21 @@
       SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
                               createProfileWeights(*SwitchWeights));
     delete SwitchWeights;
+  } else if (SwitchLikelihood) {
+    assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() &&
+           "switch likelihoods do not match switch cases");
+    Optional<SmallVector<uint64_t, 16>> LHW =
+        getLikelihoodWeights(*SwitchLikelihood);
+    if (LHW) {
+      llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+      SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof,
+                              createProfileWeights(*LHW));
+    }
+    delete SwitchLikelihood;
   }
   SwitchInsn = SavedSwitchInsn;
   SwitchWeights = SavedSwitchWeights;
+  SwitchLikelihood = SavedSwitchLikelihood;
   CaseRangeBlock = SavedCRBlock;
 }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 363b418..2dcb531 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1478,21 +1478,6 @@
   return true;
 }
 
-static Optional<std::pair<uint32_t, uint32_t>>
-getLikelihoodWeights(Stmt::Likelihood LH) {
-  switch (LH) {
-  case Stmt::LH_Unlikely:
-    return std::pair<uint32_t, uint32_t>(llvm::UnlikelyBranchWeight,
-                                         llvm::LikelyBranchWeight);
-  case Stmt::LH_None:
-    return None;
-  case Stmt::LH_Likely:
-    return std::pair<uint32_t, uint32_t>(llvm::LikelyBranchWeight,
-                                         llvm::UnlikelyBranchWeight);
-  }
-  llvm_unreachable("Unknown Likelihood");
-}
-
 /// EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g. for an if
 /// statement) to the specified blocks.  Based on the condition, this might try
 /// to simplify the codegen of the conditional based on the branch.
@@ -1692,12 +1677,7 @@
     }
   }
 
-  llvm::MDNode *Weights = nullptr;
-  Optional<std::pair<uint32_t, uint32_t>> LHW = getLikelihoodWeights(LH);
-  if (LHW) {
-    llvm::MDBuilder MDHelper(CGM.getLLVMContext());
-    Weights = MDHelper.createBranchWeights(LHW->first, LHW->second);
-  }
+  llvm::MDNode *Weights = createBranchWeights(LH);
   if (!Weights) {
     uint64_t CurrentCount = std::max(getCurrentProfileCount(), TrueCount);
     Weights = createProfileWeights(TrueCount, CurrentCount - TrueCount);
@@ -2569,3 +2549,27 @@
 
   return llvm::DebugLoc();
 }
+
+static Optional<std::pair<uint32_t, uint32_t>>
+getLikelihoodWeights(Stmt::Likelihood LH) {
+  switch (LH) {
+  case Stmt::LH_Unlikely:
+    return std::pair<uint32_t, uint32_t>(llvm::UnlikelyBranchWeight,
+                                         llvm::LikelyBranchWeight);
+  case Stmt::LH_None:
+    return None;
+  case Stmt::LH_Likely:
+    return std::pair<uint32_t, uint32_t>(llvm::LikelyBranchWeight,
+                                         llvm::UnlikelyBranchWeight);
+  }
+  llvm_unreachable("Unknown Likelihood");
+}
+
+llvm::MDNode *CodeGenFunction::createBranchWeights(Stmt::Likelihood LH) const {
+  Optional<std::pair<uint32_t, uint32_t>> LHW = getLikelihoodWeights(LH);
+  if (!LHW)
+    return nullptr;
+
+  llvm::MDBuilder MDHelper(CGM.getLLVMContext());
+  return MDHelper.createBranchWeights(LHW->first, LHW->second);
+}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index a9fb478..0a199e9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -1395,6 +1395,9 @@
   };
   OpenMPCancelExitStack OMPCancelStack;
 
+  /// Calculate branch weights for the likelihood attribute
+  llvm::MDNode *createBranchWeights(Stmt::Likelihood LH) const;
+
   CodeGenPGO PGO;
 
   /// Calculate branch weights appropriate for PGO data
@@ -1439,6 +1442,9 @@
   /// The branch weights of SwitchInsn when doing instrumentation based PGO.
   SmallVector<uint64_t, 16> *SwitchWeights = nullptr;
 
+  /// The likelihood attributes of the SwitchCase.
+  SmallVector<Stmt::Likelihood, 16> *SwitchLikelihood = nullptr;
+
   /// CaseRangeBlock - This block holds if condition check for last case
   /// statement range in current switch instruction.
   llvm::BasicBlock *CaseRangeBlock = nullptr;
@@ -3075,7 +3081,7 @@
   /// statements.
   ///
   /// \return True if the statement was handled.
-  bool EmitSimpleStmt(const Stmt *S);
+  bool EmitSimpleStmt(const Stmt *S, ArrayRef<const Attr *> Attrs);
 
   Address EmitCompoundStmt(const CompoundStmt &S, bool GetLast = false,
                            AggValueSlot AVS = AggValueSlot::ignored());
@@ -3104,9 +3110,9 @@
   void EmitBreakStmt(const BreakStmt &S);
   void EmitContinueStmt(const ContinueStmt &S);
   void EmitSwitchStmt(const SwitchStmt &S);
-  void EmitDefaultStmt(const DefaultStmt &S);
-  void EmitCaseStmt(const CaseStmt &S);
-  void EmitCaseStmtRange(const CaseStmt &S);
+  void EmitDefaultStmt(const DefaultStmt &S, ArrayRef<const Attr *> Attrs);
+  void EmitCaseStmt(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
+  void EmitCaseStmtRange(const CaseStmt &S, ArrayRef<const Attr *> Attrs);
   void EmitAsmStmt(const AsmStmt &S);
 
   void EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S);