[PR27284] Reverse the ownership between DICompileUnit and DISubprogram.

Currently each Function points to a DISubprogram and DISubprogram has a
scope field. For member functions the scope is a DICompositeType. DIScopes
point to the DICompileUnit to facilitate type uniquing.

Distinct DISubprograms (with isDefinition: true) are not part of the type
hierarchy and cannot be uniqued. This change removes the subprograms
list from DICompileUnit and instead adds a pointer to the owning compile
unit to distinct DISubprograms. This would make it easy for ThinLTO to
strip unneeded DISubprograms and their transitively referenced debug info.

Motivation
----------

Materializing DISubprograms is currently the most expensive operation when
doing a ThinLTO build of clang.

We want the DISubprogram to be stored in a separate Bitcode block (or the
same block as the function body) so we can avoid having to expensively
deserialize all DISubprograms together with the global metadata. If a
function has been inlined into another subprogram, we need to store a
reference to the block containing the inlined subprogram.

Attached to https://llvm.org/bugs/show_bug.cgi?id=27284 is a Python script
that updates LLVM IR testcases to the new format.

http://reviews.llvm.org/D19034
<rdar://problem/25256815>

llvm-svn: 266446
diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 46f352f..4cc78bd 100644
--- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -312,20 +312,6 @@
   }
 
   for (DICompileUnit *DIC : F.compile_units()) {
-    // Create our live subprogram list.
-    bool SubprogramChange = false;
-    for (DISubprogram *DISP : DIC->getSubprograms()) {
-      // Make sure we visit each subprogram only once.
-      if (!VisitedSet.insert(DISP).second)
-        continue;
-
-      // If the function referenced by DISP is not null, the function is live.
-      if (LiveSPs.count(DISP))
-        LiveSubprograms.push_back(DISP);
-      else
-        SubprogramChange = true;
-    }
-
     // Create our live global variable list.
     bool GlobalVariableChange = false;
     for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
@@ -341,14 +327,8 @@
         GlobalVariableChange = true;
     }
 
-    // If we found dead subprograms or global variables, replace the current
-    // subprogram list/global variable list with our new live subprogram/global
-    // variable list.
-    if (SubprogramChange) {
-      DIC->replaceSubprograms(MDTuple::get(C, LiveSubprograms));
-      Changed = true;
-    }
-
+    // If we found dead global variables, replace the current global
+    // variable list with our new live global variable list.
     if (GlobalVariableChange) {
       DIC->replaceGlobalVariables(MDTuple::get(C, LiveGlobalVariables));
       Changed = true;
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index e2968c8..9c60505 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -138,7 +138,6 @@
     Module *M;
     LLVMContext *Ctx;
     SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
-    DenseMap<DISubprogram *, Function *> FnMap;
   };
 }
 
@@ -450,28 +449,21 @@
   this->M = &M;
   Ctx = &M.getContext();
 
-  FnMap.clear();
-  for (Function &F : M) {
-    if (DISubprogram *SP = F.getSubprogram())
-      FnMap[SP] = &F;
-  }
-
   if (Options.EmitNotes) emitProfileNotes();
   if (Options.EmitData) return emitProfileArcs();
   return false;
 }
 
-static bool functionHasLines(Function *F) {
+static bool functionHasLines(Function &F) {
   // Check whether this function actually has any source lines. Not only
   // do these waste space, they also can crash gcov.
-  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-    for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
-         I != IE; ++I) {
+  for (auto &BB : F) {
+    for (auto &I : BB) {
       // Debug intrinsic locations correspond to the location of the
       // declaration, not necessarily any statements or expressions.
-      if (isa<DbgInfoIntrinsic>(I)) continue;
+      if (isa<DbgInfoIntrinsic>(&I)) continue;
 
-      const DebugLoc &Loc = I->getDebugLoc();
+      const DebugLoc &Loc = I.getDebugLoc();
       if (!Loc)
         continue;
 
@@ -504,27 +496,27 @@
     std::string EdgeDestinations;
 
     unsigned FunctionIdent = 0;
-    for (auto *SP : CU->getSubprograms()) {
-      Function *F = FnMap[SP];
-      if (!F) continue;
+    for (auto &F : M->functions()) {
+      DISubprogram *SP = F.getSubprogram();
+      if (!SP) continue;
       if (!functionHasLines(F)) continue;
 
       // gcov expects every function to start with an entry block that has a
       // single successor, so split the entry block to make sure of that.
-      BasicBlock &EntryBlock = F->getEntryBlock();
+      BasicBlock &EntryBlock = F.getEntryBlock();
       BasicBlock::iterator It = EntryBlock.begin();
       while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
         ++It;
       EntryBlock.splitBasicBlock(It);
 
-      Funcs.push_back(make_unique<GCOVFunction>(SP, F, &out, FunctionIdent++,
+      Funcs.push_back(make_unique<GCOVFunction>(SP, &F, &out, FunctionIdent++,
                                                 Options.UseCfgChecksum,
                                                 Options.ExitBlockBeforeBody));
       GCOVFunction &Func = *Funcs.back();
 
-      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        GCOVBlock &Block = Func.getBlock(&*BB);
-        TerminatorInst *TI = BB->getTerminator();
+      for (auto &BB : F) {
+        GCOVBlock &Block = Func.getBlock(&BB);
+        TerminatorInst *TI = BB.getTerminator();
         if (int successors = TI->getNumSuccessors()) {
           for (int i = 0; i != successors; ++i) {
             Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
@@ -534,13 +526,12 @@
         }
 
         uint32_t Line = 0;
-        for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
-             I != IE; ++I) {
+        for (auto &I : BB) {
           // Debug intrinsic locations correspond to the location of the
           // declaration, not necessarily any statements or expressions.
-          if (isa<DbgInfoIntrinsic>(I)) continue;
+          if (isa<DbgInfoIntrinsic>(&I)) continue;
 
-          const DebugLoc &Loc = I->getDebugLoc();
+          const DebugLoc &Loc = I.getDebugLoc();
           if (!Loc)
             continue;
 
@@ -581,16 +572,15 @@
   bool Result = false;
   bool InsertIndCounterIncrCode = false;
   for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
-    auto *CU = cast<DICompileUnit>(CU_Nodes->getOperand(i));
     SmallVector<std::pair<GlobalVariable *, MDNode *>, 8> CountersBySP;
-    for (auto *SP : CU->getSubprograms()) {
-      Function *F = FnMap[SP];
-      if (!F) continue;
+    for (auto &F : M->functions()) {
+      DISubprogram *SP = F.getSubprogram();
+      if (!SP) continue;
       if (!functionHasLines(F)) continue;
       if (!Result) Result = true;
       unsigned Edges = 0;
-      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        TerminatorInst *TI = BB->getTerminator();
+      for (auto &BB : F) {
+        TerminatorInst *TI = BB.getTerminator();
         if (isa<ReturnInst>(TI))
           ++Edges;
         else
@@ -610,12 +600,12 @@
       UniqueVector<BasicBlock *> ComplexEdgeSuccs;
 
       unsigned Edge = 0;
-      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-        TerminatorInst *TI = BB->getTerminator();
+      for (auto &BB : F) {
+        TerminatorInst *TI = BB.getTerminator();
         int Successors = isa<ReturnInst>(TI) ? 1 : TI->getNumSuccessors();
         if (Successors) {
           if (Successors == 1) {
-            IRBuilder<> Builder(&*BB->getFirstInsertionPt());
+            IRBuilder<> Builder(&*BB.getFirstInsertionPt());
             Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0,
                                                                 Edge);
             Value *Count = Builder.CreateLoad(Counter);
@@ -635,7 +625,7 @@
             Count = Builder.CreateAdd(Count, Builder.getInt64(1));
             Builder.CreateStore(Count, Counter);
           } else {
-            ComplexEdgePreds.insert(&*BB);
+            ComplexEdgePreds.insert(&BB);
             for (int i = 0; i != Successors; ++i)
               ComplexEdgeSuccs.insert(TI->getSuccessor(i));
           }
@@ -646,7 +636,7 @@
 
       if (!ComplexEdgePreds.empty()) {
         GlobalVariable *EdgeTable =
-          buildEdgeLookupTable(F, Counters,
+          buildEdgeLookupTable(&F, Counters,
                                ComplexEdgePreds, ComplexEdgeSuccs);
         GlobalVariable *EdgeState = getEdgeStateValue();
 
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index cf39ae9..41d340b 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -172,52 +172,17 @@
                        TypeMapper, Materializer);
 }
 
-// Find the MDNode which corresponds to the subprogram data that described F.
-static DISubprogram *FindSubprogram(const Function *F,
-                                    DebugInfoFinder &Finder) {
-  for (DISubprogram *Subprogram : Finder.subprograms()) {
-    if (Subprogram->describes(F))
-      return Subprogram;
-  }
-  return nullptr;
-}
-
-// Add an operand to an existing MDNode. The new operand will be added at the
-// back of the operand list.
-static void AddOperand(DICompileUnit *CU, DISubprogramArray SPs,
-                       Metadata *NewSP) {
-  SmallVector<Metadata *, 16> NewSPs;
-  NewSPs.reserve(SPs.size() + 1);
-  for (auto *SP : SPs)
-    NewSPs.push_back(SP);
-  NewSPs.push_back(NewSP);
-  CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
-}
-
 // Clone the module-level debug info associated with OldFunc. The cloned data
 // will point to NewFunc instead.
 static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc,
                                    ValueToValueMapTy &VMap) {
-  DebugInfoFinder Finder;
-  Finder.processModule(*OldFunc->getParent());
-
-  const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder);
-  if (!OldSubprogramMDNode) return;
-
-  auto *NewSubprogram =
-      cast<DISubprogram>(MapMetadata(OldSubprogramMDNode, VMap));
-  NewFunc->setSubprogram(NewSubprogram);
-
-  for (auto *CU : Finder.compile_units()) {
-    auto Subprograms = CU->getSubprograms();
-    // If the compile unit's function list contains the old function, it should
-    // also contain the new one.
-    for (auto *SP : Subprograms) {
-      if (SP == OldSubprogramMDNode) {
-        AddOperand(CU, Subprograms, NewSubprogram);
-        break;
-      }
-    }
+  if (const DISubprogram *OldSP = OldFunc->getSubprogram()) {
+    auto *NewSP = cast<DISubprogram>(MapMetadata(OldSP, VMap));
+    // FIXME: There ought to be a better way to do this: ValueMapper
+    // will clone the distinct DICompileUnit. Use the original one
+    // instead.
+    NewSP->replaceUnit(OldSP->getUnit());
+    NewFunc->setSubprogram(NewSP);
   }
 }