[PCH] When serializing Stmts, keep track of when sub statements are referenced again and
in such a case just write out a reference of a previously serialized Stmt, instead
of serializing it all over again.

This saves memory + space + [de]serializing time, and avoids blowing up memory
with pathological cases. rdar://10293911

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@142696 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index 7e2d45c..463203b 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -1443,7 +1443,9 @@
 
 /// \brief Write the given substatement or subexpression to the
 /// bitstream.
-void ASTWriter::WriteSubStmt(Stmt *S) {
+void ASTWriter::WriteSubStmt(Stmt *S,
+                             llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
+                             llvm::DenseSet<Stmt *> &ParentStmts) {
   RecordData Record;
   ASTStmtWriter Writer(*this, Record);
   ++NumStatements;
@@ -1453,6 +1455,32 @@
     return;
   }
 
+  llvm::DenseMap<Stmt *, uint64_t>::iterator I = SubStmtEntries.find(S);
+  if (I != SubStmtEntries.end()) {
+    Record.push_back(I->second);
+    Stream.EmitRecord(serialization::STMT_REF_PTR, Record);
+    return;
+  }
+
+#ifndef NDEBUG
+  assert(!ParentStmts.count(S) && "There is a Stmt cycle!");
+
+  struct ParentStmtInserterRAII {
+    Stmt *S;
+    llvm::DenseSet<Stmt *> &ParentStmts;
+
+    ParentStmtInserterRAII(Stmt *S, llvm::DenseSet<Stmt *> &ParentStmts)
+      : S(S), ParentStmts(ParentStmts) {
+      ParentStmts.insert(S);
+    }
+    ~ParentStmtInserterRAII() {
+      ParentStmts.erase(S);
+    }
+  };
+
+  ParentStmtInserterRAII ParentStmtInserter(S, ParentStmts);
+#endif
+
   // Redirect ASTWriter::AddStmt to collect sub stmts.
   SmallVector<Stmt *, 16> SubStmts;
   CollectedStmts = &SubStmts;
@@ -1478,9 +1506,11 @@
   // This simplifies reading and allows to store a variable number of sub stmts
   // without knowing it in advance.
   while (!SubStmts.empty())
-    WriteSubStmt(SubStmts.pop_back_val());
+    WriteSubStmt(SubStmts.pop_back_val(), SubStmtEntries, ParentStmts);
   
   Stream.EmitRecord(Writer.Code, Record, Writer.AbbrevToUse);
+ 
+  SubStmtEntries[S] = Stream.GetCurrentBitNo();
 }
 
 /// \brief Flush all of the statements that have been added to the
@@ -1488,8 +1518,14 @@
 void ASTWriter::FlushStmts() {
   RecordData Record;
 
+  /// \brief Set of parent Stmts for the currently serializing sub stmt.
+  llvm::DenseSet<Stmt *> ParentStmts;
+  /// \brief Offsets of sub stmts already serialized. The offset points
+  /// just after the stmt record.
+  llvm::DenseMap<Stmt *, uint64_t> SubStmtEntries;
+
   for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
-    WriteSubStmt(StmtsToEmit[I]);
+    WriteSubStmt(StmtsToEmit[I], SubStmtEntries, ParentStmts);
     
     assert(N == StmtsToEmit.size() &&
            "Substatement written via AddStmt rather than WriteSubStmt!");
@@ -1498,6 +1534,9 @@
     // expression records that follow this one are part of a different
     // expression.
     Stream.EmitRecord(serialization::STMT_STOP, Record);
+
+    SubStmtEntries.clear();
+    ParentStmts.clear();
   }
 
   StmtsToEmit.clear();