[PCH] When serializing Stmts, keep track of when sub statements are referenced again and
in such a case just write out a reference of a previously serialized Stmt, instead
of serializing it all over again.
This saves memory + space + [de]serializing time, and avoids blowing up memory
with pathological cases. rdar://10293911
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@142696 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp
index ab07b85..85d0f92 100644
--- a/lib/Serialization/ASTReaderStmt.cpp
+++ b/lib/Serialization/ASTReaderStmt.cpp
@@ -1444,6 +1444,10 @@
ReadingKindTracker ReadingKind(Read_Stmt, *this);
llvm::BitstreamCursor &Cursor = F.DeclsCursor;
+
+ // Map of offset to previously deserialized stmt. The offset points
+ /// just after the stmt record.
+ llvm::DenseMap<uint64_t, Stmt *> StmtEntries;
#ifndef NDEBUG
unsigned PrevNumStmts = StmtStack.size();
@@ -1483,11 +1487,19 @@
Idx = 0;
Record.clear();
bool Finished = false;
+ bool IsStmtReference = false;
switch ((StmtCode)Cursor.ReadRecord(Code, Record)) {
case STMT_STOP:
Finished = true;
break;
+ case STMT_REF_PTR:
+ IsStmtReference = true;
+ assert(StmtEntries.find(Record[0]) != StmtEntries.end() &&
+ "No stmt was recorded for this offset reference!");
+ S = StmtEntries[Record[Idx++]];
+ break;
+
case STMT_NULL_PTR:
S = 0;
break;
@@ -2041,8 +2053,11 @@
++NumStatementsRead;
- if (S)
+ if (S && !IsStmtReference) {
Reader.Visit(S);
+ StmtEntries[Cursor.GetCurrentBitNo()] = S;
+ }
+
assert(Idx == Record.size() && "Invalid deserialization of statement");
StmtStack.push_back(S);
diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp
index 7e2d45c..463203b 100644
--- a/lib/Serialization/ASTWriterStmt.cpp
+++ b/lib/Serialization/ASTWriterStmt.cpp
@@ -1443,7 +1443,9 @@
/// \brief Write the given substatement or subexpression to the
/// bitstream.
-void ASTWriter::WriteSubStmt(Stmt *S) {
+void ASTWriter::WriteSubStmt(Stmt *S,
+ llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
+ llvm::DenseSet<Stmt *> &ParentStmts) {
RecordData Record;
ASTStmtWriter Writer(*this, Record);
++NumStatements;
@@ -1453,6 +1455,32 @@
return;
}
+ llvm::DenseMap<Stmt *, uint64_t>::iterator I = SubStmtEntries.find(S);
+ if (I != SubStmtEntries.end()) {
+ Record.push_back(I->second);
+ Stream.EmitRecord(serialization::STMT_REF_PTR, Record);
+ return;
+ }
+
+#ifndef NDEBUG
+ assert(!ParentStmts.count(S) && "There is a Stmt cycle!");
+
+ struct ParentStmtInserterRAII {
+ Stmt *S;
+ llvm::DenseSet<Stmt *> &ParentStmts;
+
+ ParentStmtInserterRAII(Stmt *S, llvm::DenseSet<Stmt *> &ParentStmts)
+ : S(S), ParentStmts(ParentStmts) {
+ ParentStmts.insert(S);
+ }
+ ~ParentStmtInserterRAII() {
+ ParentStmts.erase(S);
+ }
+ };
+
+ ParentStmtInserterRAII ParentStmtInserter(S, ParentStmts);
+#endif
+
// Redirect ASTWriter::AddStmt to collect sub stmts.
SmallVector<Stmt *, 16> SubStmts;
CollectedStmts = &SubStmts;
@@ -1478,9 +1506,11 @@
// This simplifies reading and allows to store a variable number of sub stmts
// without knowing it in advance.
while (!SubStmts.empty())
- WriteSubStmt(SubStmts.pop_back_val());
+ WriteSubStmt(SubStmts.pop_back_val(), SubStmtEntries, ParentStmts);
Stream.EmitRecord(Writer.Code, Record, Writer.AbbrevToUse);
+
+ SubStmtEntries[S] = Stream.GetCurrentBitNo();
}
/// \brief Flush all of the statements that have been added to the
@@ -1488,8 +1518,14 @@
void ASTWriter::FlushStmts() {
RecordData Record;
+ /// \brief Set of parent Stmts for the currently serializing sub stmt.
+ llvm::DenseSet<Stmt *> ParentStmts;
+ /// \brief Offsets of sub stmts already serialized. The offset points
+ /// just after the stmt record.
+ llvm::DenseMap<Stmt *, uint64_t> SubStmtEntries;
+
for (unsigned I = 0, N = StmtsToEmit.size(); I != N; ++I) {
- WriteSubStmt(StmtsToEmit[I]);
+ WriteSubStmt(StmtsToEmit[I], SubStmtEntries, ParentStmts);
assert(N == StmtsToEmit.size() &&
"Substatement written via AddStmt rather than WriteSubStmt!");
@@ -1498,6 +1534,9 @@
// expression records that follow this one are part of a different
// expression.
Stream.EmitRecord(serialization::STMT_STOP, Record);
+
+ SubStmtEntries.clear();
+ ParentStmts.clear();
}
StmtsToEmit.clear();