[PCH/Modules] De/Serialize MacroInfos separately from MacroDirectives.

- Serialize the macro directive history into its own section.
- Get rid of the macro updates section.
- When de/serializing an identifier from a module, associate only one macro
  with it per submodule that defined and exported it.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@177761 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index f0b1200..7258963 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -835,7 +835,7 @@
   RECORD(LOCAL_REDECLARATIONS);
   RECORD(OBJC_CATEGORIES);
   RECORD(MACRO_OFFSET);
-  RECORD(MACRO_UPDATES);
+  RECORD(MACRO_TABLE);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -1787,11 +1787,47 @@
 // Preprocessor Serialization
 //===----------------------------------------------------------------------===//
 
-static int compareMacroDefinitions(const void *XPtr, const void *YPtr) {
-  const std::pair<const IdentifierInfo *, MacroInfo *> &X =
-    *(const std::pair<const IdentifierInfo *, MacroInfo *>*)XPtr;
-  const std::pair<const IdentifierInfo *, MacroInfo *> &Y =
-    *(const std::pair<const IdentifierInfo *, MacroInfo *>*)YPtr;
+namespace {
+class ASTMacroTableTrait {
+public:
+  typedef IdentID key_type;
+  typedef key_type key_type_ref;
+
+  struct Data {
+    uint32_t MacroDirectivesOffset;
+  };
+
+  typedef Data data_type;
+  typedef const data_type &data_type_ref;
+
+  static unsigned ComputeHash(IdentID IdID) {
+    return llvm::hash_value(IdID);
+  }
+
+  std::pair<unsigned,unsigned>
+  static EmitKeyDataLength(raw_ostream& Out,
+                           key_type_ref Key, data_type_ref Data) {
+    unsigned KeyLen = 4; // IdentID.
+    unsigned DataLen = 4; // MacroDirectivesOffset.
+    return std::make_pair(KeyLen, DataLen);
+  }
+
+  static void EmitKey(raw_ostream& Out, key_type_ref Key, unsigned KeyLen) {
+    clang::io::Emit32(Out, Key);
+  }
+
+  static void EmitData(raw_ostream& Out, key_type_ref Key, data_type_ref Data,
+                       unsigned) {
+    clang::io::Emit32(Out, Data.MacroDirectivesOffset);
+  }
+};
+} // end anonymous namespace
+
+static int compareMacroDirectives(const void *XPtr, const void *YPtr) {
+  const std::pair<const IdentifierInfo *, MacroDirective *> &X =
+    *(const std::pair<const IdentifierInfo *, MacroDirective *>*)XPtr;
+  const std::pair<const IdentifierInfo *, MacroDirective *> &Y =
+    *(const std::pair<const IdentifierInfo *, MacroDirective *>*)YPtr;
   return X.first->getName().compare(Y.first->getName());
 }
 
@@ -1837,24 +1873,68 @@
     fprintf(stderr, "warning: precompiled header used __DATE__ or __TIME__.\n");
 
 
-  // Loop over all the macro definitions that are live at the end of the file,
+  // Loop over all the macro directives that are live at the end of the file,
   // emitting each to the PP section.
 
-  // Construct the list of macro definitions that need to be serialized.
+  // Construct the list of macro directives that need to be serialized.
   SmallVector<std::pair<const IdentifierInfo *, MacroDirective *>, 2>
-    MacrosToEmit;
-  for (Preprocessor::macro_iterator I = PP.macro_begin(Chain == 0),
-                                    E = PP.macro_end(Chain == 0);
+    MacroDirectives;
+  for (Preprocessor::macro_iterator
+         I = PP.macro_begin(/*IncludeExternalMacros=*/false),
+         E = PP.macro_end(/*IncludeExternalMacros=*/false);
        I != E; ++I) {
-    if (!IsModule || I->second->isPublic()) {
-      MacrosToEmit.push_back(std::make_pair(I->first, I->second));
-    }
+    MacroDirectives.push_back(std::make_pair(I->first, I->second));
   }
 
   // Sort the set of macro definitions that need to be serialized by the
   // name of the macro, to provide a stable ordering.
-  llvm::array_pod_sort(MacrosToEmit.begin(), MacrosToEmit.end(),
-                       &compareMacroDefinitions);
+  llvm::array_pod_sort(MacroDirectives.begin(), MacroDirectives.end(),
+                       &compareMacroDirectives);
+
+  OnDiskChainedHashTableGenerator<ASTMacroTableTrait> Generator;
+
+  // Emit the macro directives as a list and associate the offset with the
+  // identifier they belong to.
+  for (unsigned I = 0, N = MacroDirectives.size(); I != N; ++I) {
+    const IdentifierInfo *Name = MacroDirectives[I].first;
+    uint64_t MacroDirectiveOffset = Stream.GetCurrentBitNo();
+    MacroDirective *MD = MacroDirectives[I].second;
+
+    // If the macro or identifier need no updates, don't write the macro history
+    // for this one.
+    if (MD->isFromPCH() && !MD->hasChangedAfterLoad() &&
+        Name->isFromAST() && !Name->hasChangedSinceDeserialization())
+      continue;
+
+    // Emit the macro directives in reverse source order.
+    for (; MD; MD = MD->getPrevious()) {
+      if (shouldIgnoreMacro(MD, IsModule, PP))
+        continue;
+      MacroID InfoID = getMacroRef(MD->getInfo(), Name);
+      if (InfoID == 0)
+        continue;
+
+      Record.push_back(InfoID);
+      AddSourceLocation(MD->getLocation(), Record);
+      AddSourceLocation(MD->getUndefLoc(), Record);
+      AddSourceLocation(MD->getVisibilityLocation(), Record);
+      Record.push_back(MD->isImported());
+      Record.push_back(MD->isPublic());
+      Record.push_back(MD->isAmbiguous());
+    }
+    if (Record.empty())
+      continue;
+
+    Stream.EmitRecord(PP_MACRO_DIRECTIVE_HISTORY, Record);
+    Record.clear();
+
+    IdentMacroDirectivesOffsetMap[Name] = MacroDirectiveOffset;
+
+    IdentID NameID = getIdentifierRef(Name);
+    ASTMacroTableTrait::Data data;
+    data.MacroDirectivesOffset = MacroDirectiveOffset;
+    Generator.insert(NameID, data);
+  }
 
   /// \brief Offsets of each of the macros into the bitstream, indexed by
   /// the local macro ID
@@ -1864,101 +1944,107 @@
   /// defined.
   std::vector<uint32_t> MacroOffsets;
 
-  for (unsigned I = 0, N = MacrosToEmit.size(); I != N; ++I) {
-    const IdentifierInfo *Name = MacrosToEmit[I].first;
+  for (unsigned I = 0, N = MacroInfosToEmit.size(); I != N; ++I) {
+    const IdentifierInfo *Name = MacroInfosToEmit[I].Name;
+    MacroInfo *MI = MacroInfosToEmit[I].MI;
+    MacroID ID = MacroInfosToEmit[I].ID;
 
-    for (MacroDirective *MD = MacrosToEmit[I].second; MD;
-         MD = MD->getPrevious()) {
-      if (shouldIgnoreMacro(MD, IsModule, PP))
-        continue;
-
-      MacroID ID = getMacroRef(MD);
-      if (!ID)
-        continue;
-
-      // Skip macros from a AST file if we're chaining.
-      if (Chain && MD->isImported() && !MD->hasChangedAfterLoad())
-        continue;
-
-      if (ID < FirstMacroID) {
-        // This will have been dealt with via an update record.
-        assert(MacroUpdates.count(MD) > 0 && "Missing macro update");
-        continue;
-      }
-
-      // Record the local offset of this macro.
-      unsigned Index = ID - FirstMacroID;
-      if (Index == MacroOffsets.size())
-        MacroOffsets.push_back(Stream.GetCurrentBitNo());
-      else {
-        if (Index > MacroOffsets.size())
-          MacroOffsets.resize(Index + 1);
-
-        MacroOffsets[Index] = Stream.GetCurrentBitNo();
-      }
-
-      AddIdentifierRef(Name, Record);
-      addMacroRef(MD, Record);
-      const MacroInfo *MI = MD->getInfo();
-      Record.push_back(inferSubmoduleIDFromLocation(MI->getDefinitionLoc()));
-      AddSourceLocation(MI->getDefinitionLoc(), Record);
-      AddSourceLocation(MI->getDefinitionEndLoc(), Record);
-      AddSourceLocation(MD->getUndefLoc(), Record);
-      Record.push_back(MI->isUsed());
-      Record.push_back(MD->isPublic());
-      AddSourceLocation(MD->getVisibilityLocation(), Record);
-      unsigned Code;
-      if (MI->isObjectLike()) {
-        Code = PP_MACRO_OBJECT_LIKE;
-      } else {
-        Code = PP_MACRO_FUNCTION_LIKE;
-
-        Record.push_back(MI->isC99Varargs());
-        Record.push_back(MI->isGNUVarargs());
-        Record.push_back(MI->hasCommaPasting());
-        Record.push_back(MI->getNumArgs());
-        for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
-             I != E; ++I)
-          AddIdentifierRef(*I, Record);
-      }
-
-      // If we have a detailed preprocessing record, record the macro definition
-      // ID that corresponds to this macro.
-      if (PPRec)
-        Record.push_back(MacroDefinitions[PPRec->findMacroDefinition(MI)]);
-
-      Stream.EmitRecord(Code, Record);
-      Record.clear();
-
-      // Emit the tokens array.
-      for (unsigned TokNo = 0, e = MI->getNumTokens(); TokNo != e; ++TokNo) {
-        // Note that we know that the preprocessor does not have any annotation
-        // tokens in it because they are created by the parser, and thus can't
-        // be in a macro definition.
-        const Token &Tok = MI->getReplacementToken(TokNo);
-
-        Record.push_back(Tok.getLocation().getRawEncoding());
-        Record.push_back(Tok.getLength());
-
-        // FIXME: When reading literal tokens, reconstruct the literal pointer
-        // if it is needed.
-        AddIdentifierRef(Tok.getIdentifierInfo(), Record);
-        // FIXME: Should translate token kind to a stable encoding.
-        Record.push_back(Tok.getKind());
-        // FIXME: Should translate token flags to a stable encoding.
-        Record.push_back(Tok.getFlags());
-
-        Stream.EmitRecord(PP_TOKEN, Record);
-        Record.clear();
-      }
-      ++NumMacros;
+    if (ID < FirstMacroID) {
+      assert(0 && "Loaded MacroInfo entered MacroInfosToEmit ?");
+      continue;
     }
+
+    // Record the local offset of this macro.
+    unsigned Index = ID - FirstMacroID;
+    if (Index == MacroOffsets.size())
+      MacroOffsets.push_back(Stream.GetCurrentBitNo());
+    else {
+      if (Index > MacroOffsets.size())
+        MacroOffsets.resize(Index + 1);
+
+      MacroOffsets[Index] = Stream.GetCurrentBitNo();
+    }
+
+    AddIdentifierRef(Name, Record);
+    Record.push_back(inferSubmoduleIDFromLocation(MI->getDefinitionLoc()));
+    AddSourceLocation(MI->getDefinitionLoc(), Record);
+    AddSourceLocation(MI->getDefinitionEndLoc(), Record);
+    Record.push_back(MI->isUsed());
+    unsigned Code;
+    if (MI->isObjectLike()) {
+      Code = PP_MACRO_OBJECT_LIKE;
+    } else {
+      Code = PP_MACRO_FUNCTION_LIKE;
+
+      Record.push_back(MI->isC99Varargs());
+      Record.push_back(MI->isGNUVarargs());
+      Record.push_back(MI->hasCommaPasting());
+      Record.push_back(MI->getNumArgs());
+      for (MacroInfo::arg_iterator I = MI->arg_begin(), E = MI->arg_end();
+           I != E; ++I)
+        AddIdentifierRef(*I, Record);
+    }
+
+    // If we have a detailed preprocessing record, record the macro definition
+    // ID that corresponds to this macro.
+    if (PPRec)
+      Record.push_back(MacroDefinitions[PPRec->findMacroDefinition(MI)]);
+
+    Stream.EmitRecord(Code, Record);
+    Record.clear();
+
+    // Emit the tokens array.
+    for (unsigned TokNo = 0, e = MI->getNumTokens(); TokNo != e; ++TokNo) {
+      // Note that we know that the preprocessor does not have any annotation
+      // tokens in it because they are created by the parser, and thus can't
+      // be in a macro definition.
+      const Token &Tok = MI->getReplacementToken(TokNo);
+
+      Record.push_back(Tok.getLocation().getRawEncoding());
+      Record.push_back(Tok.getLength());
+
+      // FIXME: When reading literal tokens, reconstruct the literal pointer
+      // if it is needed.
+      AddIdentifierRef(Tok.getIdentifierInfo(), Record);
+      // FIXME: Should translate token kind to a stable encoding.
+      Record.push_back(Tok.getKind());
+      // FIXME: Should translate token flags to a stable encoding.
+      Record.push_back(Tok.getFlags());
+
+      Stream.EmitRecord(PP_TOKEN, Record);
+      Record.clear();
+    }
+    ++NumMacros;
   }
+
   Stream.ExitBlock();
 
+  // Create the on-disk hash table in a buffer.
+  SmallString<4096> MacroTable;
+  uint32_t BucketOffset;
+  {
+    llvm::raw_svector_ostream Out(MacroTable);
+    // Make sure that no bucket is at offset 0
+    clang::io::Emit32(Out, 0);
+    BucketOffset = Generator.Emit(Out);
+  }
+
+  // Write the macro table
+  using namespace llvm;
+  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(MACRO_TABLE));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+  unsigned MacroTableAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  Record.push_back(MACRO_TABLE);
+  Record.push_back(BucketOffset);
+  Stream.EmitRecordWithBlob(MacroTableAbbrev, Record, MacroTable.str());
+  Record.clear();
+
   // Write the offsets table for macro IDs.
   using namespace llvm;
-  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev = new BitCodeAbbrev();
   Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET));
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID
@@ -2801,13 +2887,61 @@
     if (!II->hadMacroDefinition())
       return false;
 
-    if (Macro || (Macro = PP.getMacroDirectiveHistory(II)))
-      return !shouldIgnoreMacro(Macro, IsModule, PP) &&
-             (!IsModule || Macro->isPublic());
+    if (Macro || (Macro = PP.getMacroDirectiveHistory(II))) {
+      if (!IsModule)
+        return !shouldIgnoreMacro(Macro, IsModule, PP);
+      SubmoduleID ModID;
+      if (getFirstPublicSubmoduleMacro(Macro, ModID))
+        return true;
+    }
 
     return false;
   }
 
+  MacroDirective *getFirstPublicSubmoduleMacro(MacroDirective *MD,
+                                               SubmoduleID &ModID) {
+    if (shouldIgnoreMacro(MD, IsModule, PP))
+      return 0;
+    ModID = getSubmoduleID(MD);
+    if (ModID == 0)
+      return 0;
+    if (MD->isDefined() && MD->isPublic())
+      return MD;
+    return getNextPublicSubmoduleMacro(MD, ModID);
+  }
+
+  MacroDirective *getNextPublicSubmoduleMacro(MacroDirective *MD,
+                                              SubmoduleID &ModID) {
+    while (MD) {
+      MD = getNextSubmoduleMacro(MD, ModID);
+      if (MD && MD->isDefined() && MD->isPublic())
+        return MD;
+    }
+    return 0;
+  }
+
+  MacroDirective *getNextSubmoduleMacro(MacroDirective *CurrMD,
+                                        SubmoduleID &CurrModID) {
+    SubmoduleID OrigID = CurrModID;
+    while ((CurrMD = CurrMD->getPrevious())) {
+      if (shouldIgnoreMacro(CurrMD, IsModule, PP))
+        return 0;
+      CurrModID = getSubmoduleID(CurrMD);
+      if (CurrModID == 0)
+        return 0;
+      if (CurrModID != OrigID)
+        return CurrMD;
+    }
+    return 0;
+  }
+
+  SubmoduleID getSubmoduleID(MacroDirective *MD) {
+    MacroInfo *MI = MD->getInfo();
+    if (unsigned ID = MI->getOwningModuleID())
+      return ID;
+    return Writer.inferSubmoduleIDFromLocation(MI->getDefinitionLoc());
+  }
+
 public:
   typedef IdentifierInfo* key_type;
   typedef key_type  key_type_ref;
@@ -2832,14 +2966,15 @@
       DataLen += 2; // 2 bytes for builtin ID
       DataLen += 2; // 2 bytes for flags
       if (hadMacroDefinition(II, Macro)) {
-        for (MacroDirective *M = Macro; M; M = M->getPrevious()) {
-          if (shouldIgnoreMacro(M, IsModule, PP))
-            continue;
-          if (Writer.getMacroRef(M) != 0)
-            DataLen += 4;
+        DataLen += 4; // MacroDirectives offset.
+        if (IsModule) {
+          SubmoduleID ModID;
+          for (MacroDirective *MD = getFirstPublicSubmoduleMacro(Macro, ModID);
+                 MD; MD = getNextPublicSubmoduleMacro(MD, ModID)) {
+            DataLen += 4; // MacroInfo ID.
+          }
+          DataLen += 4;
         }
-
-        DataLen += 4;
       }
 
       for (IdentifierResolver::iterator D = IdResolver.begin(II),
@@ -2878,6 +3013,7 @@
     Bits = 0;
     bool HadMacroDefinition = hadMacroDefinition(II, Macro);
     Bits = (Bits << 1) | unsigned(HadMacroDefinition);
+    Bits = (Bits << 1) | unsigned(IsModule);
     Bits = (Bits << 1) | unsigned(II->isExtensionToken());
     Bits = (Bits << 1) | unsigned(II->isPoisoned());
     Bits = (Bits << 1) | unsigned(II->hasRevertedTokenIDToIdentifier());
@@ -2885,15 +3021,18 @@
     clang::io::Emit16(Out, Bits);
 
     if (HadMacroDefinition) {
-      // Write all of the macro IDs associated with this identifier.
-      for (MacroDirective *M = Macro; M; M = M->getPrevious()) {
-        if (shouldIgnoreMacro(M, IsModule, PP))
-          continue;
-        if (MacroID ID = Writer.getMacroRef(M))
-          clang::io::Emit32(Out, ID);
+      clang::io::Emit32(Out, Writer.getMacroDirectivesOffset(II));
+      if (IsModule) {
+        // Write the IDs of macros coming from different submodules.
+        SubmoduleID ModID;
+        for (MacroDirective *MD = getFirstPublicSubmoduleMacro(Macro, ModID);
+               MD; MD = getNextPublicSubmoduleMacro(MD, ModID)) {
+          MacroID InfoID = Writer.getMacroID(MD->getInfo());
+          assert(InfoID);
+          clang::io::Emit32(Out, InfoID);
+        }
+        clang::io::Emit32(Out, 0);
       }
-
-      clang::io::Emit32(Out, 0);
     }
 
     // Emit the declaration IDs in reverse order, because the
@@ -3968,7 +4107,6 @@
     }
   }
 
-  WriteMacroUpdates();
   WriteDeclUpdatesBlocks();
   WriteDeclReplacementsBlock();
   WriteRedeclarations();
@@ -3985,21 +4123,6 @@
   Stream.ExitBlock();
 }
 
-void ASTWriter::WriteMacroUpdates() {
-  if (MacroUpdates.empty())
-    return;
-
-  RecordData Record;
-  for (MacroUpdatesMap::iterator I = MacroUpdates.begin(),
-                                 E = MacroUpdates.end();
-       I != E; ++I) {
-    addMacroRef(I->first, Record);
-    AddSourceLocation(I->second.UndefLoc, Record);
-    Record.push_back(inferSubmoduleIDFromLocation(I->second.UndefLoc));
-  }
-  Stream.EmitRecord(MACRO_UPDATES, Record);
-}
-
 /// \brief Go through the declaration update blocks and resolve declaration
 /// pointers into declaration IDs.
 void ASTWriter::ResolveDeclUpdatesBlocks() {
@@ -4095,10 +4218,6 @@
   Record.push_back(getIdentifierRef(II));
 }
 
-void ASTWriter::addMacroRef(MacroDirective *MD, RecordDataImpl &Record) {
-  Record.push_back(getMacroRef(MD));
-}
-
 IdentID ASTWriter::getIdentifierRef(const IdentifierInfo *II) {
   if (II == 0)
     return 0;
@@ -4109,19 +4228,35 @@
   return ID;
 }
 
-MacroID ASTWriter::getMacroRef(MacroDirective *MD) {
+MacroID ASTWriter::getMacroRef(MacroInfo *MI, const IdentifierInfo *Name) {
   // Don't emit builtin macros like __LINE__ to the AST file unless they
   // have been redefined by the header (in which case they are not
   // isBuiltinMacro).
-  if (MD == 0 || MD->getInfo()->isBuiltinMacro())
+  if (MI == 0 || MI->isBuiltinMacro())
     return 0;
 
-  MacroID &ID = MacroIDs[MD];
-  if (ID == 0)
+  MacroID &ID = MacroIDs[MI];
+  if (ID == 0) {
     ID = NextMacroID++;
+    MacroInfoToEmitData Info = { Name, MI, ID };
+    MacroInfosToEmit.push_back(Info);
+  }
   return ID;
 }
 
+MacroID ASTWriter::getMacroID(MacroInfo *MI) {
+  if (MI == 0 || MI->isBuiltinMacro())
+    return 0;
+  
+  assert(MacroIDs.find(MI) != MacroIDs.end() && "Macro not emitted!");
+  return MacroIDs[MI];
+}
+
+uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) {
+  assert(IdentMacroDirectivesOffsetMap[Name] && "not set!");
+  return IdentMacroDirectivesOffsetMap[Name];
+}
+
 void ASTWriter::AddSelectorRef(const Selector SelRef, RecordDataImpl &Record) {
   Record.push_back(getSelectorRef(SelRef));
 }
@@ -4861,9 +4996,9 @@
     StoredID = ID;
 }
 
-void ASTWriter::MacroRead(serialization::MacroID ID, MacroDirective *MD) {
+void ASTWriter::MacroRead(serialization::MacroID ID, MacroInfo *MI) {
   // Always keep the highest ID. See \p TypeRead() for more information.
-  MacroID &StoredID = MacroIDs[MD];
+  MacroID &StoredID = MacroIDs[MI];
   if (ID > StoredID)
     StoredID = ID;
 }
@@ -4897,10 +5032,6 @@
   SubmoduleIDs[Mod] = ID;
 }
 
-void ASTWriter::UndefinedMacro(MacroDirective *MD) {
-  MacroUpdates[MD].UndefLoc = MD->getUndefLoc();
-}
-
 void ASTWriter::CompletedTagDefinition(const TagDecl *D) {
   assert(D->isCompleteDefinition());
   assert(!WritingAST && "Already writing the AST!");