Implement (de-)serialization of the description of a module and its
submodules. This information will eventually be used for name hiding
when dealing with submodules. For now, we only use it to ensure that
the module "key" returned when loading a module will always be a
module (rather than occasionally being a FileEntry).



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@145497 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Frontend/CompilerInstance.h b/include/clang/Frontend/CompilerInstance.h
index 09b27e1..90f84ef 100644
--- a/include/clang/Frontend/CompilerInstance.h
+++ b/include/clang/Frontend/CompilerInstance.h
@@ -99,15 +99,9 @@
   /// \brief Non-owning reference to the ASTReader, if one exists.
   ASTReader *ModuleManager;
 
-  /// \brief A module that we have already attempted to load, which is known
-  /// by either a file entry (FIXME: a temporary measure) or via its module
-  /// definition.
-  typedef llvm::PointerUnion<const FileEntry *, ModuleMap::Module *> 
-    KnownModule;
-  
   /// \brief The set of top-level modules that has already been loaded,
   /// along with the module map
-  llvm::DenseMap<const IdentifierInfo *, KnownModule> KnownModules;
+  llvm::DenseMap<const IdentifierInfo *, ModuleMap::Module *> KnownModules;
   
   /// \brief The location of the module-import keyword for the last module
   /// import. 
@@ -115,7 +109,7 @@
   
   /// \brief The result of the last module import.
   ///
-  KnownModule LastModuleImportResult;
+  ModuleMap::Module *LastModuleImportResult;
   
   /// \brief Holds information about the output file.
   ///
diff --git a/include/clang/Lex/ModuleMap.h b/include/clang/Lex/ModuleMap.h
index 4f89f1e..c5727a9 100644
--- a/include/clang/Lex/ModuleMap.h
+++ b/include/clang/Lex/ModuleMap.h
@@ -162,6 +162,24 @@
   /// \returns The named module, if known; otherwise, returns null.
   Module *findModule(StringRef Name);
   
+  /// \brief Find a new module or submodule, or create it if it does not already
+  /// exist.
+  ///
+  /// \param Name The name of the module to find or create.
+  ///
+  /// \param Parent The module that will act as the parent of this submodule,
+  /// or NULL to indicate that this is a top-level module.
+  ///
+  /// \param IsFramework Whether this is a framework module.
+  ///
+  /// \param IsExplicit Whether this is an explicit submodule.
+  ///
+  /// \returns The found or newly-created module, along with a boolean value
+  /// that will be true if the module is newly-created.
+  std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent, 
+                                               bool IsFramework,
+                                               bool IsExplicit);
+                       
   /// \brief Infer the contents of a framework module map from the given
   /// framework directory.
   Module *inferFrameworkModule(StringRef ModuleName, 
diff --git a/include/clang/Serialization/ASTBitCodes.h b/include/clang/Serialization/ASTBitCodes.h
index 53d970b..22dc0b9 100644
--- a/include/clang/Serialization/ASTBitCodes.h
+++ b/include/clang/Serialization/ASTBitCodes.h
@@ -198,7 +198,10 @@
       DECL_UPDATES_BLOCK_ID,
       
       /// \brief The block containing the detailed preprocessing record.
-      PREPROCESSOR_DETAIL_BLOCK_ID
+      PREPROCESSOR_DETAIL_BLOCK_ID,
+      
+      /// \brief The block containing the submodule structure.
+      SUBMODULE_BLOCK_ID
     };
 
     /// \brief Record types that occur within the AST block itself.
@@ -492,6 +495,18 @@
       PPD_INCLUSION_DIRECTIVE = 2
     };
     
+    /// \brief Record types used within a submodule description block.
+    enum SubmoduleRecordTypes {
+      /// \brief Defines the major attributes of a submodule, including its
+      /// name and parent.
+      SUBMODULE_DEFINITION = 0,
+      /// \brief Specifies the umbrella header used to create this module,
+      /// if any.
+      SUBMODULE_UMBRELLA = 1,
+      /// \brief Specifies a header that falls into this (sub)module.
+      SUBMODULE_HEADER = 2
+    };
+    
     /// \defgroup ASTAST AST file AST constants
     ///
     /// The constants in this group describe various components of the
diff --git a/include/clang/Serialization/ASTReader.h b/include/clang/Serialization/ASTReader.h
index 7a9a15c..a641942 100644
--- a/include/clang/Serialization/ASTReader.h
+++ b/include/clang/Serialization/ASTReader.h
@@ -692,8 +692,9 @@
   ASTReadResult ReadSLocEntryRecord(int ID);
   llvm::BitstreamCursor &SLocCursorForID(int ID);
   SourceLocation getImportLocation(Module *F);
+  ASTReadResult ReadSubmoduleBlock(Module &F);
   bool ParseLanguageOptions(const SmallVectorImpl<uint64_t> &Record);
-
+  
   struct RecordLocation {
     RecordLocation(Module *M, uint64_t O)
       : F(M), Offset(O) {}
diff --git a/include/clang/Serialization/ASTWriter.h b/include/clang/Serialization/ASTWriter.h
index ac750c9..5773c77 100644
--- a/include/clang/Serialization/ASTWriter.h
+++ b/include/clang/Serialization/ASTWriter.h
@@ -358,6 +358,10 @@
   /// in the order they should be written.
   SmallVector<QueuedCXXBaseSpecifiers, 2> CXXBaseSpecifiersToWrite;
 
+  /// \brief A mapping from each known submodule to its ID number, which will
+  /// be a positive integer.
+  llvm::DenseMap<ModuleMap::Module *, unsigned> SubmoduleIDs;
+                    
   /// \brief Write the given subexpression to the bitstream.
   void WriteSubStmt(Stmt *S,
                     llvm::DenseMap<Stmt *, uint64_t> &SubStmtEntries,
@@ -374,6 +378,7 @@
   void WritePreprocessor(const Preprocessor &PP, bool IsModule);
   void WriteHeaderSearch(const HeaderSearch &HS, StringRef isysroot);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec);
+  void WriteSubmodules(ModuleMap::Module *WritingModule);
   void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag);
   void WriteCXXBaseSpecifiersOffsets();
   void WriteType(QualType T);
diff --git a/include/clang/Serialization/Module.h b/include/clang/Serialization/Module.h
index ca2046b..92ff8d8 100644
--- a/include/clang/Serialization/Module.h
+++ b/include/clang/Serialization/Module.h
@@ -17,6 +17,7 @@
 
 #include "clang/Serialization/ASTBitCodes.h"
 #include "clang/Serialization/ContinuousRangeMap.h"
+#include "clang/Lex/ModuleMap.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SetVector.h"
@@ -200,6 +201,9 @@
   /// search information.
   const char *HeaderFileFrameworkStrings;
 
+  // === Submodule information ===
+  llvm::SmallVector<ModuleMap::Module *, 2> Submodules;
+  
   // === Selectors ===
 
   /// \brief The number of selectors new to this file.
diff --git a/lib/Frontend/CompilerInstance.cpp b/lib/Frontend/CompilerInstance.cpp
index 66b381e..cb66aad 100644
--- a/lib/Frontend/CompilerInstance.cpp
+++ b/lib/Frontend/CompilerInstance.cpp
@@ -1074,7 +1074,7 @@
   // This one-element cache is important to eliminate redundant diagnostics
   // when both the preprocessor and parser see the same import declaration.
   if (!ImportLoc.isInvalid() && LastModuleImportLoc == ImportLoc)
-    return LastModuleImportResult.getOpaqueValue();
+    return LastModuleImportResult;
   
   // Determine what file we're searching from.
   SourceManager &SourceMgr = getSourceManager();
@@ -1091,8 +1091,9 @@
   const FileEntry *ModuleFile = 0;
   
   // If we don't already have information on this module, load the module now.
-  KnownModule &Known = KnownModules[Path[0].first];
-  if (Known.isNull()) {  
+  llvm::DenseMap<const IdentifierInfo *, ModuleMap::Module *>::iterator Known
+    = KnownModules.find(Path[0].first);
+  if (Known == KnownModules.end()) {  
     // Search for a module with the given name.
     std::string ModuleFileName;
     ModuleFile
@@ -1173,39 +1174,49 @@
     case ASTReader::IgnorePCH:
       // FIXME: The ASTReader will already have complained, but can we showhorn
       // that diagnostic information into a more useful form?
+      KnownModules[Path[0].first] = 0;
       return 0;
 
     case ASTReader::Failure:
-      // Already complained.
+      // Already complained, but note now that we failed.
+      KnownModules[Path[0].first] = 0;
       return 0;
     }
     
-    if (Module)
-      Known = Module;
-    else
-      Known = ModuleFile;
+    if (!Module) {
+      // If we loaded the module directly, without finding a module map first,
+      // we'll have loaded the module's information from the module itself.
+      Module = PP->getHeaderSearchInfo().getModuleMap()
+                 .findModule((Path[0].first->getName()));
+    }
+    
+    // Cache the result of this top-level module lookup for later.
+    Known = KnownModules.insert(std::make_pair(Path[0].first, Module)).first;
   } else {
-    Module = Known.dyn_cast<ModuleMap::Module *>();
+    // Retrieve the cached top-level module.
+    Module = Known->second;
   }
   
+  // If we never found the module, fail.
+  if (!Module)
+    return 0;
+  
   // Verify that the rest of the module path actually corresponds to
   // a submodule.
-  ModuleMap::Module *Sub = 0;
-  if (Module && Path.size() > 1) {
-    Sub = Module;
+  if (Path.size() > 1) {
     for (unsigned I = 1, N = Path.size(); I != N; ++I) {
       StringRef Name = Path[I].first->getName();
       llvm::StringMap<ModuleMap::Module *>::iterator Pos
-        = Sub->SubModules.find(Name);
+        = Module->SubModules.find(Name);
       
-      if (Pos == Sub->SubModules.end()) {
+      if (Pos == Module->SubModules.end()) {
         // Attempt to perform typo correction to find a module name that works.
         llvm::SmallVector<StringRef, 2> Best;
         unsigned BestEditDistance = (std::numeric_limits<unsigned>::max)();
         
         for (llvm::StringMap<ModuleMap::Module *>::iterator 
-                  I = Sub->SubModules.begin(), 
-               IEnd = Sub->SubModules.end();
+                  I = Module->SubModules.begin(), 
+               IEnd = Module->SubModules.end();
              I != IEnd; ++I) {
           unsigned ED = Name.edit_distance(I->getValue()->Name,
                                            /*AllowReplacements=*/true,
@@ -1221,34 +1232,31 @@
         if (Best.size() == 1) {
           getDiagnostics().Report(Path[I].second, 
                                   diag::err_no_submodule_suggest)
-            << Path[I].first << Sub->getFullModuleName() << Best[0]
+            << Path[I].first << Module->getFullModuleName() << Best[0]
             << SourceRange(Path[0].second, Path[I-1].second)
             << FixItHint::CreateReplacement(SourceRange(Path[I].second),
                                             Best[0]);
-          Pos = Sub->SubModules.find(Best[0]);
+          Pos = Module->SubModules.find(Best[0]);
         }
       }
       
-      if (Pos == Sub->SubModules.end()) {
+      if (Pos == Module->SubModules.end()) {
         // No submodule by this name. Complain, and don't look for further
         // submodules.
         getDiagnostics().Report(Path[I].second, diag::err_no_submodule)
-          << Path[I].first << Sub->getFullModuleName()
+          << Path[I].first << Module->getFullModuleName()
           << SourceRange(Path[0].second, Path[I-1].second);
         break;
       }
       
-      Sub = Pos->getValue();
+      Module = Pos->getValue();
     }
   }
   
   // FIXME: Tell the AST reader to make the named submodule visible.
   
-  // FIXME: The module file's FileEntry makes a poor key indeed! Once we 
-  // eliminate the need for FileEntry here, the module itself will become the
-  // key (which does make sense).
   LastModuleImportLoc = ImportLoc;
-  LastModuleImportResult = Known;
-  return Known.getOpaqueValue();
+  LastModuleImportResult = Module;
+  return Module;
 }
 
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index bcf1596..11a20e0 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -177,6 +177,23 @@
   return 0;
 }
 
+std::pair<ModuleMap::Module *, bool> 
+ModuleMap::findOrCreateModule(StringRef Name, Module *Parent, bool IsFramework,
+                              bool IsExplicit) {
+  // Try to find an existing module with this name.
+  if (Module *Found = Parent? Parent->SubModules[Name] : Modules[Name])
+    return std::make_pair(Found, false);
+  
+  // Create a new module with this name.
+  Module *Result = new Module(Name, SourceLocation(), Parent, IsFramework, 
+                              IsExplicit);
+  if (Parent)
+    Parent->SubModules[Name] = Result;
+  else
+    Modules[Name] = Result;
+  return std::make_pair(Result, true);
+}
+
 ModuleMap::Module *
 ModuleMap::inferFrameworkModule(StringRef ModuleName, 
                                 const DirectoryEntry *FrameworkDir) {
diff --git a/lib/Serialization/ASTReader.cpp b/lib/Serialization/ASTReader.cpp
index 2569e22..af12bf3 100644
--- a/lib/Serialization/ASTReader.cpp
+++ b/lib/Serialization/ASTReader.cpp
@@ -1705,6 +1705,26 @@
           return IgnorePCH;
         }
         break;
+
+      case SUBMODULE_BLOCK_ID:
+        switch (ReadSubmoduleBlock(F)) {
+        case Success:
+          break;
+          
+        case Failure:
+          Error("malformed submodule block in AST file");
+          return Failure;
+          
+        case IgnorePCH:
+          return IgnorePCH;            
+        }
+        break;
+
+      default:
+        if (!Stream.SkipBlock())
+          break;
+        Error("malformed block record in AST file");
+        return Failure;
       }
       continue;
     }
@@ -2800,6 +2820,110 @@
   return std::string();
 }
 
+ASTReader::ASTReadResult ASTReader::ReadSubmoduleBlock(Module &F) {
+  // Enter the submodule block.
+  if (F.Stream.EnterSubBlock(SUBMODULE_BLOCK_ID)) {
+    Error("malformed submodule block record in AST file");
+    return Failure;
+  }
+
+  ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+  ModuleMap::Module *CurrentModule = 0;
+  RecordData Record;
+  while (true) {
+    unsigned Code = F.Stream.ReadCode();
+    if (Code == llvm::bitc::END_BLOCK) {
+      if (F.Stream.ReadBlockEnd()) {
+        Error("error at end of submodule block in AST file");
+        return Failure;
+      }
+      return Success;
+    }
+    
+    if (Code == llvm::bitc::ENTER_SUBBLOCK) {
+      // No known subblocks, always skip them.
+      F.Stream.ReadSubBlockID();
+      if (F.Stream.SkipBlock()) {
+        Error("malformed block record in AST file");
+        return Failure;
+      }
+      continue;
+    }
+    
+    if (Code == llvm::bitc::DEFINE_ABBREV) {
+      F.Stream.ReadAbbrevRecord();
+      continue;
+    }
+    
+    // Read a record.
+    const char *BlobStart;
+    unsigned BlobLen;
+    Record.clear();
+    switch (F.Stream.ReadRecord(Code, Record, &BlobStart, &BlobLen)) {
+    default:  // Default behavior: ignore.
+      break;
+      
+    case SUBMODULE_DEFINITION: {
+      StringRef Name(BlobStart, BlobLen);
+      unsigned Parent = Record[0];
+      bool IsFramework = Record[1];
+      bool IsExplicit = Record[2];
+
+      ModuleMap::Module *ParentModule = 0;
+      if (Parent) {
+        if (Parent > F.Submodules.size()) {
+          Error("malformed submodule parent entry");
+          return Failure;
+        }
+        
+        ParentModule = F.Submodules[Parent - 1];
+      } 
+      
+      // Retrieve this (sub)module from the module map, creating it if
+      // necessary.
+      CurrentModule = ModMap.findOrCreateModule(Name, ParentModule, 
+                                                IsFramework, 
+                                                IsExplicit).first;
+      F.Submodules.push_back(CurrentModule);
+      break;
+    }
+        
+    case SUBMODULE_UMBRELLA: {
+      if (!CurrentModule)
+        break;
+      
+      StringRef FileName(BlobStart, BlobLen);
+      if (const FileEntry *Umbrella = PP.getFileManager().getFile(FileName)) {
+        if (!CurrentModule->UmbrellaHeader)
+          CurrentModule->UmbrellaHeader = Umbrella;
+        else if (CurrentModule->UmbrellaHeader != Umbrella) {
+          Error("mismatched umbrella headers in submodule");
+          return Failure;
+        }
+      }
+      break;
+    }
+        
+    case SUBMODULE_HEADER: {
+      if (!CurrentModule)
+        break;
+      
+      // FIXME: Be more lazy about this!
+      StringRef FileName(BlobStart, BlobLen);
+      if (const FileEntry *File = PP.getFileManager().getFile(FileName)) {
+        if (std::find(CurrentModule->Headers.begin(), 
+                      CurrentModule->Headers.end(), 
+                      File) == CurrentModule->Headers.end())
+          CurrentModule->Headers.push_back(File);
+      }
+      break;      
+    }
+    }
+  }
+
+  return Success;
+}
+
 /// \brief Parse the record that corresponds to a LangOptions data
 /// structure.
 ///
diff --git a/lib/Serialization/ASTWriter.cpp b/lib/Serialization/ASTWriter.cpp
index e225103..cf4ea41 100644
--- a/lib/Serialization/ASTWriter.cpp
+++ b/lib/Serialization/ASTWriter.cpp
@@ -1845,6 +1845,88 @@
   }
 }
 
+void ASTWriter::WriteSubmodules(ModuleMap::Module *WritingModule) {
+  // Enter the submodule description block.
+  Stream.EnterSubblock(SUBMODULE_BLOCK_ID, NUM_ALLOWED_ABBREVS_SIZE);
+  
+  // Write the abbreviations needed for the submodules block.
+  using namespace llvm;
+  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
+  unsigned DefinitionAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_UMBRELLA));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
+  unsigned UmbrellaAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_HEADER));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Name
+  unsigned HeaderAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  // Write all of the submodules.
+  unsigned SubmoduleID = 1;
+  std::queue<ModuleMap::Module *> Q;
+  Q.push(WritingModule);
+  RecordData Record;
+  while (!Q.empty()) {
+    ModuleMap::Module *Mod = Q.front();
+    Q.pop();
+    SubmoduleIDs[Mod] = SubmoduleID++;
+    
+    // Emit the definition of the block.
+    Record.clear();
+    Record.push_back(SUBMODULE_DEFINITION);
+    if (Mod->Parent) {
+      assert(SubmoduleIDs[Mod->Parent] && "Submodule parent not written?");
+      Record.push_back(SubmoduleIDs[Mod->Parent]);
+    } else {
+      Record.push_back(0);
+    }
+    Record.push_back(Mod->IsFramework);
+    Record.push_back(Mod->IsExplicit);
+    Stream.EmitRecordWithBlob(DefinitionAbbrev, Record, Mod->Name);
+    
+    // Emit the umbrella header, if there is one.
+    if (Mod->UmbrellaHeader) {
+      Record.clear();
+      Record.push_back(SUBMODULE_UMBRELLA);
+      Stream.EmitRecordWithBlob(UmbrellaAbbrev, Record, 
+                                Mod->UmbrellaHeader->getName());
+    }
+    
+    // Emit the headers.
+    for (unsigned I = 0, N = Mod->Headers.size(); I != N; ++I) {
+      Record.clear();
+      Record.push_back(SUBMODULE_HEADER);
+      Stream.EmitRecordWithBlob(HeaderAbbrev, Record, 
+                                Mod->Headers[I]->getName());
+    }
+    
+    // Queue up the submodules of this module.
+    llvm::SmallVector<StringRef, 2> SubModules;
+    
+    // Sort the submodules first, so we get a predictable ordering in the AST
+    // file.
+    for (llvm::StringMap<ModuleMap::Module *>::iterator 
+              Sub = Mod->SubModules.begin(),
+           SubEnd = Mod->SubModules.end();
+         Sub != SubEnd; ++Sub)
+      SubModules.push_back(Sub->getKey());
+    llvm::array_pod_sort(SubModules.begin(), SubModules.end());
+    
+    for (unsigned I = 0, N = SubModules.size(); I != N; ++I)
+      Q.push(Mod->SubModules[SubModules[I]]);
+  }
+  
+  Stream.ExitBlock();
+}
+
 void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag) {
   RecordData Record;
   for (DiagnosticsEngine::DiagStatePointsTy::const_iterator
@@ -3086,7 +3168,8 @@
     Stream.EmitRecordWithBlob(ModuleOffsetMapAbbrev, Record,
                               Buffer.data(), Buffer.size());
   }
-
+  if (WritingModule)
+    WriteSubmodules(WritingModule);
   WritePreprocessor(PP, WritingModule != 0);
   WriteHeaderSearch(PP.getHeaderSearchInfo(), isysroot);
   WriteSelectors(SemaRef);