[clang][AST] Support AST files larger than 512M
Summary:
Clang uses 32-bit integers for storing bit offsets from the beginning of
the file, which results in a 512M limit on the AST file size (2^32 bits =
512 MiB). This diff replaces absolute offsets with offsets relative to the
beginning of the corresponding data structure where possible, and uses
64-bit offsets for DeclOffsets and TypeOffsets because these AST sections
alone may easily exceed 512M.
This diff breaks AST file format compatibility, so VERSION_MAJOR is bumped.
Test Plan:
Existing clang AST serialization tests
Tested on clangd with ~700M and ~900M preamble files
check-clang with ubsan
Reviewers: rsmith, dexonsmith
Subscribers: ilya-biryukov, kadircet, usaxena95, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D76594
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7f114c0..f0e9bbd 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1470,6 +1470,7 @@
ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second;
if (llvm::Error Err = F->SLocEntryCursor.JumpToBit(
+ F->SLocEntryOffsetsBase +
F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) {
Error(std::move(Err));
return true;
@@ -1932,9 +1933,8 @@
return HFI;
}
-void ASTReader::addPendingMacro(IdentifierInfo *II,
- ModuleFile *M,
- uint64_t MacroDirectivesOffset) {
+void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M,
+ uint32_t MacroDirectivesOffset) {
assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard");
PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset));
}
@@ -2099,7 +2099,8 @@
BitstreamCursor &Cursor = M.MacroCursor;
SavedStreamPosition SavedPosition(Cursor);
- if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) {
+ if (llvm::Error Err =
+ Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) {
Error(std::move(Err));
return;
}
@@ -3098,7 +3099,7 @@
Error("duplicate TYPE_OFFSET record in AST file");
return Failure;
}
- F.TypeOffsets = (const uint32_t *)Blob.data();
+ F.TypeOffsets = reinterpret_cast<const UnderalignedInt64 *>(Blob.data());
F.LocalNumTypes = Record[0];
unsigned LocalBaseTypeIndex = Record[1];
F.BaseTypeIndex = getTotalNumTypes();
@@ -3376,6 +3377,7 @@
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
F.LocalNumSLocEntries = Record[0];
unsigned SLocSpaceSize = Record[1];
+ F.SLocEntryOffsetsBase = Record[2];
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
@@ -3694,6 +3696,7 @@
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
+ F.MacroOffsetsBase = Record[2];
F.BaseMacroID = getTotalNumMacros();
if (F.LocalNumMacros > 0) {
@@ -5907,8 +5910,8 @@
}
SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor);
- if (llvm::Error Err =
- M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) {
+ if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit(
+ M.MacroOffsetsBase + PPOffs.BitOffset)) {
Error(std::move(Err));
return nullptr;
}
@@ -6321,7 +6324,8 @@
GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index);
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
ModuleFile *M = I->second;
- return RecordLocation(M, M->TypeOffsets[Index - M->BaseTypeIndex]);
+ return RecordLocation(
+ M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset());
}
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
@@ -8427,7 +8431,8 @@
assert(I != GlobalMacroMap.end() && "Corrupted global macro map");
ModuleFile *M = I->second;
unsigned Index = ID - M->BaseMacroID;
- MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]);
+ MacrosLoaded[ID] =
+ ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]);
if (DeserializationListener)
DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS,