Add AST_SIGNATURE record to unhashed control block of PCM files
Summary:
This record is constructed by hashing the bytes of the AST block in a similiar
fashion to the SIGNATURE record. This new signature only means anything if the
AST block is fully relocatable, i.e. it does not embed absolute offsets within
the PCM file. This change ensure this does not happen by replacing these offsets
with offsets relative to the nearest relevant subblock of the AST block.
Reviewers: Bigcheese, dexonsmith
Subscribers: dexonsmith, cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D80383
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 673b65d..ae9139c 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1336,6 +1336,7 @@
Error(std::move(Err));
return true;
}
+ F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo();
RecordData Record;
while (true) {
@@ -1628,13 +1629,17 @@
/// Enter a subblock of the specified BlockID with the specified cursor. Read
/// the abbreviations that are at the top of the block and then leave the cursor
/// pointing into the block.
-bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID) {
+bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID,
+ uint64_t *StartOfBlockOffset) {
if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) {
// FIXME this drops errors on the floor.
consumeError(std::move(Err));
return true;
}
+ if (StartOfBlockOffset)
+ *StartOfBlockOffset = Cursor.GetCurrentBitNo();
+
while (true) {
uint64_t Offset = Cursor.GetCurrentBitNo();
Expected<unsigned> MaybeCode = Cursor.ReadCode();
@@ -2933,6 +2938,7 @@
Error(std::move(Err));
return Failure;
}
+ F.ASTBlockStartOffset = Stream.GetCurrentBitNo();
// Read all of the records and blocks for the AST file.
RecordData Record;
@@ -2973,7 +2979,8 @@
Error(std::move(Err));
return Failure;
}
- if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID)) {
+ if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID,
+ &F.DeclsBlockStartOffset)) {
Error("malformed block record in AST file");
return Failure;
}
@@ -3377,7 +3384,7 @@
F.SLocEntryOffsets = (const uint32_t *)Blob.data();
F.LocalNumSLocEntries = Record[0];
unsigned SLocSpaceSize = Record[1];
- F.SLocEntryOffsetsBase = Record[2];
+ F.SLocEntryOffsetsBase = Record[2] + F.SourceManagerBlockStartOffset;
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) =
SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries,
SLocSpaceSize);
@@ -3696,7 +3703,7 @@
F.MacroOffsets = (const uint32_t *)Blob.data();
F.LocalNumMacros = Record[0];
unsigned LocalBaseMacroID = Record[1];
- F.MacroOffsetsBase = Record[2];
+ F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset;
F.BaseMacroID = getTotalNumMacros();
if (F.LocalNumMacros > 0) {
@@ -3837,17 +3844,18 @@
while (Data < DataEnd) {
// FIXME: Looking up dependency modules by filename is horrible. Let's
- // start fixing this with prebuilt and explicit modules and see how it
- // goes...
+ // start fixing this with prebuilt, explicit and implicit modules and see
+ // how it goes...
using namespace llvm::support;
ModuleKind Kind = static_cast<ModuleKind>(
endian::readNext<uint8_t, little, unaligned>(Data));
uint16_t Len = endian::readNext<uint16_t, little, unaligned>(Data);
StringRef Name = StringRef((const char*)Data, Len);
Data += Len;
- ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule
- ? ModuleMgr.lookupByModuleName(Name)
- : ModuleMgr.lookupByFileName(Name));
+ ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
+ Kind == MK_ImplicitModule
+ ? ModuleMgr.lookupByModuleName(Name)
+ : ModuleMgr.lookupByFileName(Name));
if (!OM) {
std::string Msg =
"SourceLocation remap refers to unknown module, cannot find ";
@@ -4736,6 +4744,11 @@
if (F)
F->Signature = ASTFileSignature::create(Record.begin(), Record.end());
break;
+ case AST_BLOCK_HASH:
+ if (F)
+ F->ASTBlockHash =
+ ASTFileSignature::create(Record.begin(), Record.end());
+ break;
case DIAGNOSTIC_OPTIONS: {
bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0;
if (Listener && ValidateDiagnosticOptions &&
@@ -6350,7 +6363,8 @@
assert(I != GlobalTypeMap.end() && "Corrupted global type map");
ModuleFile *M = I->second;
return RecordLocation(
- M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset());
+ M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() +
+ M->DeclsBlockStartOffset);
}
static llvm::Optional<Type::TypeClass> getTypeClassForCode(TypeCode code) {
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 0e92c86..359d5e5 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2869,7 +2869,7 @@
const DeclOffset &DOffs =
M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS];
Loc = TranslateSourceLocation(*M, DOffs.getLocation());
- return RecordLocation(M, DOffs.getBitOffset());
+ return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset));
}
ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) {
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 5120872..cdaa974 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -10,14 +10,12 @@
//
//===----------------------------------------------------------------------===//
-#include "clang/AST/OpenMPClause.h"
-#include "clang/Serialization/ASTRecordWriter.h"
#include "ASTCommon.h"
#include "ASTReaderInternals.h"
#include "MultiOnDiskHashTable.h"
-#include "clang/AST/AbstractTypeWriter.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTUnresolvedSet.h"
+#include "clang/AST/AbstractTypeWriter.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
@@ -31,6 +29,7 @@
#include "clang/AST/ExprCXX.h"
#include "clang/AST/LambdaCapture.h"
#include "clang/AST/NestedNameSpecifier.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/RawCommentList.h"
#include "clang/AST/TemplateName.h"
#include "clang/AST/Type.h"
@@ -65,7 +64,9 @@
#include "clang/Sema/ObjCMethodList.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/Weak.h"
+#include "clang/Serialization/ASTBitCodes.h"
#include "clang/Serialization/ASTReader.h"
+#include "clang/Serialization/ASTRecordWriter.h"
#include "clang/Serialization/InMemoryModuleCache.h"
#include "clang/Serialization/ModuleFile.h"
#include "clang/Serialization/ModuleFileExtension.h"
@@ -961,6 +962,7 @@
BLOCK(UNHASHED_CONTROL_BLOCK);
RECORD(SIGNATURE);
+ RECORD(AST_BLOCK_HASH);
RECORD(DIAGNOSTIC_OPTIONS);
RECORD(DIAG_PRAGMA_MAPPINGS);
@@ -1026,13 +1028,23 @@
return Filename + Pos;
}
-ASTFileSignature ASTWriter::createSignature(StringRef Bytes) {
- // Calculate the hash till start of UNHASHED_CONTROL_BLOCK.
+std::pair<ASTFileSignature, ASTFileSignature>
+ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
llvm::SHA1 Hasher;
- Hasher.update(ArrayRef<uint8_t>(Bytes.bytes_begin(), Bytes.size()));
+ Hasher.update(ASTBlockBytes);
auto Hash = Hasher.result();
+ ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hash);
- return ASTFileSignature::create(Hash);
+ // Add the remaining bytes (i.e. bytes before the unhashed control block that
+ // are not part of the AST block).
+ Hasher.update(
+ AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
+ Hasher.update(
+ AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
+ Hash = Hasher.result();
+ ASTFileSignature Signature = ASTFileSignature::create(Hash);
+
+ return std::make_pair(ASTBlockHash, Signature);
}
ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP,
@@ -1049,7 +1061,16 @@
ASTFileSignature Signature;
if (WritingModule &&
PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) {
- Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl));
+ ASTFileSignature ASTBlockHash;
+ auto ASTBlockStartByte = ASTBlockRange.first >> 3;
+ auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte;
+ std::tie(ASTBlockHash, Signature) = createSignature(
+ StringRef(Buffer.begin(), StartOfUnhashedControl),
+ StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength));
+
+ Record.append(ASTBlockHash.begin(), ASTBlockHash.end());
+ Stream.EmitRecord(AST_BLOCK_HASH, Record);
+ Record.clear();
Record.append(Signature.begin(), Signature.end());
Stream.EmitRecord(SIGNATURE, Record);
Record.clear();
@@ -1901,6 +1922,7 @@
// Enter the source manager block.
Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4);
+ const uint64_t SourceManagerBlockOffset = Stream.GetCurrentBitNo();
// Abbreviations for the various kinds of source-location entries.
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream);
@@ -2041,7 +2063,7 @@
RecordData::value_type Record[] = {
SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(),
SourceMgr.getNextLocalOffset() - 1 /* skip dummy */,
- SLocEntryOffsetsBase};
+ SLocEntryOffsetsBase - SourceManagerBlockOffset};
Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record,
bytes(SLocEntryOffsets));
}
@@ -2320,7 +2342,7 @@
{
RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(),
FirstMacroID - NUM_PREDEF_MACRO_IDS,
- MacroOffsetsBase};
+ MacroOffsetsBase - ASTBlockStartOffset};
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
}
}
@@ -2834,7 +2856,7 @@
assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST");
// Emit the type's representation.
- uint64_t Offset = ASTTypeWriter(*this).write(T);
+ uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset;
// Record the offset for this type.
unsigned Index = Idx.getIndex() - FirstTypeID;
@@ -4544,7 +4566,10 @@
WriteControlBlock(PP, Context, isysroot, OutputFile);
// Write the remaining AST contents.
+ Stream.FlushToWord();
+ ASTBlockRange.first = Stream.GetCurrentBitNo();
Stream.EnterSubblock(AST_BLOCK_ID, 5);
+ ASTBlockStartOffset = Stream.GetCurrentBitNo();
// This is so that older clang versions, before the introduction
// of the control block, can read and reject the newer PCH format.
@@ -4675,9 +4700,9 @@
// c++-base-specifiers-id:i32
// type-id:i32)
//
- // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or
- // MK_ExplicitModule, then the module-name is the module name. Otherwise,
- // it is the module file name.
+ // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule,
+ // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the
+ // module name. Otherwise, it is the module file name.
auto Abbrev = std::make_shared<BitCodeAbbrev>();
Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
@@ -4690,10 +4715,7 @@
endian::Writer LE(Out, little);
LE.write<uint8_t>(static_cast<uint8_t>(M.Kind));
- StringRef Name =
- M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule
- ? M.ModuleName
- : M.FileName;
+ StringRef Name = M.isModule() ? M.ModuleName : M.FileName;
LE.write<uint16_t>(Name.size());
Out.write(Name.data(), Name.size());
@@ -4737,6 +4759,7 @@
// Keep writing types, declarations, and declaration update records
// until we've emitted all of them.
Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5);
+ DeclTypesBlockStartOffset = Stream.GetCurrentBitNo();
WriteTypeAbbrevs();
WriteDeclAbbrevs();
do {
@@ -4905,6 +4928,8 @@
NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts};
Stream.EmitRecord(STATISTICS, Record);
Stream.ExitBlock();
+ Stream.FlushToWord();
+ ASTBlockRange.second = Stream.GetCurrentBitNo();
// Write the module file extension blocks.
for (const auto &ExtWriter : ModuleFileExtensionWriters)
diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp
index a28a21b..19608c8 100644
--- a/clang/lib/Serialization/ASTWriterDecl.cpp
+++ b/clang/lib/Serialization/ASTWriterDecl.cpp
@@ -2432,12 +2432,12 @@
SourceLocation Loc = D->getLocation();
unsigned Index = ID - FirstDeclID;
if (DeclOffsets.size() == Index)
- DeclOffsets.emplace_back(Loc, Offset);
+ DeclOffsets.emplace_back(Loc, Offset, DeclTypesBlockStartOffset);
else if (DeclOffsets.size() < Index) {
// FIXME: Can/should this happen?
DeclOffsets.resize(Index+1);
DeclOffsets[Index].setLocation(Loc);
- DeclOffsets[Index].setBitOffset(Offset);
+ DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset);
} else {
llvm_unreachable("declarations should be emitted in ID order");
}