Reapply: [Modules][PCH] Hash input files content
Summary:
When files often get touched during builds, the mtime based validation
leads to different problems in implicit modules builds, even when the
content doesn't actually change:
- Modules only: module invalidation due to out of date files. Usually causing rebuild traffic.
- Modules + PCH: build failures because clang cannot rebuild a module if it comes from building a PCH.
- PCH: build failures because clang cannot rebuild a PCH in case one of the input headers has different mtime.
This patch proposes hashing the content of input files (headers and
module maps), which is performed during serialization time. When looking
at input files for validation, clang only computes the hash in case
there's a mtime mismatch.
I've tested a couple of different hash algorithms availble in LLVM in
face of building modules+pch for `#import <Cocoa/Cocoa.h>`:
- `hash_code`: performace diff within the noise, total module cache increased by 0.07%.
- `SHA1`: 5% slowdown. Haven't done real size measurements, but it'd be BLOCK_ID+20 bytes per input file, instead of BLOCK_ID+8 bytes from `hash_code`.
- `MD5`: 3% slowdown. Like above, but BLOCK_ID+16 bytes per input file.
Given the numbers above, the patch uses `hash_code`. The patch also
improves invalidation error msgs to point out which type of problem the
user is facing: "mtime", "size" or "content".
rdar://problem/29320105
Reviewers: dexonsmith, arphaman, rsmith, aprantl
Subscribers: jkorous, cfe-commits, ributzka
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67249
> llvm-svn: 374841
llvm-svn: 374895
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 60be157..20c6cbe 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1247,6 +1247,12 @@
Diag(DiagID) << Arg1 << Arg2;
}
+void ASTReader::Error(unsigned DiagID, StringRef Arg1, StringRef Arg2,
+ unsigned Select) const {
+ if (!Diags.isDiagnosticInFlight())
+ Diag(DiagID) << Arg1 << Arg2 << Select;
+}
+
void ASTReader::Error(llvm::Error &&Err) const {
Error(toString(std::move(Err)));
}
@@ -2241,6 +2247,24 @@
R.TopLevelModuleMap = static_cast<bool>(Record[5]);
R.Filename = Blob;
ResolveImportedPath(F, R.Filename);
+
+ Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
+ if (!MaybeEntry) // FIXME this drops errors on the floor.
+ consumeError(MaybeEntry.takeError());
+ llvm::BitstreamEntry Entry = MaybeEntry.get();
+ assert(Entry.Kind == llvm::BitstreamEntry::Record &&
+ "expected record type for input file hash");
+
+ Record.clear();
+ if (Expected<unsigned> Maybe = Cursor.readRecord(Entry.ID, Record))
+ assert(static_cast<InputFileRecordTypes>(Maybe.get()) == INPUT_FILE_HASH &&
+ "invalid record type for input file hash");
+ else {
+ // FIXME this drops errors on the floor.
+ consumeError(Maybe.takeError());
+ }
+ R.ContentHash = (static_cast<uint64_t>(Record[1]) << 32) |
+ static_cast<uint64_t>(Record[0]);
return R;
}
@@ -2271,6 +2295,7 @@
bool Overridden = FI.Overridden;
bool Transient = FI.Transient;
StringRef Filename = FI.Filename;
+ uint64_t StoredContentHash = FI.ContentHash;
const FileEntry *File = nullptr;
if (auto FE = FileMgr.getFile(Filename, /*OpenFile=*/false))
@@ -2325,14 +2350,46 @@
}
}
- bool IsOutOfDate = false;
+ enum ModificationType {
+ Size,
+ ModTime,
+ Content,
+ None,
+ };
+ auto HasInputFileChanged = [&]() {
+ if (StoredSize != File->getSize())
+ return ModificationType::Size;
+ if (!DisableValidation && StoredTime &&
+ StoredTime != File->getModificationTime()) {
+ // In case the modification time changes but not the content,
+ // accept the cached file as legit.
+ if (ValidateASTInputFilesContent &&
+ StoredContentHash != static_cast<uint64_t>(llvm::hash_code(-1))) {
+ auto MemBuffOrError = FileMgr.getBufferForFile(File);
+ if (!MemBuffOrError) {
+ if (!Complain)
+ return ModificationType::ModTime;
+ std::string ErrorStr = "could not get buffer for file '";
+ ErrorStr += File->getName();
+ ErrorStr += "'";
+ Error(ErrorStr);
+ return ModificationType::ModTime;
+ }
+ auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
+ if (StoredContentHash == static_cast<uint64_t>(ContentHash))
+ return ModificationType::None;
+ return ModificationType::Content;
+ }
+ return ModificationType::ModTime;
+ }
+ return ModificationType::None;
+ };
+
+ bool IsOutOfDate = false;
+ auto FileChange = HasInputFileChanged();
// For an overridden file, there is nothing to validate.
- if (!Overridden && //
- (StoredSize != File->getSize() ||
- (StoredTime && StoredTime != File->getModificationTime() &&
- !DisableValidation)
- )) {
+ if (!Overridden && FileChange != ModificationType::None) {
if (Complain) {
// Build a list of the PCH imports that got us here (in reverse).
SmallVector<ModuleFile *, 4> ImportStack(1, &F);
@@ -2341,13 +2398,17 @@
// The top-level PCH is stale.
StringRef TopLevelPCHName(ImportStack.back()->FileName);
- unsigned DiagnosticKind = moduleKindForDiagnostic(ImportStack.back()->Kind);
+ unsigned DiagnosticKind =
+ moduleKindForDiagnostic(ImportStack.back()->Kind);
if (DiagnosticKind == 0)
- Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName);
+ Error(diag::err_fe_pch_file_modified, Filename, TopLevelPCHName,
+ (unsigned)FileChange);
else if (DiagnosticKind == 1)
- Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName);
+ Error(diag::err_fe_module_file_modified, Filename, TopLevelPCHName,
+ (unsigned)FileChange);
else
- Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName);
+ Error(diag::err_fe_ast_file_modified, Filename, TopLevelPCHName,
+ (unsigned)FileChange);
// Print the import stack.
if (ImportStack.size() > 1 && !Diags.isDiagnosticInFlight()) {
@@ -5192,6 +5253,8 @@
consumeError(MaybeRecordType.takeError());
}
switch ((InputFileRecordTypes)MaybeRecordType.get()) {
+ case INPUT_FILE_HASH:
+ break;
case INPUT_FILE:
bool Overridden = static_cast<bool>(Record[3]);
std::string Filename = Blob;
@@ -12153,7 +12216,7 @@
StringRef isysroot, bool DisableValidation,
bool AllowASTWithCompilerErrors,
bool AllowConfigurationMismatch, bool ValidateSystemInputs,
- bool UseGlobalIndex,
+ bool ValidateASTInputFilesContent, bool UseGlobalIndex,
std::unique_ptr<llvm::Timer> ReadTimer)
: Listener(DisableValidation
? cast<ASTReaderListener>(new SimpleASTReaderListener(PP))
@@ -12167,6 +12230,7 @@
AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
AllowConfigurationMismatch(AllowConfigurationMismatch),
ValidateSystemInputs(ValidateSystemInputs),
+ ValidateASTInputFilesContent(ValidateASTInputFilesContent),
UseGlobalIndex(UseGlobalIndex), CurrSwitchCaseStmts(&SwitchCaseStmts) {
SourceMgr.setExternalSLocEntrySource(this);