[DebugInfo] Unify ChecksumKind and Checksum value in DIFile
Rather than encode the absence of a checksum with a Kind variant, instead put
both the kind and value in a struct and wrap it in an Optional.
Differential Revision: http://reviews.llvm.org/D43043
llvm-svn: 324928
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 209a834..d5714ea 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -3612,7 +3612,6 @@
 };
 
 struct ChecksumKindField : public MDFieldImpl<DIFile::ChecksumKind> {
-  ChecksumKindField() : ImplTy(DIFile::CSK_None) {}
   ChecksumKindField(DIFile::ChecksumKind CSKind) : ImplTy(CSKind) {}
 };
 
@@ -3976,13 +3975,14 @@
 template <>
 bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
                             ChecksumKindField &Result) {
-  if (Lex.getKind() != lltok::ChecksumKind)
+  Optional<DIFile::ChecksumKind> CSKind =
+      DIFile::getChecksumKind(Lex.getStrVal());
+
+  if (Lex.getKind() != lltok::ChecksumKind || !CSKind)
     return TokError(
         "invalid checksum kind" + Twine(" '") + Lex.getStrVal() + "'");
 
-  DIFile::ChecksumKind CSKind = DIFile::getChecksumKind(Lex.getStrVal());
-
-  Result.assign(CSKind);
+  Result.assign(*CSKind);
   Lex.Lex();
   return false;
 }
@@ -4247,16 +4247,25 @@
 ///                   checksumkind: CSK_MD5,
 ///                   checksum: "000102030405060708090a0b0c0d0e0f")
 bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
+  // The default constructed value for checksumkind is required, but will never
+  // be used, as the parser checks if the field was actually Seen before using
+  // the Val.
 #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED)                                    \
   REQUIRED(filename, MDStringField, );                                         \
   REQUIRED(directory, MDStringField, );                                        \
-  OPTIONAL(checksumkind, ChecksumKindField, );                                 \
+  OPTIONAL(checksumkind, ChecksumKindField, (DIFile::CSK_MD5));                \
   OPTIONAL(checksum, MDStringField, );
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
+  Optional<DIFile::ChecksumInfo<MDString *>> OptChecksum;
+  if (checksumkind.Seen && checksum.Seen)
+    OptChecksum.emplace(checksumkind.Val, checksum.Val);
+  else if (checksumkind.Seen || checksum.Seen)
+    return Lex.Error("'checksumkind' and 'checksum' must be provided together");
+
   Result = GET_OR_DISTINCT(DIFile, (Context, filename.Val, directory.Val,
-                                    checksumkind.Val, checksum.Val));
+                                    OptChecksum));
   return false;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 374e315..0931192 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1354,13 +1354,20 @@
       return error("Invalid record");
 
     IsDistinct = Record[0];
+    Optional<DIFile::ChecksumInfo<MDString *>> Checksum;
+    // The BitcodeWriter writes null bytes into Record[3:4] when the Checksum
+    // is not present. This matches up with the old internal representation,
+    // and the old encoding for CSK_None in the ChecksumKind. The new
+    // representation reserves the value 0 in the ChecksumKind to continue to
+    // encode None in a backwards-compatible way.
+    if (Record.size() == 5 && Record[3] && Record[4])
+      Checksum.emplace(static_cast<DIFile::ChecksumKind>(Record[3]),
+                       getMDString(Record[4]));
     MetadataList.assignValue(
         GET_OR_DISTINCT(
             DIFile,
             (Context, getMDString(Record[1]), getMDString(Record[2]),
-             Record.size() == 3 ? DIFile::CSK_None
-                                : static_cast<DIFile::ChecksumKind>(Record[3]),
-             Record.size() == 3 ? nullptr : getMDString(Record[4]))),
+             Checksum)),
         NextMetadataNo);
     NextMetadataNo++;
     break;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8c65e95..1062011 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1551,8 +1551,15 @@
   Record.push_back(N->isDistinct());
   Record.push_back(VE.getMetadataOrNullID(N->getRawFilename()));
   Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory()));
-  Record.push_back(N->getChecksumKind());
-  Record.push_back(VE.getMetadataOrNullID(N->getRawChecksum()));
+  if (N->getRawChecksum()) {
+    Record.push_back(N->getRawChecksum()->Kind);
+    Record.push_back(VE.getMetadataOrNullID(N->getRawChecksum()->Value));
+  } else {
+    // Maintain backwards compatibility with the old internal representation of
+    // CSK_None in ChecksumKind by writing nulls here when Checksum is None.
+    Record.push_back(0);
+    Record.push_back(VE.getMetadataOrNullID(nullptr));
+  }
 
   Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev);
   Record.clear();
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 9e714e4..05c0001 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -165,14 +165,21 @@
   auto Insertion = FileIdMap.insert(std::make_pair(FullPath, NextId));
   if (Insertion.second) {
     // We have to compute the full filepath and emit a .cv_file directive.
-    std::string Checksum = fromHex(F->getChecksum());
-    void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
-    memcpy(CKMem, Checksum.data(), Checksum.size());
-    ArrayRef<uint8_t> ChecksumAsBytes(reinterpret_cast<const uint8_t *>(CKMem),
-                                      Checksum.size());
-    DIFile::ChecksumKind ChecksumKind = F->getChecksumKind();
+    ArrayRef<uint8_t> ChecksumAsBytes;
+    FileChecksumKind CSKind = FileChecksumKind::None;
+    if (F->getChecksum()) {
+      std::string Checksum = fromHex(F->getChecksum()->Value);
+      void *CKMem = OS.getContext().allocate(Checksum.size(), 1);
+      memcpy(CKMem, Checksum.data(), Checksum.size());
+      ChecksumAsBytes = ArrayRef<uint8_t>(
+          reinterpret_cast<const uint8_t *>(CKMem), Checksum.size());
+      switch (F->getChecksum()->Kind) {
+      case DIFile::CSK_MD5:  CSKind = FileChecksumKind::MD5; break;
+      case DIFile::CSK_SHA1: CSKind = FileChecksumKind::SHA1; break;
+      }
+    }
     bool Success = OS.EmitCVFileDirective(NextId, FullPath, ChecksumAsBytes,
-                                          static_cast<unsigned>(ChecksumKind));
+                                          static_cast<unsigned>(CSKind));
     (void)Success;
     assert(Success && ".cv_file directive failed");
   }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 939d13f..f799453 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -280,15 +280,16 @@
 
 MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) {
   assert(File);
-  if (File->getChecksumKind() != DIFile::CSK_MD5)
+  Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
+  if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
     return nullptr;
 
   // Convert the string checksum to an MD5Result for the streamer.
   // The verifier validates the checksum so we assume it's okay.
   // An MD5 checksum is 16 bytes.
-  std::string Checksum = fromHex(File->getChecksum());
+  std::string ChecksumString = fromHex(Checksum->Value);
   void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
-  memcpy(CKMem, Checksum.data(), 16);
+  memcpy(CKMem, ChecksumString.data(), 16);
   return reinterpret_cast<MD5::MD5Result *>(CKMem);
 }
 
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 444a341..5a02cf3 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1453,7 +1453,7 @@
 
   void printTag(const DINode *N);
   void printMacinfoType(const DIMacroNode *N);
-  void printChecksumKind(const DIFile *N);
+  void printChecksum(const DIFile::ChecksumInfo<StringRef> &N);
   void printString(StringRef Name, StringRef Value,
                    bool ShouldSkipEmpty = true);
   void printMetadata(StringRef Name, const Metadata *MD,
@@ -1488,11 +1488,10 @@
     Out << N->getMacinfoType();
 }
 
-void MDFieldPrinter::printChecksumKind(const DIFile *N) {
-  if (N->getChecksumKind() == DIFile::CSK_None)
-    // Skip CSK_None checksum kind.
-    return;
-  Out << FS << "checksumkind: " << N->getChecksumKindAsString();
+void MDFieldPrinter::printChecksum(
+    const DIFile::ChecksumInfo<StringRef> &Checksum) {
+  Out << FS << "checksumkind: " << Checksum.getKindAsString();
+  printString("checksum", Checksum.Value, /* ShouldSkipEmpty */ false);
 }
 
 void MDFieldPrinter::printString(StringRef Name, StringRef Value,
@@ -1721,8 +1720,9 @@
                       /* ShouldSkipEmpty */ false);
   Printer.printString("directory", N->getDirectory(),
                       /* ShouldSkipEmpty */ false);
-  Printer.printChecksumKind(N);
-  Printer.printString("checksum", N->getChecksum(), /* ShouldSkipEmpty */ true);
+  // Print all values for checksum together, or not at all.
+  if (N->getChecksum())
+    Printer.printChecksum(*N->getChecksum());
   Out << ")";
 }
 
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index f9aca8a..8cb95d2 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -204,8 +204,8 @@
 }
 
 DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory,
-                              DIFile::ChecksumKind CSKind, StringRef Checksum) {
-  return DIFile::get(VMContext, Filename, Directory, CSKind, Checksum);
+                              Optional<DIFile::ChecksumInfo<StringRef>> CS) {
+  return DIFile::get(VMContext, Filename, Directory, CS);
 }
 
 DIMacro *DIBuilder::createMacro(DIMacroFile *Parent, unsigned LineNumber,
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index a777198..68b1d66 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -394,34 +394,36 @@
 
 // FIXME: Implement this string-enum correspondence with a .def file and macros,
 // so that the association is explicit rather than implied.
-static const char *ChecksumKindName[DIFile::CSK_Last + 1] = {
-  "CSK_None",
+static const char *ChecksumKindName[DIFile::CSK_Last] = {
   "CSK_MD5",
   "CSK_SHA1"
 };
 
-DIFile::ChecksumKind DIFile::getChecksumKind(StringRef CSKindStr) {
-  return StringSwitch<DIFile::ChecksumKind>(CSKindStr)
-      .Case("CSK_MD5", DIFile::CSK_MD5)
-      .Case("CSK_SHA1", DIFile::CSK_SHA1)
-      .Default(DIFile::CSK_None);
+StringRef DIFile::getChecksumKindAsString(ChecksumKind CSKind) {
+  assert(CSKind <= DIFile::CSK_Last && "Invalid checksum kind");
+  // The first space was originally the CSK_None variant, which is now
+  // obsolete, but the space is still reserved in ChecksumKind, so we account
+  // for it here.
+  return ChecksumKindName[CSKind - 1];
 }
 
-StringRef DIFile::getChecksumKindAsString() const {
-  assert(CSKind <= DIFile::CSK_Last && "Invalid checksum kind");
-  return ChecksumKindName[CSKind];
+Optional<DIFile::ChecksumKind> DIFile::getChecksumKind(StringRef CSKindStr) {
+  return StringSwitch<Optional<DIFile::ChecksumKind>>(CSKindStr)
+      .Case("CSK_MD5", DIFile::CSK_MD5)
+      .Case("CSK_SHA1", DIFile::CSK_SHA1)
+      .Default(None);
 }
 
 DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename,
-                        MDString *Directory, DIFile::ChecksumKind CSKind,
-                        MDString *Checksum, StorageType Storage,
-                        bool ShouldCreate) {
+                        MDString *Directory,
+                        Optional<DIFile::ChecksumInfo<MDString *>> CS,
+                        StorageType Storage, bool ShouldCreate) {
   assert(isCanonical(Filename) && "Expected canonical MDString");
   assert(isCanonical(Directory) && "Expected canonical MDString");
-  assert(isCanonical(Checksum) && "Expected canonical MDString");
-  DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory, CSKind, Checksum));
-  Metadata *Ops[] = {Filename, Directory, Checksum};
-  DEFINE_GETIMPL_STORE(DIFile, (CSKind), Ops);
+  assert((!CS || isCanonical(CS->Value)) && "Expected canonical MDString");
+  DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory, CS));
+  Metadata *Ops[] = {Filename, Directory, CS ? CS->Value : nullptr};
+  DEFINE_GETIMPL_STORE(DIFile, (CS), Ops);
 }
 
 DICompileUnit *DICompileUnit::getImpl(
@@ -901,4 +903,3 @@
   Metadata *Ops[] = { File, Elements };
   DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops);
 }
-
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index c4cd785..313b7e1 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -574,26 +574,25 @@
 template <> struct MDNodeKeyImpl<DIFile> {
   MDString *Filename;
   MDString *Directory;
-  DIFile::ChecksumKind CSKind;
-  MDString *Checksum;
+  Optional<DIFile::ChecksumInfo<MDString *>> Checksum;
 
   MDNodeKeyImpl(MDString *Filename, MDString *Directory,
-                DIFile::ChecksumKind CSKind, MDString *Checksum)
-      : Filename(Filename), Directory(Directory), CSKind(CSKind),
-        Checksum(Checksum) {}
+                Optional<DIFile::ChecksumInfo<MDString *>> Checksum)
+      : Filename(Filename), Directory(Directory), Checksum(Checksum) {}
   MDNodeKeyImpl(const DIFile *N)
       : Filename(N->getRawFilename()), Directory(N->getRawDirectory()),
-        CSKind(N->getChecksumKind()), Checksum(N->getRawChecksum()) {}
+        Checksum(N->getRawChecksum()) {}
 
   bool isKeyOf(const DIFile *RHS) const {
     return Filename == RHS->getRawFilename() &&
            Directory == RHS->getRawDirectory() &&
-           CSKind == RHS->getChecksumKind() &&
            Checksum == RHS->getRawChecksum();
   }
 
   unsigned getHashValue() const {
-    return hash_combine(Filename, Directory, CSKind, Checksum);
+    if (Checksum)
+      return hash_combine(Filename, Directory, Checksum->Kind, Checksum->Value);
+    return hash_combine(Filename, Directory);
   }
 };
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3b07ba3..a114a5f 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -988,23 +988,23 @@
 
 void Verifier::visitDIFile(const DIFile &N) {
   AssertDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
-  AssertDI(N.getChecksumKind() <= DIFile::CSK_Last, "invalid checksum kind",
-           &N);
-  size_t Size;
-  switch (N.getChecksumKind()) {
-  case DIFile::CSK_None:
-    Size = 0;
-    break;
-  case DIFile::CSK_MD5:
-    Size = 32;
-    break;
-  case DIFile::CSK_SHA1:
-    Size = 40;
-    break;
+  Optional<DIFile::ChecksumInfo<StringRef>> Checksum = N.getChecksum();
+  if (Checksum) {
+    AssertDI(Checksum->Kind <= DIFile::ChecksumKind::CSK_Last,
+             "invalid checksum kind", &N);
+    size_t Size;
+    switch (Checksum->Kind) {
+    case DIFile::CSK_MD5:
+      Size = 32;
+      break;
+    case DIFile::CSK_SHA1:
+      Size = 40;
+      break;
+    }
+    AssertDI(Checksum->Value.size() == Size, "invalid checksum length", &N);
+    AssertDI(Checksum->Value.find_if_not(llvm::isHexDigit) == StringRef::npos,
+             "invalid checksum", &N);
   }
-  AssertDI(N.getChecksum().size() == Size, "invalid checksum length", &N);
-  AssertDI(N.getChecksum().find_if_not(llvm::isHexDigit) == StringRef::npos,
-           "invalid checksum", &N);
 }
 
 void Verifier::visitDICompileUnit(const DICompileUnit &N) {