[DWARFv5] CodeGen support for MD5 file checksums

Pass MD5 checksums through from IR to assembly/object files.
After this, getting Clang to compute the MD5 should be the last step
to supporting MD5 in the DWARF v5 line table header.

Differential Revision: https://reviews.llvm.org/D41926

llvm-svn: 322391
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 0443bde..e77e8d9 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -94,17 +94,17 @@
                  DIEInteger(0));
 }
 
-unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
-                                               StringRef DirName,
-                                               MD5::MD5Result *Checksum) {
+unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
   // If we print assembly, we can't separate .file entries according to
   // compile units. Thus all files will belong to the default compile unit.
 
   // FIXME: add a better feature test than hasRawTextSupport. Even better,
   // extend .file to support this.
+  unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
+  if (!File)
+    return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, CUID);
   return Asm->OutStreamer->EmitDwarfFileDirective(
-      0, DirName, FileName, Checksum,
-      Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
+      0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File), CUID);
 }
 
 DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
@@ -444,7 +444,7 @@
   // Add the call site information to the DIE.
   const DILocation *IA = Scope->getInlinedAt();
   addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
-          getOrCreateSourceID(IA->getFilename(), IA->getDirectory(), nullptr));
+          getOrCreateSourceID(IA->getFile()));
   addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
   if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
     addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
@@ -688,9 +688,7 @@
   else
     EntityDie = getDIE(Entity);
   assert(EntityDie);
-  auto *File = Module->getFile();
-  addSourceLine(*IMDie, Module->getLine(), File ? File->getFilename() : "",
-                File ? File->getDirectory() : "");
+  addSourceLine(*IMDie, Module->getLine(), Module->getFile());
   addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
   StringRef Name = Module->getName();
   if (!Name.empty())
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 5cf59c2..3325b1a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -141,8 +141,7 @@
 
   DwarfCompileUnit &getCU() override { return *this; }
 
-  unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName,
-                               MD5::MD5Result *Checksum) override;
+  unsigned getOrCreateSourceID(const DIFile *File) override;
 
   void addImportedEntity(const DIImportedEntity* IE) {
     DIScope *Scope = IE->getScope();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index a0094b2..bd40c9e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1366,19 +1366,17 @@
 void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
                                   unsigned Flags) {
   StringRef Fn;
-  StringRef Dir;
   unsigned Src = 1;
   unsigned Discriminator = 0;
   if (auto *Scope = cast_or_null<DIScope>(S)) {
     Fn = Scope->getFilename();
-    Dir = Scope->getDirectory();
     if (Line != 0 && getDwarfVersion() >= 4)
       if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
         Discriminator = LBF->getDiscriminator();
 
     unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
     Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
-              .getOrCreateSourceID(Fn, Dir, nullptr);
+              .getOrCreateSourceID(Scope->getFile());
   }
   Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
                                           Discriminator, Fn);
@@ -1973,11 +1971,7 @@
   assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
   Asm->EmitULEB128(dwarf::DW_MACINFO_start_file);
   Asm->EmitULEB128(F.getLine());
-  DIFile *File = F.getFile();
-  unsigned FID =
-      U.getOrCreateSourceID(File->getFilename(), File->getDirectory(),
-                            nullptr); // FIXME: MD5?
-  Asm->EmitULEB128(FID);
+  Asm->EmitULEB128(U.getOrCreateSourceID(F.getFile()));
   handleMacroNodes(F.getElements(), U);
   Asm->EmitULEB128(dwarf::DW_MACINFO_end_file);
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 28e63c2..61868a3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineOperand.h"
@@ -30,6 +31,7 @@
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
@@ -263,12 +265,25 @@
     addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
 }
 
-unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName,
-                                            StringRef DirName,
-                                            MD5::MD5Result *Checksum) {
+MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) {
+  assert(File);
+  if (File->getChecksumKind() != DIFile::CSK_MD5)
+    return nullptr;
+
+  // Convert the string checksum to an MD5Result for the streamer.
+  // The verifier validates the checksum so we assume it's okay.
+  // An MD5 checksum is 16 bytes.
+  std::string Checksum = fromHex(File->getChecksum());
+  void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
+  memcpy(CKMem, Checksum.data(), 16);
+  return reinterpret_cast<MD5::MD5Result *>(CKMem);
+}
+
+unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
   return SplitLineTable
-             ? SplitLineTable->getFile(DirName, FileName, Checksum)
-             : getCU().getOrCreateSourceID(FileName, DirName, Checksum);
+             ? SplitLineTable->getFile(File->getDirectory(),
+                                       File->getFilename(), getMD5AsBytes(File))
+             : getCU().getOrCreateSourceID(File);
 }
 
 void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -338,12 +353,11 @@
   Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block);
 }
 
-void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
-                              StringRef Directory) {
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
   if (Line == 0)
     return;
 
-  unsigned FileID = getOrCreateSourceID(File, Directory, nullptr);
+  unsigned FileID = getOrCreateSourceID(File);
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
@@ -352,32 +366,31 @@
 void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) {
   assert(V);
 
-  addSourceLine(Die, V->getLine(), V->getScope()->getFilename(),
-                V->getScope()->getDirectory());
+  addSourceLine(Die, V->getLine(), V->getFile());
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) {
   assert(G);
 
-  addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory());
+  addSourceLine(Die, G->getLine(), G->getFile());
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) {
   assert(SP);
 
-  addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory());
+  addSourceLine(Die, SP->getLine(), SP->getFile());
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) {
   assert(Ty);
 
-  addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+  addSourceLine(Die, Ty->getLine(), Ty->getFile());
 }
 
 void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) {
   assert(Ty);
 
-  addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory());
+  addSourceLine(Die, Ty->getLine(), Ty->getFile());
 }
 
 /* Byref variables, in Blocks, are declared by the programmer as "SomeType
@@ -1164,10 +1177,8 @@
     // Look at the Decl's linkage name only if we emitted it.
     if (DD->useAllLinkageNames())
       DeclLinkageName = SPDecl->getLinkageName();
-    unsigned DeclID = getOrCreateSourceID(SPDecl->getFilename(),
-                                          SPDecl->getDirectory(), nullptr);
-    unsigned DefID =
-        getOrCreateSourceID(SP->getFilename(), SP->getDirectory(), nullptr);
+    unsigned DeclID = getOrCreateSourceID(SPDecl->getFile());
+    unsigned DefID = getOrCreateSourceID(SP->getFile());
     if (DeclID != DefID)
       addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 43b23f4..6546a0c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -207,8 +207,7 @@
   void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
 
   /// Add location information to specified debug information entry.
-  void addSourceLine(DIE &Die, unsigned Line, StringRef File,
-                     StringRef Directory);
+  void addSourceLine(DIE &Die, unsigned Line, const DIFile *File);
   void addSourceLine(DIE &Die, const DILocalVariable *V);
   void addSourceLine(DIE &Die, const DIGlobalVariable *G);
   void addSourceLine(DIE &Die, const DISubprogram *SP);
@@ -306,10 +305,13 @@
   /// Create new static data member DIE.
   DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT);
 
-  /// Look up the source ID with the given directory and source file names. If
-  /// none currently exists, create a new ID and insert it in the line table.
-  virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory,
-                                       MD5::MD5Result *Checksum) = 0;
+  /// Look up the source ID for the given file. If none currently exists,
+  /// create a new ID and insert it in the line table.
+  virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
+
+  /// If the \p File has an MD5 checksum, return it as an MD5Result
+  /// allocated in the MCContext.
+  MD5::MD5Result *getMD5AsBytes(const DIFile *File);
 
   /// Look in the DwarfDebug map for the MDNode that corresponds to the
   /// reference.
@@ -359,8 +361,7 @@
   DwarfCompileUnit &CU;
   MCDwarfDwoLineTable *SplitLineTable;
 
-  unsigned getOrCreateSourceID(StringRef File, StringRef Directory,
-                               MD5::MD5Result *Checksum) override;
+  unsigned getOrCreateSourceID(const DIFile *File) override;
   bool isDwoUnit() const override;
 
 public: