llvm-dwp: Add an abstraction for the DWP string pool

Also reference strings in the memory-mapped file instead of copying them;
this reduces memory usage on a large test case by 18.5%.

llvm-svn: 270449
diff --git a/llvm/tools/llvm-dwp/DWPError.h b/llvm/tools/llvm-dwp/DWPError.h
index e0dd4ed..62025ed 100644
--- a/llvm/tools/llvm-dwp/DWPError.h
+++ b/llvm/tools/llvm-dwp/DWPError.h
@@ -1,6 +1,10 @@
+#ifndef TOOLS_LLVM_DWP_DWPERROR
+#define TOOLS_LLVM_DWP_DWPERROR
+
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <string>
+
 namespace llvm {
 class DWPError : public ErrorInfo<DWPError> {
 public:
@@ -15,3 +19,5 @@
   std::string Info;
 };
 }
+
+#endif
diff --git a/llvm/tools/llvm-dwp/DWPStringPool.h b/llvm/tools/llvm-dwp/DWPStringPool.h
new file mode 100644
index 0000000..8923e4c
--- /dev/null
+++ b/llvm/tools/llvm-dwp/DWPStringPool.h
@@ -0,0 +1,54 @@
+#ifndef TOOLS_LLVM_DWP_DWPSTRINGPOOL
+#define TOOLS_LLVM_DWP_DWPSTRINGPOOL
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include <cassert>
+// Deduplicating pool of C strings; each new string is emitted to Sec via Out.
+class DWPStringPool {
+  // DenseMap traits comparing by string contents; ~0 and ~1 are sentinels.
+  struct CStrDenseMapInfo {
+    static inline const char *getEmptyKey() {
+      return reinterpret_cast<const char *>(~static_cast<uintptr_t>(0));
+    }
+    static inline const char *getTombstoneKey() {
+      return reinterpret_cast<const char *>(~static_cast<uintptr_t>(1));
+    }
+    static unsigned getHashValue(const char *Val) {
+      assert(Val != getEmptyKey() && "Cannot hash the empty key!");
+      assert(Val != getTombstoneKey() && "Cannot hash the tombstone key!");
+      return (unsigned)hash_value(StringRef(Val));
+    }
+    static bool isEqual(const char *LHS, const char *RHS) {
+      if (RHS == getEmptyKey())
+        return LHS == getEmptyKey();
+      if (RHS == getTombstoneKey())
+        return LHS == getTombstoneKey();
+      return strcmp(LHS, RHS) == 0; // NOTE(review): assumes LHS is a real key
+    }
+  };
+  // NOTE(review): llvm:: names are unqualified here; confirm includer's scope.
+  MCStreamer &Out;
+  MCSection *Sec;
+  DenseMap<const char *, uint32_t, CStrDenseMapInfo> Pool;
+  uint32_t Offset = 0; // Bytes emitted so far, i.e. the next new offset.
+
+public:
+  DWPStringPool(MCStreamer &Out, MCSection *Sec) : Out(Out), Sec(Sec) {}
+  // Returns Str's pool offset, emitting Str on first use; Length includes NUL.
+  uint32_t getOffset(const char *Str, unsigned Length) {
+    assert(strlen(Str) + 1 == Length && "Ensure length hint is correct");
+    // Pool keeps the pointer Str, not a copy: the bytes must outlive the pool.
+    auto Pair = Pool.insert(std::make_pair(Str, Offset));
+    if (Pair.second) { // New string: emit it with its NUL, bump Offset.
+      Out.SwitchSection(Sec);
+      Out.EmitBytes(StringRef(Str, Length));
+      Offset += Length;
+    }
+
+    return Pair.first->second;
+  }
+};
+
+#endif
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 5dbe416..9a43420 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -11,6 +11,8 @@
 // package files).
 //
 //===----------------------------------------------------------------------===//
+#include "DWPError.h"
+#include "DWPStringPool.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringSet.h"
@@ -28,6 +30,7 @@
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -35,9 +38,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Target/TargetMachine.h"
-#include "DWPError.h"
 #include <iostream>
 #include <memory>
 
@@ -54,11 +55,10 @@
                                        value_desc("filename"),
                                        cat(DwpCategory));
 
-static void
-writeStringsAndOffsets(MCStreamer &Out, StringMap<uint32_t> &Strings,
-                       uint32_t &StringOffset, MCSection *StrSection,
-                       MCSection *StrOffsetSection, StringRef CurStrSection,
-                       StringRef CurStrOffsetSection) {
+static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings,
+                                   MCSection *StrOffsetSection,
+                                   StringRef CurStrSection,
+                                   StringRef CurStrOffsetSection) {
   // Could possibly produce an error or warning if one of these was non-null but
   // the other was null.
   if (CurStrSection.empty() || CurStrOffsetSection.empty())
@@ -70,15 +70,8 @@
   uint32_t LocalOffset = 0;
   uint32_t PrevOffset = 0;
   while (const char *s = Data.getCStr(&LocalOffset)) {
-    StringRef Str(s, LocalOffset - PrevOffset - 1);
-    auto Pair = Strings.insert(std::make_pair(Str, StringOffset));
-    if (Pair.second) {
-      Out.SwitchSection(StrSection);
-      Out.EmitBytes(
-          StringRef(Pair.first->getKeyData(), Pair.first->getKeyLength() + 1));
-      StringOffset += Str.size() + 1;
-    }
-    OffsetRemapping[PrevOffset] = Pair.first->second;
+    OffsetRemapping[PrevOffset] =
+        Strings.getOffset(s, LocalOffset - PrevOffset);
     PrevOffset = LocalOffset;
   }
 
@@ -193,7 +186,9 @@
   StringRef DWPName;
 };
 
-StringRef getSubsection(StringRef Section, const DWARFUnitIndex::Entry &Entry, DWARFSectionKind Kind) {
+static StringRef getSubsection(StringRef Section,
+                               const DWARFUnitIndex::Entry &Entry,
+                               DWARFSectionKind Kind) {
   const auto *Off = Entry.getOffset(Kind);
   if (!Off)
     return StringRef();
@@ -393,16 +388,21 @@
   MapVector<uint64_t, UnitIndexEntry> IndexEntries;
   MapVector<uint64_t, UnitIndexEntry> TypeIndexEntries;
 
-  StringMap<uint32_t> Strings;
-  uint32_t StringOffset = 0;
-
   uint32_t ContributionOffsets[8] = {};
 
+  DWPStringPool Strings(Out, StrSection);
+
+  SmallVector<OwningBinary<object::ObjectFile>, 128> Objects;
+  Objects.reserve(Inputs.size());
+
   for (const auto &Input : Inputs) {
     auto ErrOrObj = object::ObjectFile::createObjectFile(Input);
     if (!ErrOrObj)
       return ErrOrObj.takeError();
 
+    auto &Obj = *ErrOrObj->getBinary();
+    Objects.push_back(std::move(*ErrOrObj));
+
     UnitIndexEntry CurEntry = {};
 
     StringRef CurStrSection;
@@ -415,7 +415,7 @@
 
     SmallVector<SmallString<32>, 4> UncompressedSections;
 
-    for (const auto &Section : ErrOrObj->getBinary()->sections()) {
+    for (const auto &Section : Obj.sections()) {
       if (Section.isBSS())
         continue;
       if (Section.isVirtual())
@@ -438,10 +438,11 @@
         if (!consumeCompressedDebugSectionHeader(Contents, OriginalSize))
           return make_error<DWPError>(
               ("failure while decompressing compressed section: '" + Name +
-               "\'").str());
+               "\'")
+                  .str());
         UncompressedSections.resize(UncompressedSections.size() + 1);
-        if (zlib::uncompress(Contents, UncompressedSections.back(), OriginalSize) !=
-            zlib::StatusOK) {
+        if (zlib::uncompress(Contents, UncompressedSections.back(),
+                             OriginalSize) != zlib::StatusOK) {
           UncompressedSections.pop_back();
           continue;
         }
@@ -495,8 +496,7 @@
 
     if (!CurCUIndexSection.empty()) {
       DWARFUnitIndex CUIndex(DW_SECT_INFO);
-      DataExtractor CUIndexData(CurCUIndexSection,
-                                ErrOrObj->getBinary()->isLittleEndian(), 0);
+      DataExtractor CUIndexData(CurCUIndexSection, Obj.isLittleEndian(), 0);
       if (!CUIndex.parse(CUIndexData))
         return make_error<DWPError>("Failed to parse cu_index");
 
@@ -533,8 +533,7 @@
           return make_error<DWPError>(
               "multiple type unit sections in .dwp file");
         DWARFUnitIndex TUIndex(DW_SECT_TYPES);
-        DataExtractor TUIndexData(CurTUIndexSection,
-                                  ErrOrObj->getBinary()->isLittleEndian(), 0);
+        DataExtractor TUIndexData(CurTUIndexSection, Obj.isLittleEndian(), 0);
         if (!TUIndex.parse(TUIndexData))
           return make_error<DWPError>("Failed to parse tu_index");
         addAllTypesFromDWP(Out, TypeIndexEntries, TUIndex, TypesSection,
@@ -556,8 +555,7 @@
                   CurEntry, ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]);
     }
 
-    writeStringsAndOffsets(Out, Strings, StringOffset, StrSection,
-                           StrOffsetSection, CurStrSection,
+    writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection,
                            CurStrOffsetSection);
   }