[Object] Deduplicate long archive member names

Summary:
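Import libraries as created by llvm-dlltool always use the same archive
member name for every object file (namely, the DLL library name). Ensure
that each long name is stored in the string table only once; later members
with the same name reuse the offset of the first entry. Thin archives are
unaffected and still get one string table entry per member.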

Reviewed By: ruiu

Differential Revision: https://reviews.llvm.org/D55860

llvm-svn: 349637
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 7672053..4de5871 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -250,6 +250,7 @@
 
 static void printMemberHeader(raw_ostream &Out, uint64_t Pos,
                               raw_ostream &StringTable,
+                              StringMap<uint64_t> &MemberNames,
                               object::Archive::Kind Kind, bool Thin,
                               StringRef ArcName, const NewArchiveMember &M,
                               sys::TimePoint<std::chrono::seconds> ModTime,
@@ -262,8 +263,20 @@
     return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
                                      M.Perms, Size);
   Out << '/';
-  uint64_t NamePos = StringTable.tell();
-  addToStringTable(StringTable, ArcName, M, Thin);
+  uint64_t NamePos;
+  if (Thin) {
+    NamePos = StringTable.tell();
+    addToStringTable(StringTable, ArcName, M, Thin);
+  } else {
+    StringMap<uint64_t>::const_iterator it = MemberNames.find(M.MemberName);
+    if (it == MemberNames.end()) {
+      NamePos = StringTable.tell();
+      addToStringTable(StringTable, ArcName, M, Thin);
+      MemberNames[M.MemberName] = NamePos;
+    } else {
+      NamePos = it->second;
+    }
+  }
   printWithSpacePadding(Out, NamePos, 15);
   printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size);
 }
@@ -433,6 +446,11 @@
   std::vector<MemberData> Ret;
   bool HasObject = false;
 
+  // Deduplicate long member names in the string table and reuse earlier name
+  // offsets. This especially saves space for COFF Import libraries where all
+  // members have the same name.
+  StringMap<uint64_t> MemberNames;
+
   // UniqueTimestamps is a special case to improve debugging on Darwin:
   //
   // The Darwin linker does not link debug info into the final
@@ -505,8 +523,8 @@
       ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
     else
       ModTime = M.ModTime;
-    printMemberHeader(Out, Pos, StringTable, Kind, Thin, ArcName, M, ModTime,
-                      Buf.getBufferSize() + MemberPadding);
+    printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, ArcName,
+                      M, ModTime, Buf.getBufferSize() + MemberPadding);
     Out.flush();
 
     Expected<std::vector<unsigned>> Symbols =
diff --git a/llvm/test/Object/archive-format.test b/llvm/test/Object/archive-format.test
index b1ae411..b555bf8 100644
--- a/llvm/test/Object/archive-format.test
+++ b/llvm/test/Object/archive-format.test
@@ -15,8 +15,7 @@
 RUN: cat %t.a | FileCheck -strict-whitespace %s
 
 CHECK:      !<arch>
-CHECK-NEXT: //                                              36        `
-CHECK-NEXT: 0123456789abcdef/
+CHECK-NEXT: //                                              18        `
 CHECK-NEXT: 0123456789abcdef/
 CHECK-NEXT: 0123456789abcde/0           0     0     644     4         `
 CHECK-NEXT: bar.
@@ -24,7 +23,7 @@
 CHECK-NEXT: zed.
 CHECK-SAME: 0123456789abcde/0           0     0     644     4         `
 CHECK-NEXT: bar2
-CHECK-SAME: /18             0           0     0     644     4         `
+CHECK-SAME: /0              0           0     0     644     4         `
 CHECK-NEXT: zed2
 
 RUN: rm -f %t.a