ELF: New symbol table design.

This patch implements a new design for the symbol table that stores
SymbolBodies within a memory region of the Symbol object. Symbols are mutated
by constructing SymbolBodies in place over existing SymbolBodies, rather
than by mutating pointers. As mentioned in the initial proposal [1], this
memory layout helps reduce the cache miss rate by improving memory locality.

Performance numbers:

           old(s) new(s)
Without debug info:
chrome      7.178  6.432 (-11.5%)
LLVMgold.so 0.505  0.502 (-0.5%)
clang       0.954  0.827 (-15.4%)
llvm-as     0.052  0.045 (-15.5%)
With debug info:
scylla      5.695  5.613 (-1.5%)
clang      14.396 14.143 (-1.8%)

Performance counter results show that the fewer required indirections is
indeed the cause of the improved performance. For example, when linking
chrome, stalled cycles decreases from 14,556,444,002 to 12,959,238,310, and
instructions per cycle increases from 0.78 to 0.83. We are also executing
many fewer instructions (15,516,401,933 down to 15,002,434,310), probably
because we spend less time allocating SymbolBodies.

The new mechanism by which symbols are added to the symbol table is by calling
add* functions on the SymbolTable.

In this patch, I handle local symbols by storing them inside "unparented"
SymbolBodies. This is suboptimal, but if we do want to try to avoid allocating
these SymbolBodies, we can probably do that separately.

I also removed a few members from the SymbolBody class that were only being
used to pass information from the input file to the symbol table.

This patch implements the new design for the ELF linker only. I intend to
prepare a similar patch for the COFF linker.

[1] http://lists.llvm.org/pipermail/llvm-dev/2016-April/098832.html

Differential Revision: http://reviews.llvm.org/D19752

llvm-svn: 268178
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 2c7124e..b65540c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -11,6 +11,7 @@
 #include "Driver.h"
 #include "Error.h"
 #include "InputSection.h"
+#include "SymbolTable.h"
 #include "Symbols.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
@@ -330,11 +331,14 @@
 
   switch (Sym->st_shndx) {
   case SHN_UNDEF:
-    return new (Alloc) Undefined(Name, Binding, Sym->st_other, Sym->getType(),
-                                 /*IsBitcode*/ false);
+    return Symtab<ELFT>::X
+        ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), this)
+        ->body();
   case SHN_COMMON:
-    return new (Alloc) DefinedCommon(Name, Sym->st_size, Sym->st_value, Binding,
-                                     Sym->st_other, Sym->getType());
+    return Symtab<ELFT>::X
+        ->addCommon(Name, Sym->st_size, Sym->st_value, Binding, Sym->st_other,
+                    Sym->getType(), this)
+        ->body();
   }
 
   switch (Binding) {
@@ -344,22 +348,19 @@
   case STB_WEAK:
   case STB_GNU_UNIQUE:
     if (Sec == &InputSection<ELFT>::Discarded)
-      return new (Alloc) Undefined(Name, Binding, Sym->st_other, Sym->getType(),
-                                   /*IsBitcode*/ false);
-    return new (Alloc) DefinedRegular<ELFT>(Name, *Sym, Sec);
+      return Symtab<ELFT>::X
+          ->addUndefined(Name, Binding, Sym->st_other, Sym->getType(), this)
+          ->body();
+    return Symtab<ELFT>::X->addRegular(Name, *Sym, Sec)->body();
   }
 }
 
-void ArchiveFile::parse() {
+template <class ELFT> void ArchiveFile::parse() {
   File = check(Archive::create(MB), "failed to parse archive");
 
-  // Allocate a buffer for Lazy objects.
-  size_t NumSyms = File->getNumberOfSymbols();
-  LazySymbols.reserve(NumSyms);
-
   // Read the symbol table to construct Lazy objects.
   for (const Archive::Symbol &Sym : File->symbols())
-    LazySymbols.emplace_back(this, Sym);
+    Symtab<ELFT>::X->addLazyArchive(this, Sym);
 }
 
 // Returns a buffer pointing to a member file containing a given symbol.
@@ -487,8 +488,6 @@
   std::vector<const Elf_Verdef *> Verdefs = parseVerdefs(Versym);
 
   Elf_Sym_Range Syms = this->getElfSymbols(true);
-  uint32_t NumSymbols = std::distance(Syms.begin(), Syms.end());
-  SymbolBodies.reserve(NumSymbols);
   for (const Elf_Sym &Sym : Syms) {
     unsigned VersymIndex = 0;
     if (Versym) {
@@ -507,16 +506,12 @@
       if (VersymIndex == 0 || (VersymIndex & VERSYM_HIDDEN))
         continue;
     }
-    SymbolBodies.emplace_back(this, Name, Sym, Verdefs[VersymIndex]);
+    Symtab<ELFT>::X->addShared(this, Name, Sym, Verdefs[VersymIndex]);
   }
 }
 
 BitcodeFile::BitcodeFile(MemoryBufferRef M) : InputFile(BitcodeKind, M) {}
 
-bool BitcodeFile::classof(const InputFile *F) {
-  return F->kind() == BitcodeKind;
-}
-
 static uint8_t getGvVisibility(const GlobalValue *GV) {
   switch (GV->getVisibility()) {
   case GlobalValue::DefaultVisibility:
@@ -529,21 +524,30 @@
   llvm_unreachable("unknown visibility");
 }
 
-SymbolBody *
-BitcodeFile::createBody(const DenseSet<const Comdat *> &KeptComdats,
-                              const IRObjectFile &Obj,
-                              const BasicSymbolRef &Sym,
-                              const GlobalValue *GV) {
+template <class ELFT>
+Symbol *BitcodeFile::createSymbol(const DenseSet<const Comdat *> &KeptComdats,
+                                  const IRObjectFile &Obj,
+                                  const BasicSymbolRef &Sym) {
+  const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl());
+
   SmallString<64> Name;
   raw_svector_ostream OS(Name);
   Sym.printName(OS);
   StringRef NameRef = Saver.save(StringRef(Name));
-  SymbolBody *Body;
 
   uint32_t Flags = Sym.getFlags();
   bool IsWeak = Flags & BasicSymbolRef::SF_Weak;
   uint32_t Binding = IsWeak ? STB_WEAK : STB_GLOBAL;
 
+  uint8_t Type = STT_NOTYPE;
+  bool CanOmitFromDynSym = false;
+  // FIXME: Expose a thread-local flag for module asm symbols.
+  if (GV) {
+    if (GV->isThreadLocal())
+      Type = STT_TLS;
+    CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV);
+  }
+
   uint8_t Visibility;
   if (GV)
     Visibility = getGvVisibility(GV);
@@ -554,46 +558,28 @@
 
   if (GV)
     if (const Comdat *C = GV->getComdat())
-      if (!KeptComdats.count(C)) {
-        Body = new (Alloc) Undefined(NameRef, Binding, Visibility, /*Type*/ 0,
-                                     /*IsBitcode*/ true);
-        return Body;
-      }
+      if (!KeptComdats.count(C))
+        return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
+                                             this);
 
   const Module &M = Obj.getModule();
   if (Flags & BasicSymbolRef::SF_Undefined)
-    return new (Alloc) Undefined(NameRef, Binding, Visibility, /*Type*/ 0,
-                                 /*IsBitcode*/ true);
+    return Symtab<ELFT>::X->addUndefined(NameRef, Binding, Visibility, Type,
+                                         this);
   if (Flags & BasicSymbolRef::SF_Common) {
     // FIXME: Set SF_Common flag correctly for module asm symbols, and expose
     // size and alignment.
     assert(GV);
     const DataLayout &DL = M.getDataLayout();
     uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
-    return new (Alloc) DefinedCommon(NameRef, Size, GV->getAlignment(), Binding,
-                                     Visibility, /*Type*/ 0);
+    return Symtab<ELFT>::X->addCommon(NameRef, Size, GV->getAlignment(),
+                                      Binding, Visibility, STT_OBJECT, this);
   }
-  return new (Alloc) DefinedBitcode(NameRef, IsWeak, Visibility);
+  return Symtab<ELFT>::X->addBitcode(NameRef, IsWeak, Visibility, Type,
+                                     CanOmitFromDynSym, this);
 }
 
-SymbolBody *
-BitcodeFile::createSymbolBody(const DenseSet<const Comdat *> &KeptComdats,
-                              const IRObjectFile &Obj,
-                              const BasicSymbolRef &Sym) {
-  const GlobalValue *GV = Obj.getSymbolGV(Sym.getRawDataRefImpl());
-  SymbolBody *Body = createBody(KeptComdats, Obj, Sym, GV);
-
-  // FIXME: Expose a thread-local flag for module asm symbols.
-  if (GV) {
-    if (GV->isThreadLocal())
-      Body->Type = STT_TLS;
-    Body->CanOmitFromDynSym = canBeOmittedFromSymbolTable(GV);
-  }
-  return Body;
-}
-
-bool BitcodeFile::shouldSkip(const BasicSymbolRef &Sym) {
-  uint32_t Flags = Sym.getFlags();
+bool BitcodeFile::shouldSkip(uint32_t Flags) {
   if (!(Flags & BasicSymbolRef::SF_Global))
     return true;
   if (Flags & BasicSymbolRef::SF_FormatSpecific)
@@ -601,6 +587,7 @@
   return false;
 }
 
+template <class ELFT>
 void BitcodeFile::parse(DenseSet<StringRef> &ComdatGroups) {
   Obj = check(IRObjectFile::create(MB, Driver->Context));
   const Module &M = Obj->getModule();
@@ -613,8 +600,8 @@
   }
 
   for (const BasicSymbolRef &Sym : Obj->symbols())
-    if (!shouldSkip(Sym))
-      SymbolBodies.push_back(createSymbolBody(KeptComdats, *Obj, Sym));
+    if (!shouldSkip(Sym.getFlags()))
+      Symbols.push_back(createSymbol<ELFT>(KeptComdats, *Obj, Sym));
 }
 
 template <typename T>
@@ -675,9 +662,10 @@
   return createELFFile<SharedFile>(MB);
 }
 
+template <class ELFT>
 void LazyObjectFile::parse() {
   for (StringRef Sym : getSymbols())
-    LazySymbols.emplace_back(Sym, this->MB);
+    Symtab<ELFT>::X->addLazyObject(Sym, this->MB);
 }
 
 template <class ELFT> std::vector<StringRef> LazyObjectFile::getElfSymbols() {
@@ -707,9 +695,10 @@
       check(IRObjectFile::create(this->MB, Context));
   std::vector<StringRef> V;
   for (const BasicSymbolRef &Sym : Obj->symbols()) {
-    if (BitcodeFile::shouldSkip(Sym))
+    uint32_t Flags = Sym.getFlags();
+    if (BitcodeFile::shouldSkip(Flags))
       continue;
-    if (Sym.getFlags() & BasicSymbolRef::SF_Undefined)
+    if (Flags & BasicSymbolRef::SF_Undefined)
       continue;
     SmallString<64> Name;
     raw_svector_ostream OS(Name);
@@ -737,6 +726,25 @@
   return getElfSymbols<ELF64BE>();
 }
 
+template void ArchiveFile::parse<ELF32LE>();
+template void ArchiveFile::parse<ELF32BE>();
+template void ArchiveFile::parse<ELF64LE>();
+template void ArchiveFile::parse<ELF64BE>();
+
+template void
+BitcodeFile::parse<ELF32LE>(llvm::DenseSet<StringRef> &ComdatGroups);
+template void
+BitcodeFile::parse<ELF32BE>(llvm::DenseSet<StringRef> &ComdatGroups);
+template void
+BitcodeFile::parse<ELF64LE>(llvm::DenseSet<StringRef> &ComdatGroups);
+template void
+BitcodeFile::parse<ELF64BE>(llvm::DenseSet<StringRef> &ComdatGroups);
+
+template void LazyObjectFile::parse<ELF32LE>();
+template void LazyObjectFile::parse<ELF32BE>();
+template void LazyObjectFile::parse<ELF64LE>();
+template void LazyObjectFile::parse<ELF64BE>();
+
 template class elf::ELFFileBase<ELF32LE>;
 template class elf::ELFFileBase<ELF32BE>;
 template class elf::ELFFileBase<ELF64LE>;