Recommit "[llvm-objcopy][MachO] Support load commands used in executables/shared libraries"

Summary:
This patch implements copying of some load commands that appear in executables/shared libraries, together with the data they refer to (e.g. the indirect symbol table).

I intentionally don't add tests because this patch is incomplete: we still need a layout algorithm for executables/shared libraries. I'll submit that as a separate patch with tests.

Reviewers: alexshap, rupprecht, jhenderson, compnerd

Reviewed By: alexshap

Subscribers: abrachet, mgorny, mgrang, MaskRay, mtrent, jakehehrlich, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63395

llvm-svn: 369298
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 74200c5..0d3d235 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MachOWriter.h"
+#include "MachOLayoutBuilder.h"
 #include "Object.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -40,16 +41,10 @@
     const MachO::symtab_command &SymTabCommand =
         O.LoadCommands[*O.SymTabCommandIndex]
             .MachOLoadCommand.symtab_command_data;
-    if (SymTabCommand.symoff) {
-      assert((SymTabCommand.nsyms == O.SymTable.Symbols.size()) &&
-             "Incorrect number of symbols");
+    if (SymTabCommand.symoff)
       Ends.push_back(SymTabCommand.symoff + symTableSize());
-    }
-    if (SymTabCommand.stroff) {
-      assert((SymTabCommand.strsize == StrTableBuilder.getSize()) &&
-             "Incorrect string table size");
+    if (SymTabCommand.stroff)
       Ends.push_back(SymTabCommand.stroff + SymTabCommand.strsize);
-    }
   }
   if (O.DyLdInfoCommandIndex) {
     const MachO::dyld_info_command &DyLdInfoCommand =
@@ -84,6 +79,36 @@
     }
   }
 
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Ends.push_back(DySymTabCommand.indirectsymoff +
+                     sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+  }
+
+  if (O.DataInCodeCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.DataInCodeCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
+  if (O.FunctionStartsCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.FunctionStartsCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
   // Otherwise, use the last section / reloction.
   for (const auto &LC : O.LoadCommands)
     for (const auto &S : LC.Sections) {
@@ -120,14 +145,6 @@
   memcpy(B.getBufferStart(), &Header, HeaderSize);
 }
 
-void MachOWriter::updateSymbolIndexes() {
-  uint32_t Index = 0;
-  for (auto &Symbol : O.SymTable.Symbols) {
-    Symbol->Index = Index;
-    Index++;
-  }
-}
-
 void MachOWriter::writeLoadCommands() {
   uint8_t *Begin = B.getBufferStart() + headerSize();
   for (const auto &LC : O.LoadCommands) {
@@ -261,7 +278,7 @@
           .MachOLoadCommand.symtab_command_data;
 
   uint8_t *StrTable = (uint8_t *)B.getBufferStart() + SymTabCommand.stroff;
-  StrTableBuilder.write(StrTable);
+  LayoutBuilder.getStringTableBuilder().write(StrTable);
 }
 
 void MachOWriter::writeStringTable() {
@@ -275,7 +292,7 @@
   for (auto Iter = O.SymTable.Symbols.begin(), End = O.SymTable.Symbols.end();
        Iter != End; Iter++) {
     SymbolEntry *Sym = Iter->get();
-    auto Nstrx = StrTableBuilder.getOffset(Sym->Name);
+    uint32_t Nstrx = LayoutBuilder.getStringTableBuilder().getOffset(Sym->Name);
 
     if (Is64Bit)
       writeNListEntry<MachO::nlist_64>(*Sym, IsLittleEndian, SymTable, Nstrx);
@@ -344,6 +361,45 @@
   memcpy(Out, O.Exports.Trie.data(), O.Exports.Trie.size());
 }
 
+void MachOWriter::writeIndirectSymbolTable() { // Writes O.IndirectSymTable.
+  if (!O.DySymTabCommandIndex) // No dysymtab command index: nothing to write.
+    return;
+  // The dysymtab load command supplies the file offset and the expected count.
+  const MachO::dysymtab_command &DySymTabCommand =
+      O.LoadCommands[*O.DySymTabCommandIndex]
+          .MachOLoadCommand.dysymtab_command_data;
+  // NOTE(review): entries are memcpy'd verbatim, no byte swap - confirm.
+  char *Out = (char *)B.getBufferStart() + DySymTabCommand.indirectsymoff;
+  assert((DySymTabCommand.nindirectsyms == O.IndirectSymTable.Symbols.size()) &&
+         "Incorrect indirect symbol table size");
+  memcpy(Out, O.IndirectSymTable.Symbols.data(),
+         sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
+}
+
+void MachOWriter::writeDataInCodeData() { // Writes O.DataInCode.Data.
+  if (!O.DataInCodeCommandIndex) // No command index recorded: nothing to do.
+    return;
+  const MachO::linkedit_data_command &LinkEditDataCommand =
+      O.LoadCommands[*O.DataInCodeCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+  assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
+         "Incorrect data in code data size"); // Only cross-checks the sizes.
+  memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size()); // Verbatim.
+}
+
+void MachOWriter::writeFunctionStartsData() { // Writes O.FunctionStarts.Data.
+  if (!O.FunctionStartsCommandIndex) // No command index recorded: nothing to do.
+    return;
+  const MachO::linkedit_data_command &LinkEditDataCommand =
+      O.LoadCommands[*O.FunctionStartsCommandIndex]
+          .MachOLoadCommand.linkedit_data_command_data;
+  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
+  assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
+         "Incorrect function starts data size"); // Only cross-checks the sizes.
+  memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
+}
+
 void MachOWriter::writeTail() {
   typedef void (MachOWriter::*WriteHandlerType)(void);
   typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -379,6 +435,36 @@
           {DyLdInfoCommand.export_off, &MachOWriter::writeExportInfo});
   }
 
+  if (O.DySymTabCommandIndex) {
+    const MachO::dysymtab_command &DySymTabCommand =
+        O.LoadCommands[*O.DySymTabCommandIndex]
+            .MachOLoadCommand.dysymtab_command_data;
+
+    if (DySymTabCommand.indirectsymoff)
+      Queue.emplace_back(DySymTabCommand.indirectsymoff,
+                         &MachOWriter::writeIndirectSymbolTable);
+  }
+
+  if (O.DataInCodeCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.DataInCodeCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Queue.emplace_back(LinkEditDataCommand.dataoff,
+                         &MachOWriter::writeDataInCodeData);
+  }
+
+  if (O.FunctionStartsCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.FunctionStartsCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Queue.emplace_back(LinkEditDataCommand.dataoff,
+                         &MachOWriter::writeFunctionStartsData);
+  }
+
   llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
     return LHS.first < RHS.first;
   });
@@ -387,198 +473,13 @@
     (this->*WriteOp.second)();
 }
 
-void MachOWriter::updateSizeOfCmds() {
-  auto Size = 0;
-  for (const auto &LC : O.LoadCommands) {
-    auto &MLC = LC.MachOLoadCommand;
-    auto cmd = MLC.load_command_data.cmd;
-
-    switch (cmd) {
-    case MachO::LC_SEGMENT:
-      Size += sizeof(MachO::segment_command) +
-              sizeof(MachO::section) * LC.Sections.size();
-      continue;
-    case MachO::LC_SEGMENT_64:
-      Size += sizeof(MachO::segment_command_64) +
-              sizeof(MachO::section_64) * LC.Sections.size();
-      continue;
-    }
-
-    switch (cmd) {
-#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
-  case MachO::LCName:                                                          \
-    Size += sizeof(MachO::LCStruct);                                           \
-    break;
-#include "llvm/BinaryFormat/MachO.def"
-#undef HANDLE_LOAD_COMMAND
-    }
-  }
-
-  O.Header.SizeOfCmds = Size;
-}
-
-// Updates the index and the number of local/external/undefined symbols. Here we
-// assume that MLC is a LC_DYSYMTAB and the nlist entries in the symbol table
-// are already sorted by the those types.
-void MachOWriter::updateDySymTab(MachO::macho_load_command &MLC) {
-  uint32_t NumLocalSymbols = 0;
-  auto Iter = O.SymTable.Symbols.begin();
-  auto End = O.SymTable.Symbols.end();
-  for (; Iter != End; Iter++) {
-    if ((*Iter)->n_type & (MachO::N_EXT | MachO::N_PEXT))
-      break;
-
-    NumLocalSymbols++;
-  }
-
-  uint32_t NumExtDefSymbols = 0;
-  for (; Iter != End; Iter++) {
-    if (((*Iter)->n_type & MachO::N_TYPE) == MachO::N_UNDF)
-      break;
-
-    NumExtDefSymbols++;
-  }
-
-  MLC.dysymtab_command_data.ilocalsym = 0;
-  MLC.dysymtab_command_data.nlocalsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.iextdefsym = NumLocalSymbols;
-  MLC.dysymtab_command_data.nextdefsym = NumExtDefSymbols;
-  MLC.dysymtab_command_data.iundefsym = NumLocalSymbols + NumExtDefSymbols;
-  MLC.dysymtab_command_data.nundefsym =
-      O.SymTable.Symbols.size() - (NumLocalSymbols + NumExtDefSymbols);
-}
-
-// Recomputes and updates offset and size fields in load commands and sections
-// since they could be modified.
-Error MachOWriter::layout() {
-  auto SizeOfCmds = loadCommandsSize();
-  auto Offset = headerSize() + SizeOfCmds;
-  O.Header.NCmds = O.LoadCommands.size();
-  O.Header.SizeOfCmds = SizeOfCmds;
-
-  // Lay out sections.
-  for (auto &LC : O.LoadCommands) {
-    uint64_t FileOff = Offset;
-    uint64_t VMSize = 0;
-    uint64_t FileOffsetInSegment = 0;
-    for (auto &Sec : LC.Sections) {
-      if (!Sec.isVirtualSection()) {
-        auto FilePaddingSize =
-            OffsetToAlignment(FileOffsetInSegment, 1ull << Sec.Align);
-        Sec.Offset = Offset + FileOffsetInSegment + FilePaddingSize;
-        Sec.Size = Sec.Content.size();
-        FileOffsetInSegment += FilePaddingSize + Sec.Size;
-      }
-
-      VMSize = std::max(VMSize, Sec.Addr + Sec.Size);
-    }
-
-    // TODO: Handle the __PAGEZERO segment.
-    auto &MLC = LC.MachOLoadCommand;
-    switch (MLC.load_command_data.cmd) {
-    case MachO::LC_SEGMENT:
-      MLC.segment_command_data.cmdsize =
-          sizeof(MachO::segment_command) +
-          sizeof(MachO::section) * LC.Sections.size();
-      MLC.segment_command_data.nsects = LC.Sections.size();
-      MLC.segment_command_data.fileoff = FileOff;
-      MLC.segment_command_data.vmsize = VMSize;
-      MLC.segment_command_data.filesize = FileOffsetInSegment;
-      break;
-    case MachO::LC_SEGMENT_64:
-      MLC.segment_command_64_data.cmdsize =
-          sizeof(MachO::segment_command_64) +
-          sizeof(MachO::section_64) * LC.Sections.size();
-      MLC.segment_command_64_data.nsects = LC.Sections.size();
-      MLC.segment_command_64_data.fileoff = FileOff;
-      MLC.segment_command_64_data.vmsize = VMSize;
-      MLC.segment_command_64_data.filesize = FileOffsetInSegment;
-      break;
-    }
-
-    Offset += FileOffsetInSegment;
-  }
-
-  // Lay out relocations.
-  for (auto &LC : O.LoadCommands)
-    for (auto &Sec : LC.Sections) {
-      Sec.RelOff = Sec.Relocations.empty() ? 0 : Offset;
-      Sec.NReloc = Sec.Relocations.size();
-      Offset += sizeof(MachO::any_relocation_info) * Sec.NReloc;
-    }
-
-  // Lay out tail stuff.
-  auto NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
-  for (auto &LC : O.LoadCommands) {
-    auto &MLC = LC.MachOLoadCommand;
-    auto cmd = MLC.load_command_data.cmd;
-    switch (cmd) {
-    case MachO::LC_SYMTAB:
-      MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
-      MLC.symtab_command_data.strsize = StrTableBuilder.getSize();
-      MLC.symtab_command_data.symoff = Offset;
-      Offset += NListSize * MLC.symtab_command_data.nsyms;
-      MLC.symtab_command_data.stroff = Offset;
-      Offset += MLC.symtab_command_data.strsize;
-      break;
-    case MachO::LC_DYSYMTAB: {
-      if (MLC.dysymtab_command_data.ntoc != 0 ||
-          MLC.dysymtab_command_data.nmodtab != 0 ||
-          MLC.dysymtab_command_data.nextrefsyms != 0 ||
-          MLC.dysymtab_command_data.nlocrel != 0 ||
-          MLC.dysymtab_command_data.nextrel != 0)
-        return createStringError(llvm::errc::not_supported,
-                                 "shared library is not yet supported");
-
-      if (MLC.dysymtab_command_data.nindirectsyms != 0)
-        return createStringError(llvm::errc::not_supported,
-                                 "indirect symbol table is not yet supported");
-
-      updateDySymTab(MLC);
-      break;
-    }
-    case MachO::LC_SEGMENT:
-    case MachO::LC_SEGMENT_64:
-    case MachO::LC_VERSION_MIN_MACOSX:
-    case MachO::LC_BUILD_VERSION:
-    case MachO::LC_ID_DYLIB:
-    case MachO::LC_LOAD_DYLIB:
-    case MachO::LC_UUID:
-    case MachO::LC_SOURCE_VERSION:
-      // Nothing to update.
-      break;
-    default:
-      // Abort if it's unsupported in order to prevent corrupting the object.
-      return createStringError(llvm::errc::not_supported,
-                               "unsupported load command (cmd=0x%x)", cmd);
-    }
-  }
-
-  return Error::success();
-}
-
-void MachOWriter::constructStringTable() {
-  for (std::unique_ptr<SymbolEntry> &Sym : O.SymTable.Symbols)
-    StrTableBuilder.add(Sym->Name);
-  StrTableBuilder.finalize();
-}
-
-Error MachOWriter::finalize() {
-  updateSizeOfCmds();
-  constructStringTable();
-
-  if (auto E = layout())
-    return E;
-
-  return Error::success();
-}
+Error MachOWriter::finalize() { return LayoutBuilder.layout(); } // Delegates to LayoutBuilder.
 
 Error MachOWriter::write() {
   if (Error E = B.allocate(totalSize()))
     return E;
   memset(B.getBufferStart(), 0, totalSize());
   writeHeader();
-  updateSymbolIndexes();
   writeLoadCommands();
   writeSections();
   writeTail();