[llvm-objcopy] Add ability to copy MachO object files

This diff implements the first bits of support for copying MachO object files (without modification).

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D54674

llvm-svn: 352944
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
new file mode 100644
index 0000000..10dce52
--- /dev/null
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -0,0 +1,353 @@
+//===- MachOWriter.cpp ------------------------------------------*- C++ -*-===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOWriter.h"
+#include "../llvm-objcopy.h"
+#include "Object.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Object/MachO.h"
+#include <memory>
+
+namespace llvm {
+namespace objcopy {
+namespace macho {
+
+/// Returns the size in bytes of the Mach header for the output's bitness:
+/// sizeof(mach_header_64) when Is64Bit is set, sizeof(mach_header) otherwise.
+size_t MachOWriter::headerSize() const {
+  if (Is64Bit)
+    return sizeof(MachO::mach_header_64);
+  return sizeof(MachO::mach_header);
+}
+
+/// Returns the size of the load commands area. The value is read from the
+/// SizeOfCmds field of the object's header; it is not recomputed here.
+size_t MachOWriter::loadCommandsSize() const {
+  return O.Header.SizeOfCmds;
+}
+
+/// Returns the size in bytes of the serialized symbol table: the number of
+/// entries in O.SymTable.NameList times the size of one nlist / nlist_64
+/// record, depending on Is64Bit.
+size_t MachOWriter::symTableSize() const {
+  const size_t EntrySize =
+      Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+  return O.SymTable.NameList.size() * EntrySize;
+}
+
+/// Returns the size in bytes of the serialized string table: the lengths of
+/// all strings in O.StrTable.Strings plus one separator byte between each
+/// pair of neighbouring strings (none after the last one). An empty table
+/// has size 0.
+size_t MachOWriter::strTableSize() const {
+  const auto &Strings = O.StrTable.Strings;
+  if (Strings.empty())
+    return 0;
+
+  // Start with the separators, then add the characters of every string.
+  size_t Total = Strings.size() - 1;
+  for (const auto &Str : Strings)
+    Total += Str.size();
+  return Total;
+}
+
+/// Computes how many bytes the output file occupies.
+///
+/// Every part of the file that has a recorded offset contributes the position
+/// one past its last byte, and the result is the largest such position. All
+/// offsets are assumed to be either valid or 0, where 0 means the
+/// corresponding part is missing. When no part contributes, the file consists
+/// of the Mach header and the load commands only.
+size_t MachOWriter::totalSize() const {
+  size_t FurthestEnd = 0;
+  bool SawAnyEnd = false;
+  // Remembers the largest end position seen so far.
+  auto NoteEnd = [&](size_t End) {
+    SawAnyEnd = true;
+    if (End > FurthestEnd)
+      FurthestEnd = End;
+  };
+
+  // Symbol table and string table, located through the symtab load command.
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTab =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    if (SymTab.symoff) {
+      assert((SymTab.nsyms == O.SymTable.NameList.size()) &&
+             "Incorrect number of symbols");
+      NoteEnd(SymTab.symoff + symTableSize());
+    }
+    if (SymTab.stroff) {
+      assert((SymTab.strsize == strTableSize()) &&
+             "Incorrect string table size");
+      NoteEnd(SymTab.stroff + SymTab.strsize);
+    }
+  }
+
+  // Rebase / bind / weak bind / lazy bind opcodes and the export trie,
+  // located through the dyld info load command.
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfo =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    if (DyLdInfo.rebase_off) {
+      assert((DyLdInfo.rebase_size == O.Rebases.Opcodes.size()) &&
+             "Incorrect rebase opcodes size");
+      NoteEnd(DyLdInfo.rebase_off + DyLdInfo.rebase_size);
+    }
+    if (DyLdInfo.bind_off) {
+      assert((DyLdInfo.bind_size == O.Binds.Opcodes.size()) &&
+             "Incorrect bind opcodes size");
+      NoteEnd(DyLdInfo.bind_off + DyLdInfo.bind_size);
+    }
+    if (DyLdInfo.weak_bind_off) {
+      assert((DyLdInfo.weak_bind_size == O.WeakBinds.Opcodes.size()) &&
+             "Incorrect weak bind opcodes size");
+      NoteEnd(DyLdInfo.weak_bind_off + DyLdInfo.weak_bind_size);
+    }
+    if (DyLdInfo.lazy_bind_off) {
+      assert((DyLdInfo.lazy_bind_size == O.LazyBinds.Opcodes.size()) &&
+             "Incorrect lazy bind opcodes size");
+      NoteEnd(DyLdInfo.lazy_bind_off + DyLdInfo.lazy_bind_size);
+    }
+    if (DyLdInfo.export_off) {
+      assert((DyLdInfo.export_size == O.Exports.Trie.size()) &&
+             "Incorrect trie size");
+      NoteEnd(DyLdInfo.export_off + DyLdInfo.export_size);
+    }
+  }
+
+  // Sections and their relocations. A section's end is always recorded;
+  // its relocation entries only count when RelOff is non-zero.
+  for (const auto &LC : O.LoadCommands) {
+    for (const auto &Sec : LC.Sections) {
+      NoteEnd(Sec.Offset + Sec.Size);
+      if (Sec.RelOff)
+        NoteEnd(Sec.RelOff +
+                Sec.NReloc * sizeof(MachO::any_relocation_info));
+    }
+  }
+
+  if (SawAnyEnd)
+    return FurthestEnd;
+
+  // Nothing recorded an end position, so only the Mach header and the load
+  // commands are present.
+  return headerSize() + loadCommandsSize();
+}
+
+/// Writes the Mach header at the very start of the output buffer.
+///
+/// The fields are always staged in a mach_header_64 and byte-swapped when the
+/// target endianness differs from the host. Only the first headerSize() bytes
+/// are then copied out, so for a 32-bit target this relies on mach_header
+/// having the same layout as the leading fields of mach_header_64.
+void MachOWriter::writeHeader() {
+  MachO::mach_header_64 Header;
+
+  Header.magic = O.Header.Magic;
+  Header.cputype = O.Header.CPUType;
+  Header.cpusubtype = O.Header.CPUSubType;
+  Header.filetype = O.Header.FileType;
+  Header.ncmds = O.Header.NCmds;
+  Header.sizeofcmds = O.Header.SizeOfCmds;
+  Header.flags = O.Header.Flags;
+  Header.reserved = O.Header.Reserved;
+
+  if (IsLittleEndian != sys::IsLittleEndianHost)
+    MachO::swapStruct(Header);
+
+  // Use the shared helper rather than re-deriving the size here, so that the
+  // number of bytes written always matches the offset at which the load
+  // commands start.
+  memcpy(B.getBufferStart(), &Header, headerSize());
+}
+
+/// Serializes all load commands into the output buffer, starting right after
+/// the Mach header (at offset headerSize()).
+///
+/// Each command is emitted as its fixed-size command struct followed by the
+/// raw bytes of its payload. The struct is copied into the local MLC first so
+/// that it can be byte-swapped, when the target endianness differs from the
+/// host, without modifying the object being written.
+void MachOWriter::writeLoadCommands() {
+  uint8_t *Begin = B.getBufferStart() + headerSize();
+  MachO::macho_load_command MLC;
+  for (const auto &LC : O.LoadCommands) {
+  // Case body for one specific load command kind: checks that struct size
+  // plus payload size equals cmdsize, then writes the (possibly swapped)
+  // struct and the payload, advancing Begin past both. The cases themselves
+  // are presumably generated by the include of MachO.def inside the switch
+  // below, one per HANDLE_LOAD_COMMAND entry.
+#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct)                         \
+  case MachO::LCName:                                                          \
+    assert(sizeof(MachO::LCStruct) + LC.Payload.size() ==                      \
+           LC.MachOLoadCommand.load_command_data.cmdsize);                     \
+    MLC = LC.MachOLoadCommand;                                                 \
+    if (IsLittleEndian != sys::IsLittleEndianHost)                             \
+      MachO::swapStruct(MLC.LCStruct##_data);                                  \
+    memcpy(Begin, &MLC.LCStruct##_data, sizeof(MachO::LCStruct));              \
+    Begin += sizeof(MachO::LCStruct);                                          \
+    memcpy(Begin, LC.Payload.data(), LC.Payload.size());                       \
+    Begin += LC.Payload.size();                                                \
+    break;
+
+    switch (LC.MachOLoadCommand.load_command_data.cmd) {
+    // Commands without a dedicated case are written as the generic
+    // load_command struct followed by the payload.
+    default:
+      assert(sizeof(MachO::load_command) + LC.Payload.size() ==
+             LC.MachOLoadCommand.load_command_data.cmdsize);
+      MLC = LC.MachOLoadCommand;
+      if (IsLittleEndian != sys::IsLittleEndianHost)
+        MachO::swapStruct(MLC.load_command_data);
+      memcpy(Begin, &MLC.load_command_data, sizeof(MachO::load_command));
+      Begin += sizeof(MachO::load_command);
+      memcpy(Begin, LC.Payload.data(), LC.Payload.size());
+      Begin += LC.Payload.size();
+      break;
+#include "llvm/BinaryFormat/MachO.def"
+    }
+  }
+}
+
+/// Writes the contents of every section, followed by that section's
+/// relocation entries, at the file offsets recorded in the section (Offset
+/// for the contents, RelOff for the relocations). Relocation entries are
+/// byte-swapped when the target endianness differs from the host.
+void MachOWriter::writeSections() {
+  for (const auto &LC : O.LoadCommands) {
+    for (const auto &Sec : LC.Sections) {
+      assert(Sec.Offset && "Section offset can not be zero");
+      assert((Sec.Size == Sec.Content.size()) && "Incorrect section size");
+      memcpy(B.getBufferStart() + Sec.Offset, Sec.Content.data(),
+             Sec.Content.size());
+
+      // Relocation entries are laid out back to back starting at RelOff.
+      size_t RelocPos = Sec.RelOff;
+      for (const auto &Entry : Sec.Relocations) {
+        // Swap a local copy so the object itself stays untouched.
+        MachO::any_relocation_info Reloc = Entry;
+        if (IsLittleEndian != sys::IsLittleEndianHost)
+          MachO::swapStruct(Reloc);
+        memcpy(B.getBufferStart() + RelocPos, &Reloc, sizeof(Reloc));
+        RelocPos += sizeof(MachO::any_relocation_info);
+      }
+    }
+  }
+}
+
+/// Serializes one symbol table entry as an NListType record.
+///
+/// \param NLE            the entry to write.
+/// \param IsLittleEndian endianness of the output; the record is byte-swapped
+///                       when it differs from the host's.
+/// \param Out            destination pointer; advanced past the written
+///                       record on return.
+template <typename NListType>
+void writeNListEntry(const NListEntry &NLE, bool IsLittleEndian, char *&Out) {
+  NListType Raw;
+  Raw.n_strx = NLE.n_strx;
+  Raw.n_type = NLE.n_type;
+  Raw.n_sect = NLE.n_sect;
+  Raw.n_desc = NLE.n_desc;
+  Raw.n_value = NLE.n_value;
+
+  const bool NeedsSwap = IsLittleEndian != sys::IsLittleEndianHost;
+  if (NeedsSwap)
+    MachO::swapStruct(Raw);
+
+  memcpy(Out, &Raw, sizeof(Raw));
+  Out += sizeof(Raw);
+}
+
+/// Writes the symbol table at the offset given by the symtab load command's
+/// symoff field. Does nothing when the object has no symtab load command.
+/// Entries are emitted as nlist_64 or nlist records depending on Is64Bit.
+void MachOWriter::writeSymbolTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTabCommand =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+  assert((SymTabCommand.nsyms == O.SymTable.NameList.size()) &&
+         "Incorrect number of symbols");
+  char *Out = (char *)B.getBufferStart() + SymTabCommand.symoff;
+  // Bind by const reference: the entries are only read, so there is no need
+  // to copy each one per iteration.
+  for (const auto &NLE : O.SymTable.NameList) {
+    if (Is64Bit)
+      writeNListEntry<MachO::nlist_64>(NLE, IsLittleEndian, Out);
+    else
+      writeNListEntry<MachO::nlist>(NLE, IsLittleEndian, Out);
+  }
+}
+
+/// Writes the string table at the offset given by the symtab load command's
+/// stroff field. Does nothing when the object has no symtab load command.
+/// Strings are written back to back with a single NUL byte between
+/// neighbours; no NUL is written after the last string.
+void MachOWriter::writeStringTable() {
+  if (!O.SymTabCommandIndex)
+    return;
+  const MachO::symtab_command &SymTab =
+      O.LoadCommands[*O.SymTabCommandIndex]
+          .MachOLoadCommand.symtab_command_data;
+  assert((SymTab.strsize == strTableSize()) &&
+         "Incorrect string table size");
+
+  char *Out = (char *)B.getBufferStart() + SymTab.stroff;
+  bool IsFirst = true;
+  for (const auto &Str : O.StrTable.Strings) {
+    // Every string except the first is preceded by a separator.
+    if (!IsFirst)
+      *Out++ = '\0';
+    IsFirst = false;
+    memcpy(Out, Str.data(), Str.size());
+    Out += Str.size();
+  }
+}
+
+/// Copies the rebase opcodes to the offset given by the dyld info load
+/// command's rebase_off field. Does nothing when the object has no dyld info
+/// load command.
+void MachOWriter::writeRebaseInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfo =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  const auto &Opcodes = O.Rebases.Opcodes;
+  assert((DyLdInfo.rebase_size == Opcodes.size()) &&
+         "Incorrect rebase opcodes size");
+  memcpy(B.getBufferStart() + DyLdInfo.rebase_off, Opcodes.data(),
+         Opcodes.size());
+}
+
+/// Copies the bind opcodes to the offset given by the dyld info load
+/// command's bind_off field. Does nothing when the object has no dyld info
+/// load command.
+void MachOWriter::writeBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfo =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  const auto &Opcodes = O.Binds.Opcodes;
+  assert((DyLdInfo.bind_size == Opcodes.size()) &&
+         "Incorrect bind opcodes size");
+  memcpy(B.getBufferStart() + DyLdInfo.bind_off, Opcodes.data(),
+         Opcodes.size());
+}
+
+/// Copies the weak bind opcodes to the offset given by the dyld info load
+/// command's weak_bind_off field. Does nothing when the object has no dyld
+/// info load command.
+void MachOWriter::writeWeakBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfo =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  const auto &Opcodes = O.WeakBinds.Opcodes;
+  assert((DyLdInfo.weak_bind_size == Opcodes.size()) &&
+         "Incorrect weak bind opcodes size");
+  memcpy(B.getBufferStart() + DyLdInfo.weak_bind_off, Opcodes.data(),
+         Opcodes.size());
+}
+
+/// Copies the lazy bind opcodes to the offset given by the dyld info load
+/// command's lazy_bind_off field. Does nothing when the object has no dyld
+/// info load command.
+void MachOWriter::writeLazyBindInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfo =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  const auto &Opcodes = O.LazyBinds.Opcodes;
+  assert((DyLdInfo.lazy_bind_size == Opcodes.size()) &&
+         "Incorrect lazy bind opcodes size");
+  memcpy(B.getBufferStart() + DyLdInfo.lazy_bind_off, Opcodes.data(),
+         Opcodes.size());
+}
+
+/// Copies the export trie to the offset given by the dyld info load
+/// command's export_off field. Does nothing when the object has no dyld info
+/// load command.
+void MachOWriter::writeExportInfo() {
+  if (!O.DyLdInfoCommandIndex)
+    return;
+  const MachO::dyld_info_command &DyLdInfo =
+      O.LoadCommands[*O.DyLdInfoCommandIndex]
+          .MachOLoadCommand.dyld_info_command_data;
+  const auto &Trie = O.Exports.Trie;
+  assert((DyLdInfo.export_size == Trie.size()) &&
+         "Incorrect export trie size");
+  memcpy(B.getBufferStart() + DyLdInfo.export_off, Trie.data(), Trie.size());
+}
+
+/// Writes the parts of the file that are located through the symtab and dyld
+/// info load commands: symbol table, string table, rebase / bind / weak bind /
+/// lazy bind opcodes and the export trie.
+///
+/// A part is written only when its load command is present and its offset is
+/// non-zero. The writers are invoked in increasing order of file offset.
+void MachOWriter::writeTail() {
+  using WriteHandlerType = void (MachOWriter::*)();
+  using WriteOperation = std::pair<uint64_t, WriteHandlerType>;
+  SmallVector<WriteOperation, 7> Queue;
+
+  // Schedules Handler unless the part is absent (offset 0).
+  auto Enqueue = [&Queue](uint64_t Offset, WriteHandlerType Handler) {
+    if (Offset)
+      Queue.push_back({Offset, Handler});
+  };
+
+  if (O.SymTabCommandIndex) {
+    const MachO::symtab_command &SymTab =
+        O.LoadCommands[*O.SymTabCommandIndex]
+            .MachOLoadCommand.symtab_command_data;
+    Enqueue(SymTab.symoff, &MachOWriter::writeSymbolTable);
+    Enqueue(SymTab.stroff, &MachOWriter::writeStringTable);
+  }
+
+  if (O.DyLdInfoCommandIndex) {
+    const MachO::dyld_info_command &DyLdInfo =
+        O.LoadCommands[*O.DyLdInfoCommandIndex]
+            .MachOLoadCommand.dyld_info_command_data;
+    Enqueue(DyLdInfo.rebase_off, &MachOWriter::writeRebaseInfo);
+    Enqueue(DyLdInfo.bind_off, &MachOWriter::writeBindInfo);
+    Enqueue(DyLdInfo.weak_bind_off, &MachOWriter::writeWeakBindInfo);
+    Enqueue(DyLdInfo.lazy_bind_off, &MachOWriter::writeLazyBindInfo);
+    Enqueue(DyLdInfo.export_off, &MachOWriter::writeExportInfo);
+  }
+
+  llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
+    return LHS.first < RHS.first;
+  });
+
+  for (const WriteOperation &Op : Queue)
+    (this->*Op.second)();
+}
+
+/// Produces the complete output file.
+///
+/// Allocates a buffer of totalSize() bytes, zero-fills it, writes the Mach
+/// header, the load commands, the sections and the tail parts into it, and
+/// finally commits the buffer.
+///
+/// \returns the error reported by the buffer's allocate() or commit(), or
+/// success.
+Error MachOWriter::write() {
+  // totalSize() walks every load command and the string table, so compute it
+  // once and reuse it for both the allocation and the zero fill.
+  const size_t TotalSize = totalSize();
+  if (Error E = B.allocate(TotalSize))
+    return E;
+  memset(B.getBufferStart(), 0, TotalSize);
+  writeHeader();
+  writeLoadCommands();
+  writeSections();
+  writeTail();
+  return B.commit();
+}
+
+} // end namespace macho
+} // end namespace objcopy
+} // end namespace llvm