[llvm-objcopy][MachO] Add support for LC_CODE_SIGNATURE

This diff adds support for copying binaries
containing an LC_CODE_SIGNATURE load command.

Test plan: make check-all

Differential revision: https://reviews.llvm.org/D81768
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 0ffe22a..4fc0001 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -216,10 +216,22 @@
 }
 
 Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
+  // If we are building the layout of an executable or dynamic library
+  // which does not have any segments other than __LINKEDIT,
+  // the Offset can be equal to zero by this time. This happens because, by
+  // convention, the file offsets specified by LC_SEGMENT start at zero in
+  // such cases (unlike the case of a relocatable object file).
+  const bool IsObject = O.Header.FileType == MachO::HeaderFileType::MH_OBJECT;
+  const uint64_t HeaderSize =
+      Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
+  assert((!IsObject || Offset >= HeaderSize + O.Header.SizeOfCmds) &&
+         "Incorrect tail offset");
+  Offset = std::max(Offset, HeaderSize + O.Header.SizeOfCmds);
+
   // The order of LINKEDIT elements is as follows:
   // rebase info, binding info, weak binding info, lazy binding info, export
   // trie, data-in-code, symbol table, indirect symbol table, symbol table
-  // strings.
+  // strings, code signature.
   uint64_t NListSize = Is64Bit ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
   uint64_t StartOfLinkEdit = Offset;
   uint64_t StartOfRebaseInfo = StartOfLinkEdit;
@@ -238,8 +250,10 @@
   uint64_t StartOfSymbolStrings =
       StartOfIndirectSymbols +
       sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
+  uint64_t StartOfCodeSignature =
+      StartOfSymbolStrings + StrTableBuilder.getSize();
   uint64_t LinkEditSize =
-      (StartOfSymbolStrings + StrTableBuilder.getSize()) - StartOfLinkEdit;
+      (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
 
   // Now we have determined the layout of the contents of the __LINKEDIT
   // segment. Update its load command.
@@ -265,6 +279,10 @@
     auto &MLC = LC.MachOLoadCommand;
     auto cmd = MLC.load_command_data.cmd;
     switch (cmd) {
+    case MachO::LC_CODE_SIGNATURE:
+      MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
+      MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
+      break;
     case MachO::LC_SYMTAB:
       MLC.symtab_command_data.symoff = StartOfSymbols;
       MLC.symtab_command_data.nsyms = O.SymTable.Symbols.size();
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 39a8893..99bcec7 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -119,6 +119,9 @@
   for (auto LoadCmd : MachOObj.load_commands()) {
     LoadCommand LC;
     switch (LoadCmd.C.cmd) {
+    case MachO::LC_CODE_SIGNATURE:
+      O.CodeSignatureCommandIndex = O.LoadCommands.size();
+      break;
     case MachO::LC_SEGMENT:
       LC.Sections = extractSections<MachO::section, MachO::segment_command>(
           LoadCmd, MachOObj, NextSectionIndex);
@@ -247,26 +250,26 @@
   O.Exports.Trie = MachOObj.getDyldInfoExportsTrie();
 }
 
-void MachOReader::readDataInCodeData(Object &O) const {
-  if (!O.DataInCodeCommandIndex)
+void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
+                               LinkData &LD) const {
+  if (!LCIndex)
     return;
-  const MachO::linkedit_data_command &LDC =
-      O.LoadCommands[*O.DataInCodeCommandIndex]
-          .MachOLoadCommand.linkedit_data_command_data;
+  const MachO::linkedit_data_command &LC =
+      O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
+  LD.Data =
+      arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
+}
 
-  O.DataInCode.Data = arrayRefFromStringRef(
-      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+void MachOReader::readCodeSignature(Object &O) const {
+  return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature);
+}
+
+void MachOReader::readDataInCodeData(Object &O) const {
+  return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
 }
 
 void MachOReader::readFunctionStartsData(Object &O) const {
-  if (!O.FunctionStartsCommandIndex)
-    return;
-  const MachO::linkedit_data_command &LDC =
-      O.LoadCommands[*O.FunctionStartsCommandIndex]
-          .MachOLoadCommand.linkedit_data_command_data;
-
-  O.FunctionStarts.Data = arrayRefFromStringRef(
-      MachOObj.getData().substr(LDC.dataoff, LDC.datasize));
+  return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
 }
 
 void MachOReader::readIndirectSymbolTable(Object &O) const {
@@ -316,6 +319,7 @@
   readWeakBindInfo(*Obj);
   readLazyBindInfo(*Obj);
   readExportInfo(*Obj);
+  readCodeSignature(*Obj);
   readDataInCodeData(*Obj);
   readFunctionStartsData(*Obj);
   readIndirectSymbolTable(*Obj);
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
index a369907..65824b6 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
@@ -36,6 +36,8 @@
   void readWeakBindInfo(Object &O) const;
   void readLazyBindInfo(Object &O) const;
   void readExportInfo(Object &O) const;
+  void readLinkData(Object &O, Optional<size_t> LCIndex, LinkData &LD) const;
+  void readCodeSignature(Object &O) const;
   void readDataInCodeData(Object &O) const;
   void readFunctionStartsData(Object &O) const;
   void readIndirectSymbolTable(Object &O) const;
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 1cb67d0..3c41e73 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -89,6 +89,15 @@
                      sizeof(uint32_t) * O.IndirectSymTable.Symbols.size());
   }
 
+  if (O.CodeSignatureCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.CodeSignatureCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+    if (LinkEditDataCommand.dataoff)
+      Ends.push_back(LinkEditDataCommand.dataoff +
+                     LinkEditDataCommand.datasize);
+  }
+
   if (O.DataInCodeCommandIndex) {
     const MachO::linkedit_data_command &LinkEditDataCommand =
         O.LoadCommands[*O.DataInCodeCommandIndex]
@@ -381,28 +390,27 @@
   }
 }
 
-void MachOWriter::writeDataInCodeData() {
-  if (!O.DataInCodeCommandIndex)
+void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) {
+  if (!LCIndex)
     return;
   const MachO::linkedit_data_command &LinkEditDataCommand =
-      O.LoadCommands[*O.DataInCodeCommandIndex]
-          .MachOLoadCommand.linkedit_data_command_data;
+      O.LoadCommands[*LCIndex].MachOLoadCommand.linkedit_data_command_data;
   char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
-  assert((LinkEditDataCommand.datasize == O.DataInCode.Data.size()) &&
-         "Incorrect data in code data size");
-  memcpy(Out, O.DataInCode.Data.data(), O.DataInCode.Data.size());
+  assert((LinkEditDataCommand.datasize == LD.Data.size()) &&
+         "Incorrect data size");
+  memcpy(Out, LD.Data.data(), LD.Data.size());
+}
+
+void MachOWriter::writeCodeSignatureData() {
+  return writeLinkData(O.CodeSignatureCommandIndex, O.CodeSignature);
+}
+
+void MachOWriter::writeDataInCodeData() {
+  return writeLinkData(O.DataInCodeCommandIndex, O.DataInCode);
 }
 
 void MachOWriter::writeFunctionStartsData() {
-  if (!O.FunctionStartsCommandIndex)
-    return;
-  const MachO::linkedit_data_command &LinkEditDataCommand =
-      O.LoadCommands[*O.FunctionStartsCommandIndex]
-          .MachOLoadCommand.linkedit_data_command_data;
-  char *Out = (char *)B.getBufferStart() + LinkEditDataCommand.dataoff;
-  assert((LinkEditDataCommand.datasize == O.FunctionStarts.Data.size()) &&
-         "Incorrect function starts data size");
-  memcpy(Out, O.FunctionStarts.Data.data(), O.FunctionStarts.Data.size());
+  return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts);
 }
 
 void MachOWriter::writeTail() {
@@ -450,6 +458,16 @@
                          &MachOWriter::writeIndirectSymbolTable);
   }
 
+  if (O.CodeSignatureCommandIndex) {
+    const MachO::linkedit_data_command &LinkEditDataCommand =
+        O.LoadCommands[*O.CodeSignatureCommandIndex]
+            .MachOLoadCommand.linkedit_data_command_data;
+
+    if (LinkEditDataCommand.dataoff)
+      Queue.emplace_back(LinkEditDataCommand.dataoff,
+                         &MachOWriter::writeCodeSignatureData);
+  }
+
   if (O.DataInCodeCommandIndex) {
     const MachO::linkedit_data_command &LinkEditDataCommand =
         O.LoadCommands[*O.DataInCodeCommandIndex]
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
index 22abbad..c2c6f5a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -45,6 +45,8 @@
   void writeLazyBindInfo();
   void writeExportInfo();
   void writeIndirectSymbolTable();
+  void writeLinkData(Optional<size_t> LCIndex, const LinkData &LD);
+  void writeCodeSignatureData();
   void writeDataInCodeData();
   void writeFunctionStartsData();
   void writeTail();
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h
index b9ecd1e..5da31d2 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -302,9 +302,12 @@
   IndirectSymbolTable IndirectSymTable;
   LinkData DataInCode;
   LinkData FunctionStarts;
+  LinkData CodeSignature;
 
   Optional<uint32_t> SwiftVersion;
 
+  /// The index of LC_CODE_SIGNATURE load command if present.
+  Optional<size_t> CodeSignatureCommandIndex;
   /// The index of LC_SYMTAB load command if present.
   Optional<size_t> SymTabCommandIndex;
   /// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.