llvm-objcopy: Implement --extract-partition and --extract-main-partition.

This implements the functionality described in
https://lld.llvm.org/Partitions.html. It works as follows:

- Reads the section headers using the ELF header at file offset 0;
- If extracting a loadable partition:
  - Finds the section containing the required partition ELF header by looking it up in the section table;
  - Reads the ELF and program headers from the section.
- If extracting the main partition:
  - Reads the ELF and program headers from file offset 0.
- Filters the section table according to which sections are in the program headers that it read:
  - A section is kept if its ParentSegment is non-null or it is not SHF_ALLOC.
  - Sections containing partition ELF headers or program headers are excluded as there are no headers for these in ordinary ELF files.

Differential Revision: https://reviews.llvm.org/D62364

llvm-svn: 362818
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp
index b138544..8500ff4 100644
--- a/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -519,6 +519,8 @@
   Config.SymbolsPrefix = InputArgs.getLastArgValue(OBJCOPY_prefix_symbols);
   Config.AllocSectionsPrefix =
       InputArgs.getLastArgValue(OBJCOPY_prefix_alloc_sections);
+  if (auto Arg = InputArgs.getLastArg(OBJCOPY_extract_partition))
+    Config.ExtractPartition = Arg->getValue();
 
   for (auto Arg : InputArgs.filtered(OBJCOPY_redefine_symbol)) {
     if (!StringRef(Arg->getValue()).contains('='))
@@ -593,6 +595,8 @@
   Config.StripNonAlloc = InputArgs.hasArg(OBJCOPY_strip_non_alloc);
   Config.StripUnneeded = InputArgs.hasArg(OBJCOPY_strip_unneeded);
   Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
+  Config.ExtractMainPartition =
+      InputArgs.hasArg(OBJCOPY_extract_main_partition);
   Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
   Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
   if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
@@ -697,6 +701,11 @@
         errc::invalid_argument,
         "LLVM was not compiled with LLVM_ENABLE_ZLIB: cannot decompress");
 
+  if (Config.ExtractPartition && Config.ExtractMainPartition)
+    return createStringError(errc::invalid_argument,
+                             "cannot specify --extract-partition together with "
+                             "--extract-main-partition");
+
   DC.CopyConfigs.push_back(std::move(Config));
   return std::move(DC);
 }
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.h b/llvm/tools/llvm-objcopy/CopyConfig.h
index 7607d3b..06b3efd 100644
--- a/llvm/tools/llvm-objcopy/CopyConfig.h
+++ b/llvm/tools/llvm-objcopy/CopyConfig.h
@@ -120,6 +120,7 @@
   StringRef BuildIdLinkDir;
   Optional<StringRef> BuildIdLinkInput;
   Optional<StringRef> BuildIdLinkOutput;
+  Optional<StringRef> ExtractPartition;
   StringRef SplitDWO;
   StringRef SymbolsPrefix;
   StringRef AllocSectionsPrefix;
@@ -155,6 +156,7 @@
   bool AllowBrokenLinks = false;
   bool DeterministicArchives = true;
   bool ExtractDWO = false;
+  bool ExtractMainPartition = false;
   bool KeepFileSymbols = false;
   bool LocalizeHidden = false;
   bool OnlyKeepDebug = false;
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index efb8f05..adc7a95 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -507,6 +507,16 @@
       return (Sec.Flags & SHF_ALLOC) == 0;
     };
 
+  if (Config.ExtractPartition || Config.ExtractMainPartition) {
+    RemovePred = [RemovePred](const SectionBase &Sec) {
+      if (RemovePred(Sec)) // Keep removals decided by the earlier predicate.
+        return true;
+      if (Sec.Type == SHT_LLVM_PART_EHDR || Sec.Type == SHT_LLVM_PART_PHDR)
+        return true; // Partition header sections are never copied.
+      return (Sec.Flags & SHF_ALLOC) != 0 && !Sec.ParentSegment;
+    };
+  }
+
   // Explicit copies:
   if (!Config.OnlySection.empty()) {
     RemovePred = [&Config, RemovePred, &Obj](const SectionBase &Sec) {
@@ -747,7 +757,7 @@
 
 Error executeObjcopyOnBinary(const CopyConfig &Config,
                              object::ELFObjectFileBase &In, Buffer &Out) {
-  ELFReader Reader(&In);
+  ELFReader Reader(&In, Config.ExtractPartition);
   std::unique_ptr<Object> Obj = Reader.create();
   // Prefer OutputArch (-O<format>) if set, otherwise infer it from the input.
   const ElfType OutputElfType =
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index b654305..89f25a4 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -1101,21 +1101,36 @@
   }
 }
 
-template <class ELFT> void ELFBuilder<ELFT>::readProgramHeaders() {
+template <class ELFT> void ELFBuilder<ELFT>::findEhdrOffset() {
+  if (!ExtractPartition) // No named partition requested; keep EhdrOffset.
+    return;
+
+  for (const SectionBase &Section : Obj.sections()) {
+    if (Section.Type == SHT_LLVM_PART_EHDR &&
+        Section.Name == *ExtractPartition) {
+      EhdrOffset = Section.Offset; // Offset of the partition's ELF header.
+      return;
+    }
+  }
+  error("could not find partition named '" + *ExtractPartition + "'");
+}
+
+template <class ELFT>
+void ELFBuilder<ELFT>::readProgramHeaders(const ELFFile<ELFT> &HeadersFile) {
   uint32_t Index = 0;
-  for (const auto &Phdr : unwrapOrError(ElfFile.program_headers())) {
-    if (Phdr.p_offset + Phdr.p_filesz > ElfFile.getBufSize())
+  for (const auto &Phdr : unwrapOrError(HeadersFile.program_headers())) {
+    if (Phdr.p_offset + Phdr.p_filesz > HeadersFile.getBufSize())
       error("program header with offset 0x" + Twine::utohexstr(Phdr.p_offset) +
             " and file size 0x" + Twine::utohexstr(Phdr.p_filesz) +
             " goes past the end of the file");
 
-    ArrayRef<uint8_t> Data{ElfFile.base() + Phdr.p_offset,
+    ArrayRef<uint8_t> Data{HeadersFile.base() + Phdr.p_offset,
                            (size_t)Phdr.p_filesz};
     Segment &Seg = Obj.addSegment(Data);
     Seg.Type = Phdr.p_type;
     Seg.Flags = Phdr.p_flags;
-    Seg.OriginalOffset = Phdr.p_offset;
-    Seg.Offset = Phdr.p_offset;
+    Seg.OriginalOffset = Phdr.p_offset + EhdrOffset;
+    Seg.Offset = Phdr.p_offset + EhdrOffset;
     Seg.VAddr = Phdr.p_vaddr;
     Seg.PAddr = Phdr.p_paddr;
     Seg.FileSize = Phdr.p_filesz;
@@ -1135,8 +1150,9 @@
 
   auto &ElfHdr = Obj.ElfHdrSegment;
   ElfHdr.Index = Index++;
+  ElfHdr.OriginalOffset = ElfHdr.Offset = EhdrOffset;
 
-  const auto &Ehdr = *ElfFile.getHeader();
+  const auto &Ehdr = *HeadersFile.getHeader();
   auto &PrHdr = Obj.ProgramHdrSegment;
   PrHdr.Type = PT_PHDR;
   PrHdr.Flags = 0;
@@ -1144,7 +1160,7 @@
   // Whereas this works automatically for ElfHdr, here OriginalOffset is
   // always non-zero and to ensure the equation we assign the same value to
   // VAddr as well.
-  PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = Ehdr.e_phoff;
+  PrHdr.OriginalOffset = PrHdr.Offset = PrHdr.VAddr = EhdrOffset + Ehdr.e_phoff;
   PrHdr.PAddr = 0;
   PrHdr.FileSize = PrHdr.MemSize = Ehdr.e_phentsize * Ehdr.e_phnum;
   // The spec requires us to naturally align all the fields.
@@ -1363,7 +1379,9 @@
         ArrayRef<uint8_t>(ElfFile.base() + Shdr.sh_offset,
                           (Shdr.sh_type == SHT_NOBITS) ? 0 : Shdr.sh_size);
   }
+}
 
+template <class ELFT> void ELFBuilder<ELFT>::readSections() {
   // If a section index table exists we'll need to initialize it before we
   // initialize the symbol table because the symbol table might need to
   // reference it.
@@ -1397,23 +1415,8 @@
       initGroupSection(GroupSec);
     }
   }
-}
 
-template <class ELFT> void ELFBuilder<ELFT>::build() {
-  const auto &Ehdr = *ElfFile.getHeader();
-
-  Obj.OSABI = Ehdr.e_ident[EI_OSABI];
-  Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
-  Obj.Type = Ehdr.e_type;
-  Obj.Machine = Ehdr.e_machine;
-  Obj.Version = Ehdr.e_version;
-  Obj.Entry = Ehdr.e_entry;
-  Obj.Flags = Ehdr.e_flags;
-
-  readSectionHeaders();
-  readProgramHeaders();
-
-  uint32_t ShstrIndex = Ehdr.e_shstrndx;
+  uint32_t ShstrIndex = ElfFile.getHeader()->e_shstrndx;
   if (ShstrIndex == SHN_XINDEX)
     ShstrIndex = unwrapOrError(ElfFile.getSection(0))->sh_link;
 
@@ -1423,10 +1426,33 @@
     Obj.SectionNames =
         Obj.sections().template getSectionOfType<StringTableSection>(
             ShstrIndex,
-            "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
-                " in elf header is invalid",
-            "e_shstrndx field value " + Twine(Ehdr.e_shstrndx) +
-                " in elf header is not a string table");
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header" +
+                " is invalid",
+            "e_shstrndx field value " + Twine(ShstrIndex) + " in elf header" +
+                " is not a string table");
+}
+
+template <class ELFT> void ELFBuilder<ELFT>::build() {
+  readSectionHeaders();
+  findEhdrOffset(); // Searches Obj.sections() for the requested partition.
+
+  // The ELFFile whose ELF headers and program headers are copied into the
+  // output file. Normally the same as ElfFile, but if we're extracting a
+  // loadable partition it will point to the partition's headers.
+  ELFFile<ELFT> HeadersFile = unwrapOrError(ELFFile<ELFT>::create(toStringRef(
+      {ElfFile.base() + EhdrOffset, ElfFile.getBufSize() - EhdrOffset})));
+
+  auto &Ehdr = *HeadersFile.getHeader(); // Header located at EhdrOffset.
+  Obj.OSABI = Ehdr.e_ident[EI_OSABI];
+  Obj.ABIVersion = Ehdr.e_ident[EI_ABIVERSION];
+  Obj.Type = Ehdr.e_type;
+  Obj.Machine = Ehdr.e_machine;
+  Obj.Version = Ehdr.e_version;
+  Obj.Entry = Ehdr.e_entry;
+  Obj.Flags = Ehdr.e_flags;
+
+  readSections();
+  readProgramHeaders(HeadersFile);
 }
 
 Writer::~Writer() {}
@@ -1440,19 +1466,19 @@
 std::unique_ptr<Object> ELFReader::create() const {
   auto Obj = llvm::make_unique<Object>();
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Bin)) {
-    ELFBuilder<ELF32LE> Builder(*O, *Obj);
+    ELFBuilder<ELF32LE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Bin)) {
-    ELFBuilder<ELF64LE> Builder(*O, *Obj);
+    ELFBuilder<ELF64LE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Bin)) {
-    ELFBuilder<ELF32BE> Builder(*O, *Obj);
+    ELFBuilder<ELF32BE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   } else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Bin)) {
-    ELFBuilder<ELF64BE> Builder(*O, *Obj);
+    ELFBuilder<ELF64BE> Builder(*O, *Obj, ExtractPartition);
     Builder.build();
     return Obj;
   }
@@ -1732,7 +1758,6 @@
   Segment &ElfHdr = Obj.ElfHdrSegment;
   ElfHdr.Type = PT_PHDR;
   ElfHdr.Flags = 0;
-  ElfHdr.OriginalOffset = ElfHdr.Offset = 0;
   ElfHdr.VAddr = 0;
   ElfHdr.PAddr = 0;
   ElfHdr.FileSize = ElfHdr.MemSize = sizeof(Elf_Ehdr);
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
index fabbb7f..af37a2b 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -889,17 +889,23 @@
 
   const ELFFile<ELFT> &ElfFile;
   Object &Obj;
+  uint64_t EhdrOffset = 0;
+  Optional<StringRef> ExtractPartition;
 
   void setParentSegment(Segment &Child);
-  void readProgramHeaders();
+  void readProgramHeaders(const ELFFile<ELFT> &HeadersFile);
   void initGroupSection(GroupSection *GroupSec);
   void initSymbolTable(SymbolTableSection *SymTab);
   void readSectionHeaders();
+  void readSections();
+  void findEhdrOffset();
   SectionBase &makeSection(const Elf_Shdr &Shdr);
 
 public:
-  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj)
-      : ElfFile(*ElfObj.getELFFile()), Obj(Obj) {}
+  ELFBuilder(const ELFObjectFile<ELFT> &ElfObj, Object &Obj,
+             Optional<StringRef> ExtractPartition)
+      : ElfFile(*ElfObj.getELFFile()), Obj(Obj),
+        ExtractPartition(ExtractPartition) {}
 
   void build();
 };
@@ -916,10 +922,12 @@
 
 class ELFReader : public Reader {
   Binary *Bin;
+  Optional<StringRef> ExtractPartition;
 
 public:
   std::unique_ptr<Object> create() const override;
-  explicit ELFReader(Binary *B) : Bin(B) {}
+  explicit ELFReader(Binary *B, Optional<StringRef> ExtractPartition)
+      : Bin(B), ExtractPartition(ExtractPartition) {}
 };
 
 class Object {
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 33545a5..c97c356 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -145,6 +145,13 @@
       HelpText<
           "Remove all sections that are not DWARF .dwo sections from file">;
 
+defm extract_partition
+    : Eq<"extract-partition", "Extract named partition from input file">,
+      MetaVarName<"name">;
+def extract_main_partition
+    : Flag<["--"], "extract-main-partition">,
+      HelpText<"Extract main partition from the input file">;
+
 def localize_hidden
     : Flag<["--"], "localize-hidden">,
       HelpText<