[llvm-objcopy] [COFF] Add support for removing sections

Differential Revision: https://reviews.llvm.org/D56683

llvm-svn: 351660
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 437dccb..dd2e482 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -27,9 +27,17 @@
 using namespace COFF;
 
 static Error handleArgs(const CopyConfig &Config, Object &Obj) {
+  // Perform the actual section removals.
+  Obj.removeSections([&Config](const Section &Sec) {
+    if (is_contained(Config.ToRemove, Sec.Name))
+      return true;
+
+    return false;
+  });
+
   // StripAll removes all symbols and thus also removes all relocations.
   if (Config.StripAll || Config.StripAllGNU)
-    for (Section &Sec : Obj.Sections)
+    for (Section &Sec : Obj.getMutableSections())
       Sec.Relocs.clear();
 
   // If we need to do per-symbol removals, initialize the Referenced field.
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp
index e58e161..e19cea6 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Object.h"
+#include "llvm/ADT/DenseSet.h"
 #include <algorithm>
 
 namespace llvm {
@@ -64,6 +65,68 @@
   return Error::success();
 }
 
+void Object::addSections(ArrayRef<Section> NewSections) {
+  for (Section S : NewSections) {
+    S.UniqueId = NextSectionUniqueId++;
+    Sections.emplace_back(S);
+  }
+  updateSections();
+}
+
+void Object::updateSections() {
+  SectionMap = DenseMap<ssize_t, Section *>(Sections.size());
+  size_t Index = 1;
+  for (Section &S : Sections) {
+    SectionMap[S.UniqueId] = &S;
+    S.Index = Index++;
+  }
+}
+
+const Section *Object::findSection(ssize_t UniqueId) const {
+  auto It = SectionMap.find(UniqueId);
+  if (It == SectionMap.end())
+    return nullptr;
+  return It->second;
+}
+
+void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
+  DenseSet<ssize_t> AssociatedSections;
+  auto RemoveAssociated = [&AssociatedSections](const Section &Sec) {
+    return AssociatedSections.count(Sec.UniqueId) == 1;
+  };
+  do {
+    DenseSet<ssize_t> RemovedSections;
+    Sections.erase(
+        std::remove_if(std::begin(Sections), std::end(Sections),
+                       [ToRemove, &RemovedSections](const Section &Sec) {
+                         bool Remove = ToRemove(Sec);
+                         if (Remove)
+                           RemovedSections.insert(Sec.UniqueId);
+                         return Remove;
+                       }),
+        std::end(Sections));
+    // Remove all symbols referring to the removed sections.
+    AssociatedSections.clear();
+    Symbols.erase(
+        std::remove_if(
+            std::begin(Symbols), std::end(Symbols),
+            [&RemovedSections, &AssociatedSections](const Symbol &Sym) {
+              // If there are sections that are associative to a removed
+              // section,
+              // remove those as well as nothing will include them (and we can't
+              // leave them dangling).
+              if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) ==
+                  1)
+                AssociatedSections.insert(Sym.TargetSectionId);
+              return RemovedSections.count(Sym.TargetSectionId) == 1;
+            }),
+        std::end(Symbols));
+    ToRemove = RemoveAssociated;
+  } while (!AssociatedSections.empty());
+  updateSections();
+  updateSymbols();
+}
+
 } // end namespace coff
 } // end namespace objcopy
 } // end namespace llvm
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h
index e6147c4..a73e936 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -37,12 +37,16 @@
   ArrayRef<uint8_t> Contents;
   std::vector<Relocation> Relocs;
   StringRef Name;
+  ssize_t UniqueId;
+  size_t Index;
 };
 
 struct Symbol {
   object::coff_symbol32 Sym;
   StringRef Name;
-  ArrayRef<uint8_t> AuxData;
+  std::vector<uint8_t> AuxData;
+  ssize_t TargetSectionId;
+  ssize_t AssociativeComdatTargetSectionId = 0;
   size_t UniqueId;
   size_t RawIndex;
   bool Referenced;
@@ -61,7 +65,6 @@
   uint32_t BaseOfData = 0; // pe32plus_header lacks this field.
 
   std::vector<object::data_directory> DataDirectories;
-  std::vector<Section> Sections;
 
   ArrayRef<Symbol> getSymbols() const { return Symbols; }
   // This allows mutating individual Symbols, but not mutating the list
@@ -79,14 +82,34 @@
   // all sections.
   Error markSymbols();
 
+  ArrayRef<Section> getSections() const { return Sections; }
+  // This allows mutating individual Sections, but not mutating the list
+  // of symbols itself.
+  iterator_range<std::vector<Section>::iterator> getMutableSections() {
+    return make_range(Sections.begin(), Sections.end());
+  }
+
+  const Section *findSection(ssize_t UniqueId) const;
+
+  void addSections(ArrayRef<Section> NewSections);
+  void removeSections(function_ref<bool(const Section &)> ToRemove);
+
 private:
   std::vector<Symbol> Symbols;
   DenseMap<size_t, Symbol *> SymbolMap;
 
   size_t NextSymbolUniqueId = 0;
 
+  std::vector<Section> Sections;
+  DenseMap<ssize_t, Section *> SectionMap;
+
+  ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
+
   // Update SymbolMap and RawIndex in each Symbol.
   void updateSymbols();
+
+  // Update SectionMap and Index in each Section.
+  void updateSections();
 };
 
 // Copy between coff_symbol16 and coff_symbol32.
diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp
index d794042..c8abe29 100644
--- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -11,6 +11,7 @@
 #include "llvm-objcopy.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstddef>
@@ -21,6 +22,7 @@
 namespace coff {
 
 using namespace object;
+using namespace COFF;
 
 Error COFFReader::readExecutableHeaders(Object &Obj) const {
   const dos_header *DH = COFFObj.getDOSHeader();
@@ -58,13 +60,14 @@
 }
 
 Error COFFReader::readSections(Object &Obj) const {
+  std::vector<Section> Sections;
   // Section indexing starts from 1.
   for (size_t I = 1, E = COFFObj.getNumberOfSections(); I <= E; I++) {
     const coff_section *Sec;
     if (auto EC = COFFObj.getSection(I, Sec))
       return errorCodeToError(EC);
-    Obj.Sections.push_back(Section());
-    Section &S = Obj.Sections.back();
+    Sections.push_back(Section());
+    Section &S = Sections.back();
     S.Header = *Sec;
     if (auto EC = COFFObj.getSectionContents(Sec, S.Contents))
       return errorCodeToError(EC);
@@ -77,12 +80,14 @@
       return make_error<StringError>("Extended relocations not supported yet",
                                      object_error::parse_failed);
   }
+  Obj.addSections(Sections);
   return Error::success();
 }
 
 Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
   std::vector<Symbol> Symbols;
   Symbols.reserve(COFFObj.getRawNumberOfSymbols());
+  ArrayRef<Section> Sections = Obj.getSections();
   for (uint32_t I = 0, E = COFFObj.getRawNumberOfSymbols(); I < E;) {
     Expected<COFFSymbolRef> SymOrErr = COFFObj.getSymbol(I);
     if (!SymOrErr)
@@ -103,6 +108,26 @@
     Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
     assert((Sym.AuxData.size() %
             (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+    // Find the unique id of the section
+    if (SymRef.getSectionNumber() <=
+        0) // Special symbol (undefined/absolute/debug)
+      Sym.TargetSectionId = SymRef.getSectionNumber();
+    else if (static_cast<uint32_t>(SymRef.getSectionNumber() - 1) <
+             Sections.size())
+      Sym.TargetSectionId = Sections[SymRef.getSectionNumber() - 1].UniqueId;
+    else
+      return make_error<StringError>("Section number out of range",
+                                     object_error::parse_failed);
+    // For section definitions, check if it is comdat associative, and if
+    // it is, find the target section unique id.
+    const coff_aux_section_definition *SD = SymRef.getSectionDefinition();
+    if (SD && SD->Selection == IMAGE_COMDAT_SELECT_ASSOCIATIVE) {
+      int32_t Index = SD->getNumber(IsBigObj);
+      if (Index <= 0 || static_cast<uint32_t>(Index - 1) >= Sections.size())
+        return make_error<StringError>("Unexpected associative section index",
+                                       object_error::parse_failed);
+      Sym.AssociativeComdatTargetSectionId = Sections[Index - 1].UniqueId;
+    }
     I += 1 + SymRef.getNumberOfAuxSymbols();
   }
   Obj.addSymbols(Symbols);
@@ -116,7 +141,7 @@
     for (size_t I = 0; I < Sym.Sym.NumberOfAuxSymbols; I++)
       RawSymbolTable.push_back(nullptr);
   }
-  for (Section &Sec : Obj.Sections) {
+  for (Section &Sec : Obj.getMutableSections()) {
     for (Relocation &R : Sec.Relocs) {
       if (R.Reloc.SymbolTableIndex >= RawSymbolTable.size())
         return make_error<StringError>("SymbolTableIndex out of range",
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index c347810..9fb7812 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -25,7 +25,7 @@
 using namespace COFF;
 
 Error COFFWriter::finalizeRelocTargets() {
-  for (Section &Sec : Obj.Sections) {
+  for (Section &Sec : Obj.getMutableSections()) {
     for (Relocation &R : Sec.Relocs) {
       const Symbol *Sym = Obj.findSymbol(R.Target);
       if (Sym == nullptr)
@@ -39,8 +39,48 @@
   return Error::success();
 }
 
+Error COFFWriter::finalizeSectionNumbers() {
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+    if (Sym.TargetSectionId <= 0) {
+      // Undefined, or a special kind of symbol. These negative values
+      // are stored in the SectionNumber field which is unsigned.
+      Sym.Sym.SectionNumber = static_cast<uint32_t>(Sym.TargetSectionId);
+    } else {
+      const Section *Sec = Obj.findSection(Sym.TargetSectionId);
+      if (Sec == nullptr)
+        return make_error<StringError>("Symbol " + Sym.Name +
+                                           " points to a removed section",
+                                       object_error::invalid_symbol_index);
+      Sym.Sym.SectionNumber = Sec->Index;
+
+      if (Sym.Sym.NumberOfAuxSymbols == 1 &&
+          Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
+        coff_aux_section_definition *SD =
+            reinterpret_cast<coff_aux_section_definition *>(Sym.AuxData.data());
+        uint32_t SDSectionNumber;
+        if (Sym.AssociativeComdatTargetSectionId == 0) {
+          // Not a comdat associative section; just set the Number field to
+          // the number of the section itself.
+          SDSectionNumber = Sec->Index;
+        } else {
+          Sec = Obj.findSection(Sym.AssociativeComdatTargetSectionId);
+          if (Sec == nullptr)
+            return make_error<StringError>(
+                "Symbol " + Sym.Name + " is associative to a removed section",
+                object_error::invalid_symbol_index);
+          SDSectionNumber = Sec->Index;
+        }
+        // Update the section definition with the new section number.
+        SD->NumberLowPart = static_cast<uint16_t>(SDSectionNumber);
+        SD->NumberHighPart = static_cast<uint16_t>(SDSectionNumber >> 16);
+      }
+    }
+  }
+  return Error::success();
+}
+
 void COFFWriter::layoutSections() {
-  for (auto &S : Obj.Sections) {
+  for (auto &S : Obj.getMutableSections()) {
     if (S.Header.SizeOfRawData > 0)
       S.Header.PointerToRawData = FileSize;
     FileSize += S.Header.SizeOfRawData; // For executables, this is already
@@ -57,7 +97,7 @@
 }
 
 size_t COFFWriter::finalizeStringTable() {
-  for (auto &S : Obj.Sections)
+  for (const auto &S : Obj.getSections())
     if (S.Name.size() > COFF::NameSize)
       StrTabBuilder.add(S.Name);
 
@@ -67,7 +107,7 @@
 
   StrTabBuilder.finalize();
 
-  for (auto &S : Obj.Sections) {
+  for (auto &S : Obj.getMutableSections()) {
     if (S.Name.size() > COFF::NameSize) {
       snprintf(S.Header.Name, sizeof(S.Header.Name), "/%d",
                (int)StrTabBuilder.getOffset(S.Name));
@@ -97,6 +137,8 @@
 Error COFFWriter::finalize(bool IsBigObj) {
   if (Error E = finalizeRelocTargets())
     return E;
+  if (Error E = finalizeSectionNumbers())
+    return E;
 
   size_t SizeOfHeaders = 0;
   FileAlignment = 1;
@@ -113,10 +155,10 @@
     SizeOfHeaders +=
         PeHeaderSize + sizeof(data_directory) * Obj.DataDirectories.size();
   }
-  Obj.CoffFileHeader.NumberOfSections = Obj.Sections.size();
+  Obj.CoffFileHeader.NumberOfSections = Obj.getSections().size();
   SizeOfHeaders +=
       IsBigObj ? sizeof(coff_bigobj_file_header) : sizeof(coff_file_header);
-  SizeOfHeaders += sizeof(coff_section) * Obj.Sections.size();
+  SizeOfHeaders += sizeof(coff_section) * Obj.getSections().size();
   SizeOfHeaders = alignTo(SizeOfHeaders, FileAlignment);
 
   Obj.CoffFileHeader.SizeOfOptionalHeader =
@@ -131,8 +173,8 @@
     Obj.PeHeader.SizeOfHeaders = SizeOfHeaders;
     Obj.PeHeader.SizeOfInitializedData = SizeOfInitializedData;
 
-    if (!Obj.Sections.empty()) {
-      const Section &S = Obj.Sections.back();
+    if (!Obj.getSections().empty()) {
+      const Section &S = Obj.getSections().back();
       Obj.PeHeader.SizeOfImage =
           alignTo(S.Header.VirtualAddress + S.Header.VirtualSize,
                   Obj.PeHeader.SectionAlignment);
@@ -198,7 +240,7 @@
     BigObjHeader.unused4 = 0;
     // The value in Obj.CoffFileHeader.NumberOfSections is truncated, thus
     // get the original one instead.
-    BigObjHeader.NumberOfSections = Obj.Sections.size();
+    BigObjHeader.NumberOfSections = Obj.getSections().size();
     BigObjHeader.PointerToSymbolTable = Obj.CoffFileHeader.PointerToSymbolTable;
     BigObjHeader.NumberOfSymbols = Obj.CoffFileHeader.NumberOfSymbols;
 
@@ -223,14 +265,14 @@
       Ptr += sizeof(DD);
     }
   }
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     memcpy(Ptr, &S.Header, sizeof(S.Header));
     Ptr += sizeof(S.Header);
   }
 }
 
 void COFFWriter::writeSections() {
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     uint8_t *Ptr = Buf.getBufferStart() + S.Header.PointerToRawData;
     std::copy(S.Contents.begin(), S.Contents.end(), Ptr);
 
@@ -295,7 +337,7 @@
   const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
   if (Dir->Size <= 0)
     return Error::success();
-  for (const auto &S : Obj.Sections) {
+  for (const auto &S : Obj.getSections()) {
     if (Dir->RelativeVirtualAddress >= S.Header.VirtualAddress &&
         Dir->RelativeVirtualAddress <
             S.Header.VirtualAddress + S.Header.SizeOfRawData) {
@@ -324,7 +366,7 @@
 }
 
 Error COFFWriter::write() {
-  bool IsBigObj = Obj.Sections.size() > MaxNumberOfSections16;
+  bool IsBigObj = Obj.getSections().size() > MaxNumberOfSections16;
   if (IsBigObj && Obj.IsPE)
     return make_error<StringError>("Too many sections for executable",
                                    object_error::parse_failed);
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h
index 52fef38..a967a10 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -31,6 +31,7 @@
   StringTableBuilder StrTabBuilder;
 
   Error finalizeRelocTargets();
+  Error finalizeSectionNumbers();
   void layoutSections();
   size_t finalizeStringTable();
   template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();