[llvm-objcopy] [COFF] Fix handling of aux symbols for big objects

The aux symbols were stored in an opaque std::vector<uint8_t>,
with contents interpreted according to the rest of the symbol.

All aux symbol types but one fit in 18 bytes (sizeof(coff_symbol16)),
and if written to a bigobj, two extra padding bytes are written (as
sizeof(coff_symbol32) is 20). In the storage agnostic intermediate
representation, store the aux symbols as a series of coff_symbol16
sized opaque blobs. (In practice, all such aux symbols only consist
of one aux symbol, so this is more flexible than what reality needs.)

The special case is the file aux symbols, which are written in
potentially more than one aux symbol slot, without any padding,
as one single long string. This can't be stored in the same opaque
vector of fixed sized aux symbol entries. The file aux symbols will
occupy a different number of aux symbol slots depending on the type
of output object file. As nothing in the intermediate process needs
to have accurate raw symbol indices, updating that is moved into the
writer class.

Differential Revision: https://reviews.llvm.org/D57009

llvm-svn: 351947
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index 20adbe1..64b4e79 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -37,7 +37,7 @@
     return 0;
   const Section &Last = Obj.getSections().back();
   return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize,
-                 Obj.PeHeader.SectionAlignment);
+                 Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
 }
 
 static uint32_t getCRC32(StringRef Data) {
@@ -74,8 +74,8 @@
   Sec.Name = ".gnu_debuglink";
   Sec.Header.VirtualSize = Sec.getContents().size();
   Sec.Header.VirtualAddress = StartRVA;
-  Sec.Header.SizeOfRawData =
-      alignTo(Sec.Header.VirtualSize, Obj.PeHeader.FileAlignment);
+  Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
+                                     Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
   // Sec.Header.PointerToRawData is filled in by the writer.
   Sec.Header.PointerToRelocations = 0;
   Sec.Header.PointerToLinenumbers = 0;
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp
index 8c382c1..0ad5a05 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -26,12 +26,8 @@
 
 void Object::updateSymbols() {
   SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
-  size_t RawSymIndex = 0;
-  for (Symbol &Sym : Symbols) {
+  for (Symbol &Sym : Symbols)
     SymbolMap[Sym.UniqueId] = &Sym;
-    Sym.RawIndex = RawSymIndex;
-    RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
-  }
 }
 
 const Symbol *Object::findSymbol(size_t UniqueId) const {
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.h b/llvm/tools/llvm-objcopy/COFF/Object.h
index afa2722..21475b0 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.h
+++ b/llvm/tools/llvm-objcopy/COFF/Object.h
@@ -66,10 +66,24 @@
   std::vector<uint8_t> OwnedContents;
 };
 
+struct AuxSymbol {
+  AuxSymbol(ArrayRef<uint8_t> In) {
+    assert(In.size() == sizeof(Opaque));
+    std::copy(In.begin(), In.end(), Opaque);
+  }
+
+  ArrayRef<uint8_t> getRef() const {
+    return ArrayRef<uint8_t>(Opaque, sizeof(Opaque));
+  }
+
+  uint8_t Opaque[sizeof(object::coff_symbol16)];
+};
+
 struct Symbol {
   object::coff_symbol32 Sym;
   StringRef Name;
-  std::vector<uint8_t> AuxData;
+  std::vector<AuxSymbol> AuxData;
+  StringRef AuxFile;
   ssize_t TargetSectionId;
   ssize_t AssociativeComdatTargetSectionId = 0;
   Optional<size_t> WeakTargetSymbolId;
@@ -132,7 +146,7 @@
 
   ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
 
-  // Update SymbolMap and RawIndex in each Symbol.
+  // Update SymbolMap.
   void updateSymbols();
 
   // Update SectionMap and Index in each Section.
diff --git a/llvm/tools/llvm-objcopy/COFF/Reader.cpp b/llvm/tools/llvm-objcopy/COFF/Reader.cpp
index 87dd60a..7270bbf 100644
--- a/llvm/tools/llvm-objcopy/COFF/Reader.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Reader.cpp
@@ -107,9 +107,24 @@
                  *reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
     if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
       return errorCodeToError(EC);
-    Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
-    assert((Sym.AuxData.size() %
-            (IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
+
+    ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
+    size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
+    assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
+    // The auxillary symbols are structs of sizeof(coff_symbol16) each.
+    // In the big object format (where symbols are coff_symbol32), each
+    // auxillary symbol is padded with 2 bytes at the end. Copy each
+    // auxillary symbol to the Sym.AuxData vector. For file symbols,
+    // the whole range of aux symbols are interpreted as one null padded
+    // string instead.
+    if (SymRef.isFileRecord())
+      Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
+                              AuxData.size())
+                        .rtrim('\0');
+    else
+      for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
+        Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
+
     // Find the unique id of the section
     if (SymRef.getSectionNumber() <=
         0) // Special symbol (undefined/absolute/debug)
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index db897e2..6e69c59 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -55,7 +55,8 @@
       if (Sym.Sym.NumberOfAuxSymbols == 1 &&
           Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
         coff_aux_section_definition *SD =
-            reinterpret_cast<coff_aux_section_definition *>(Sym.AuxData.data());
+            reinterpret_cast<coff_aux_section_definition *>(
+                Sym.AuxData[0].Opaque);
         uint32_t SDSectionNumber;
         if (Sym.AssociativeComdatTargetSectionId == 0) {
           // Not a comdat associative section; just set the Number field to
@@ -79,7 +80,7 @@
     // we want to set. Only >= 1 would be required, but only == 1 makes sense.
     if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
       coff_aux_weak_external *WE =
-          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData.data());
+          reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
       const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
       if (Target == nullptr)
         return createStringError(object_error::invalid_symbol_index,
@@ -141,13 +142,26 @@
 
 template <class SymbolTy>
 std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
-  size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
-  for (const auto &S : Obj.getSymbols())
-    SymTabSize += S.AuxData.size();
-  return std::make_pair(SymTabSize, sizeof(SymbolTy));
+  size_t RawSymIndex = 0;
+  for (auto &S : Obj.getMutableSymbols()) {
+    // Symbols normally have NumberOfAuxSymbols set correctly all the time.
+    // For file symbols, we need to know the output file's symbol size to be
+    // able to calculate the number of slots it occupies.
+    if (!S.AuxFile.empty())
+      S.Sym.NumberOfAuxSymbols =
+          alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
+    S.RawIndex = RawSymIndex;
+    RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
+  }
+  return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
 }
 
 Error COFFWriter::finalize(bool IsBigObj) {
+  size_t SymTabSize, SymbolSize;
+  std::tie(SymTabSize, SymbolSize) = IsBigObj
+                                         ? finalizeSymbolTable<coff_symbol32>()
+                                         : finalizeSymbolTable<coff_symbol16>();
+
   if (Error E = finalizeRelocTargets())
     return E;
   if (Error E = finalizeSymbolContents())
@@ -199,10 +213,6 @@
   }
 
   size_t StrTabSize = finalizeStringTable();
-  size_t SymTabSize, SymbolSize;
-  std::tie(SymTabSize, SymbolSize) = IsBigObj
-                                         ? finalizeSymbolTable<coff_symbol32>()
-                                         : finalizeSymbolTable<coff_symbol16>();
 
   size_t PointerToSymbolTable = FileSize;
   // StrTabSize <= 4 is the size of an empty string table, only consisting
@@ -312,8 +322,23 @@
     copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
                                         S.Sym);
     Ptr += sizeof(SymbolTy);
-    std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
-    Ptr += S.AuxData.size();
+    if (!S.AuxFile.empty()) {
+      // For file symbols, just write the string into the aux symbol slots,
+      // assuming that the unwritten parts are initialized to zero in the memory
+      // mapped file.
+      std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
+      Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
+    } else {
+      // For other auxillary symbols, write their opaque payload into one symbol
+      // table slot each. For big object files, the symbols are larger than the
+      // opaque auxillary symbol struct and we leave padding at the end of each
+      // entry.
+      for (const AuxSymbol &AuxSym : S.AuxData) {
+        ArrayRef<uint8_t> Ref = AuxSym.getRef();
+        std::copy(Ref.begin(), Ref.end(), Ptr);
+        Ptr += sizeof(SymbolTy);
+      }
+    }
   }
   if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
     // Always write a string table in object files, even an empty one.
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.h b/llvm/tools/llvm-objcopy/COFF/Writer.h
index 9b1cfa9..681a8d5 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.h
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.h
@@ -30,11 +30,11 @@
   size_t SizeOfInitializedData;
   StringTableBuilder StrTabBuilder;
 
+  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
   Error finalizeRelocTargets();
   Error finalizeSymbolContents();
   void layoutSections();
   size_t finalizeStringTable();
-  template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
 
   Error finalize(bool IsBigObj);