[llvm-objcopy] Implement IHEX writer

Differential revision: https://reviews.llvm.org/D60270

llvm-svn: 361949
diff --git a/llvm/tools/llvm-objcopy/CopyConfig.cpp b/llvm/tools/llvm-objcopy/CopyConfig.cpp
index 0b0023f..b138544 100644
--- a/llvm/tools/llvm-objcopy/CopyConfig.cpp
+++ b/llvm/tools/llvm-objcopy/CopyConfig.cpp
@@ -458,7 +458,8 @@
       return MI.takeError();
     Config.BinaryArch = *MI;
   }
-  if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary") {
+  if (!Config.OutputFormat.empty() && Config.OutputFormat != "binary" &&
+      Config.OutputFormat != "ihex") {
     Expected<MachineInfo> MI = getOutputFormatMachineInfo(Config.OutputFormat);
     if (!MI)
       return MI.takeError();
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index be25bd5..efb8f05 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -130,12 +130,9 @@
     return MI.IsLittleEndian ? ELFT_ELF32LE : ELFT_ELF32BE;
 }
 
-static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
-                                            Object &Obj, Buffer &Buf,
-                                            ElfType OutputElfType) {
-  if (Config.OutputFormat == "binary") {
-    return llvm::make_unique<BinaryWriter>(Obj, Buf);
-  }
+static std::unique_ptr<Writer> createELFWriter(const CopyConfig &Config,
+                                               Object &Obj, Buffer &Buf,
+                                               ElfType OutputElfType) {
   // Depending on the initial ELFT and OutputFormat we need a different Writer.
   switch (OutputElfType) {
   case ELFT_ELF32LE:
@@ -154,6 +151,17 @@
   llvm_unreachable("Invalid output format");
 }
 
+static std::unique_ptr<Writer> createWriter(const CopyConfig &Config,
+                                            Object &Obj, Buffer &Buf,
+                                            ElfType OutputElfType) {
+  using Functor = std::function<std::unique_ptr<Writer>()>;
+  return StringSwitch<Functor>(Config.OutputFormat)
+      .Case("binary", [&] { return llvm::make_unique<BinaryWriter>(Obj, Buf); })
+      .Case("ihex", [&] { return llvm::make_unique<IHexWriter>(Obj, Buf); })
+      .Default(
+          [&] { return createELFWriter(Config, Obj, Buf, OutputElfType); })();
+}
+
 template <class ELFT>
 static Expected<ArrayRef<uint8_t>>
 findBuildID(const CopyConfig &Config, const object::ELFFile<ELFT> &In) {
@@ -714,6 +722,15 @@
   return Error::success();
 }
 
+static Error writeOutput(const CopyConfig &Config, Object &Obj, Buffer &Out,
+                         ElfType OutputElfType) {
+  std::unique_ptr<Writer> Writer =
+      createWriter(Config, Obj, Out, OutputElfType);
+  if (Error E = Writer->finalize())
+    return E;
+  return Writer->write();
+}
+
 Error executeObjcopyOnRawBinary(const CopyConfig &Config, MemoryBuffer &In,
                                 Buffer &Out) {
   BinaryReader Reader(Config.BinaryArch, &In);
@@ -721,15 +738,11 @@
 
   // Prefer OutputArch (-O<format>) if set, otherwise fallback to BinaryArch
   // (-B<arch>).
-  const ElfType OutputElfType = getOutputElfType(
-      Config.OutputArch ? Config.OutputArch.getValue() : Config.BinaryArch);
+  const ElfType OutputElfType =
+      getOutputElfType(Config.OutputArch.getValueOr(Config.BinaryArch));
   if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
     return E;
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  if (Error E = Writer->finalize())
-    return E;
-  return Writer->write();
+  return writeOutput(Config, *Obj, Out, OutputElfType);
 }
 
 Error executeObjcopyOnBinary(const CopyConfig &Config,
@@ -764,12 +777,8 @@
   if (Error E = handleArgs(Config, *Obj, Reader, OutputElfType))
     return createFileError(Config.InputFilename, std::move(E));
 
-  std::unique_ptr<Writer> Writer =
-      createWriter(Config, *Obj, Out, OutputElfType);
-  if (Error E = Writer->finalize())
+  if (Error E = writeOutput(Config, *Obj, Out, OutputElfType))
     return createFileError(Config.InputFilename, std::move(E));
-  if (Error E = Writer->write())
-    return E;
   if (!Config.BuildIdLinkDir.empty() && Config.BuildIdLinkOutput)
     if (Error E =
             linkToBuildIdDir(Config, Config.OutputFilename,
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index 85e7ffa..7a9a1bd 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -17,7 +17,7 @@
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/Compression.h"
-#include "llvm/Support/Errc.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/Path.h"
@@ -147,6 +147,156 @@
     llvm::copy(Sec.Contents, Out.getBufferStart() + Sec.Offset);
 }
 
+static bool addressOverflows32bit(uint64_t Addr) {
+  // Sign extended 32 bit addresses (e.g 0xFFFFFFFF80000000) are ok
+  return Addr > UINT32_MAX && Addr + 0x80000000 > UINT32_MAX;
+}
+
+template <class T> static T checkedGetHex(StringRef S) {
+  T Value;
+  bool Fail = S.getAsInteger(16, Value);
+  assert(!Fail);
+  (void)Fail;
+  return Value;
+}
+
+// Fills exactly Len bytes of buffer with hexadecimal characters
+// representing value 'X'
+template <class T, class Iterator>
+static Iterator utohexstr(T X, Iterator It, size_t Len) {
+  // Fill range with '0'
+  std::fill(It, It + Len, '0');
+
+  for (long I = Len - 1; I >= 0; --I) {
+    unsigned char Mod = static_cast<unsigned char>(X) & 15;
+    *(It + I) = hexdigit(Mod, false);
+    X >>= 4;
+  }
+  assert(X == 0);
+  return It + Len;
+}
+
+uint8_t IHexRecord::getChecksum(StringRef S) {
+  assert((S.size() & 1) == 0);
+  uint8_t Checksum = 0;
+  while (!S.empty()) {
+    Checksum += checkedGetHex<uint8_t>(S.take_front(2));
+    S = S.drop_front(2);
+  }
+  return -Checksum;
+}
+
+IHexLineData IHexRecord::getLine(uint8_t Type, uint16_t Addr,
+                                 ArrayRef<uint8_t> Data) {
+  IHexLineData Line(getLineLength(Data.size()));
+  assert(Line.size());
+  auto Iter = Line.begin();
+  *Iter++ = ':';
+  Iter = utohexstr(Data.size(), Iter, 2);
+  Iter = utohexstr(Addr, Iter, 4);
+  Iter = utohexstr(Type, Iter, 2);
+  for (uint8_t X : Data)
+    Iter = utohexstr(X, Iter, 2);
+  StringRef S(Line.data() + 1, std::distance(Line.begin() + 1, Iter));
+  Iter = utohexstr(getChecksum(S), Iter, 2);
+  *Iter++ = '\r';
+  *Iter++ = '\n';
+  assert(Iter == Line.end());
+  return Line;
+}
+
+static uint64_t sectionPhysicalAddr(const SectionBase *Sec) {
+  Segment *Seg = Sec->ParentSegment;
+  if (Seg && Seg->Type != ELF::PT_LOAD)
+    Seg = nullptr;
+  return Seg ? Seg->PAddr + Sec->OriginalOffset - Seg->OriginalOffset
+             : Sec->Addr;
+}
+
+void IHexSectionWriterBase::writeSection(const SectionBase *Sec,
+                                         ArrayRef<uint8_t> Data) {
+  assert(Data.size() == Sec->Size);
+  const uint32_t ChunkSize = 16;
+  uint32_t Addr = sectionPhysicalAddr(Sec) & 0xFFFFFFFFU;
+  while (!Data.empty()) {
+    uint64_t DataSize = std::min<uint64_t>(Data.size(), ChunkSize);
+    if (Addr > SegmentAddr + BaseAddr + 0xFFFFU) {
+      if (Addr > 0xFFFFFU) {
+        // Write extended address record, zeroing segment address
+        // if needed.
+        if (SegmentAddr != 0)
+          SegmentAddr = writeSegmentAddr(0U);
+        BaseAddr = writeBaseAddr(Addr);
+      } else {
+        // We can still remain 16-bit
+        SegmentAddr = writeSegmentAddr(Addr);
+      }
+    }
+    uint64_t SegOffset = Addr - BaseAddr - SegmentAddr;
+    assert(SegOffset <= 0xFFFFU);
+    DataSize = std::min(DataSize, 0x10000U - SegOffset);
+    writeData(0, SegOffset, Data.take_front(DataSize));
+    Addr += DataSize;
+    Data = Data.drop_front(DataSize);
+  }
+}
+
+uint64_t IHexSectionWriterBase::writeSegmentAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFU);
+  uint8_t Data[] = {static_cast<uint8_t>((Addr & 0xF0000U) >> 12), 0};
+  writeData(2, 0, Data);
+  return Addr & 0xF0000U;
+}
+
+uint64_t IHexSectionWriterBase::writeBaseAddr(uint64_t Addr) {
+  assert(Addr <= 0xFFFFFFFFU);
+  uint64_t Base = Addr & 0xFFFF0000U;
+  uint8_t Data[] = {static_cast<uint8_t>(Base >> 24),
+                    static_cast<uint8_t>((Base >> 16) & 0xFF)};
+  writeData(4, 0, Data);
+  return Base;
+}
+
+void IHexSectionWriterBase::writeData(uint8_t Type, uint16_t Addr,
+                                      ArrayRef<uint8_t> Data) {
+  Offset += IHexRecord::getLineLength(Data.size());
+}
+
+void IHexSectionWriterBase::visit(const Section &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriterBase::visit(const OwnedDataSection &Sec) {
+  writeSection(&Sec, Sec.Data);
+}
+
+void IHexSectionWriterBase::visit(const StringTableSection &Sec) {
+  // Check that sizer has already done its work
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  // We are free to pass an invalid pointer to writeSection as long
+  // as we don't actually write any data. The real writer class has
+  // to override this method .
+  writeSection(&Sec, {nullptr, Sec.Size});
+}
+
+void IHexSectionWriterBase::visit(const DynamicRelocationSection &Sec) {
+  writeSection(&Sec, Sec.Contents);
+}
+
+void IHexSectionWriter::writeData(uint8_t Type, uint16_t Addr,
+                                  ArrayRef<uint8_t> Data) {
+  IHexLineData HexData = IHexRecord::getLine(Type, Addr, Data);
+  memcpy(Out.getBufferStart() + Offset, HexData.data(), HexData.size());
+  Offset += HexData.size();
+}
+
+void IHexSectionWriter::visit(const StringTableSection &Sec) {
+  assert(Sec.Size == Sec.StrTabBuilder.getSize());
+  std::vector<uint8_t> Data(Sec.Size);
+  Sec.StrTabBuilder.write(Data.data());
+  writeSection(&Sec, Data);
+}
+
 void Section::accept(SectionVisitor &Visitor) const { Visitor.visit(*this); }
 
 void Section::accept(MutableSectionVisitor &Visitor) { Visitor.visit(*this); }
@@ -217,6 +367,15 @@
   Visitor.visit(*this);
 }
 
+void OwnedDataSection::appendHexData(StringRef HexData) {
+  assert((HexData.size() & 1) == 0);
+  while (!HexData.empty()) {
+    Data.push_back(checkedGetHex<uint8_t>(HexData.take_front(2)));
+    HexData = HexData.drop_front(2);
+  }
+  Size = Data.size();
+}
+
 void BinarySectionWriter::visit(const CompressedSection &Sec) {
   error("cannot write compressed section '" + Sec.Name + "' ");
 }
@@ -1807,6 +1966,109 @@
   return Error::success();
 }
 
+bool IHexWriter::SectionCompare::operator()(const SectionBase *Lhs,
+                                            const SectionBase *Rhs) const {
+  return (sectionPhysicalAddr(Lhs) & 0xFFFFFFFFU) <
+         (sectionPhysicalAddr(Rhs) & 0xFFFFFFFFU);
+}
+
+uint64_t IHexWriter::writeEntryPointRecord(uint8_t *Buf) {
+  IHexLineData HexData;
+  uint8_t Data[4] = {};
+  // We don't write entry point record if entry is zero.
+  if (Obj.Entry == 0)
+    return 0;
+
+  if (Obj.Entry <= 0xFFFFFU) {
+    Data[0] = ((Obj.Entry & 0xF0000U) >> 12) & 0xFF;
+    support::endian::write(&Data[2], static_cast<uint16_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr80x86, 0, Data);
+  } else {
+    support::endian::write(Data, static_cast<uint32_t>(Obj.Entry),
+                           support::big);
+    HexData = IHexRecord::getLine(IHexRecord::StartAddr, 0, Data);
+  }
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
+
+uint64_t IHexWriter::writeEndOfFileRecord(uint8_t *Buf) {
+  IHexLineData HexData = IHexRecord::getLine(IHexRecord::EndOfFile, 0, {});
+  memcpy(Buf, HexData.data(), HexData.size());
+  return HexData.size();
+}
+
+Error IHexWriter::write() {
+  IHexSectionWriter Writer(Buf);
+  // Write sections.
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(Writer);
+
+  uint64_t Offset = Writer.getBufferOffset();
+  // Write entry point address.
+  Offset += writeEntryPointRecord(Buf.getBufferStart() + Offset);
+  // Write EOF.
+  Offset += writeEndOfFileRecord(Buf.getBufferStart() + Offset);
+  assert(Offset == TotalSize);
+  return Buf.commit();
+}
+
+Error IHexWriter::checkSection(const SectionBase &Sec) {
+  uint64_t Addr = sectionPhysicalAddr(&Sec);
+  if (addressOverflows32bit(Addr) || addressOverflows32bit(Addr + Sec.Size - 1))
+    return createStringError(
+        errc::invalid_argument,
+        "Section '%s' address range [%p, %p] is not 32 bit", Sec.Name.c_str(),
+        Addr, Addr + Sec.Size - 1);
+  return Error::success();
+}
+
+Error IHexWriter::finalize() {
+  bool UseSegments = false;
+  auto ShouldWrite = [](const SectionBase &Sec) {
+    return (Sec.Flags & ELF::SHF_ALLOC) && (Sec.Type != ELF::SHT_NOBITS);
+  };
+  auto IsInPtLoad = [](const SectionBase &Sec) {
+    return Sec.ParentSegment && Sec.ParentSegment->Type == ELF::PT_LOAD;
+  };
+
+  // We can't write 64-bit addresses.
+  if (addressOverflows32bit(Obj.Entry))
+    return createStringError(errc::invalid_argument,
+                             "Entry point address %p overflows 32 bits.",
+                             Obj.Entry);
+
+  // If any section we're to write has segment then we
+  // switch to using physical addresses. Otherwise we
+  // use section virtual address.
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && IsInPtLoad(Section)) {
+      UseSegments = true;
+      break;
+    }
+
+  for (auto &Section : Obj.sections())
+    if (ShouldWrite(Section) && (!UseSegments || IsInPtLoad(Section))) {
+      if (Error E = checkSection(Section))
+        return E;
+      Sections.insert(&Section);
+    }
+
+  IHexSectionWriterBase LengthCalc(Buf);
+  for (const SectionBase *Sec : Sections)
+    Sec->accept(LengthCalc);
+
+  // We need space to write section records + StartAddress record
+  // (if start adress is not zero) + EndOfFile record.
+  TotalSize = LengthCalc.getBufferOffset() +
+              (Obj.Entry ? IHexRecord::getLineLength(4) : 0) +
+              IHexRecord::getLineLength(0);
+  if (Error E = Buf.allocate(TotalSize))
+    return E;
+  return Error::success();
+}
+
 template class ELFBuilder<ELF64LE>;
 template class ELFBuilder<ELF64BE>;
 template class ELFBuilder<ELF32LE>;
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
index 9298518..fabbb7f 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -17,6 +17,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include <cstddef>
 #include <cstdint>
@@ -168,6 +169,8 @@
 
 #define MAKE_SEC_WRITER_FRIEND                                                 \
   friend class SectionWriter;                                                  \
+  friend class IHexSectionWriterBase;                                          \
+  friend class IHexSectionWriter;                                              \
   template <class ELFT> friend class ELFSectionWriter;                         \
   template <class ELFT> friend class ELFSectionSizer;
 
@@ -186,6 +189,114 @@
   explicit BinarySectionWriter(Buffer &Buf) : SectionWriter(Buf) {}
 };
 
+using IHexLineData = SmallVector<char, 64>;
+
+struct IHexRecord {
+  // Memory address of the record.
+  uint16_t Addr;
+  // Record type (see below).
+  uint16_t Type;
+  // Record data in hexadecimal form.
+  StringRef HexData;
+
+  // Helper method to get file length of the record
+  // including newline character
+  static size_t getLength(size_t DataSize) {
+    // :LLAAAATT[DD...DD]CC'
+    return DataSize * 2 + 11;
+  }
+
+  // Gets length of line in a file (getLength + CRLF).
+  static size_t getLineLength(size_t DataSize) {
+    return getLength(DataSize) + 2;
+  }
+
+  // Given type, address and data returns line which can
+  // be written to output file.
+  static IHexLineData getLine(uint8_t Type, uint16_t Addr,
+                              ArrayRef<uint8_t> Data);
+
+  // Calculates checksum of stringified record representation
+  // S must NOT contain leading ':' and trailing whitespace
+  // characters
+  static uint8_t getChecksum(StringRef S);
+
+  enum Type {
+    // Contains data and a 16-bit starting address for the data.
+    // The byte count specifies number of data bytes in the record.
+    Data = 0,
+    // Must occur exactly once per file in the last line of the file.
+    // The data field is empty (thus byte count is 00) and the address
+    // field is typically 0000.
+    EndOfFile = 1,
+    // The data field contains a 16-bit segment base address (thus byte
+    // count is always 02) compatible with 80x86 real mode addressing.
+    // The address field (typically 0000) is ignored. The segment address
+    // from the most recent 02 record is multiplied by 16 and added to each
+    // subsequent data record address to form the physical starting address
+    // for the data. This allows addressing up to one megabyte of address
+    // space.
+    SegmentAddr = 2,
+    // or 80x86 processors, specifies the initial content of the CS:IP
+    // registers. The address field is 0000, the byte count is always 04,
+    // the first two data bytes are the CS value, the latter two are the
+    // IP value.
+    StartAddr80x86 = 3,
+    // Allows for 32 bit addressing (up to 4GiB). The record's address field
+    // is ignored (typically 0000) and its byte count is always 02. The two
+    // data bytes (big endian) specify the upper 16 bits of the 32 bit
+    // absolute address for all subsequent type 00 records
+    ExtendedAddr = 4,
+    // The address field is 0000 (not used) and the byte count is always 04.
+    // The four data bytes represent a 32-bit address value. In the case of
+    // 80386 and higher CPUs, this address is loaded into the EIP register.
+    StartAddr = 5,
+    // We have no other valid types
+    InvalidType = 6
+  };
+};
+
+// Base class for IHexSectionWriter. This class implements writing algorithm,
+// but doesn't actually write records. It is used for output buffer size
+// calculation in IHexWriter::finalize.
+class IHexSectionWriterBase : public BinarySectionWriter {
+  // 20-bit segment address
+  uint32_t SegmentAddr = 0;
+  // Extended linear address
+  uint32_t BaseAddr = 0;
+
+  // Write segment address corresponding to 'Addr'
+  uint64_t writeSegmentAddr(uint64_t Addr);
+  // Write extended linear (base) address corresponding to 'Addr'
+  uint64_t writeBaseAddr(uint64_t Addr);
+
+protected:
+  // Offset in the output buffer
+  uint64_t Offset = 0;
+
+  void writeSection(const SectionBase *Sec, ArrayRef<uint8_t> Data);
+  virtual void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data);
+
+public:
+  explicit IHexSectionWriterBase(Buffer &Buf) : BinarySectionWriter(Buf) {}
+
+  uint64_t getBufferOffset() const { return Offset; }
+  void visit(const Section &Sec) final;
+  void visit(const OwnedDataSection &Sec) final;
+  void visit(const StringTableSection &Sec) override;
+  void visit(const DynamicRelocationSection &Sec) final;
+  using BinarySectionWriter::visit;
+};
+
+// Real IHEX section writer
+class IHexSectionWriter : public IHexSectionWriterBase {
+public:
+  IHexSectionWriter(Buffer &Buf) : IHexSectionWriterBase(Buf) {}
+
+  void writeData(uint8_t Type, uint16_t Addr, ArrayRef<uint8_t> Data) override;
+  void visit(const StringTableSection &Sec) override;
+};
+
 class Writer {
 protected:
   Object &Obj;
@@ -245,6 +356,25 @@
   BinaryWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
 };
 
+class IHexWriter : public Writer {
+  struct SectionCompare {
+    bool operator()(const SectionBase *Lhs, const SectionBase *Rhs) const;
+  };
+
+  std::set<const SectionBase *, SectionCompare> Sections;
+  size_t TotalSize;
+
+  Error checkSection(const SectionBase &Sec);
+  uint64_t writeEntryPointRecord(uint8_t *Buf);
+  uint64_t writeEndOfFileRecord(uint8_t *Buf);
+
+public:
+  ~IHexWriter() {}
+  Error finalize() override;
+  Error write() override;
+  IHexWriter(Object &Obj, Buffer &Buf) : Writer(Obj, Buf) {}
+};
+
 class SectionBase {
 public:
   std::string Name;
@@ -361,6 +491,16 @@
     OriginalOffset = std::numeric_limits<uint64_t>::max();
   }
 
+  OwnedDataSection(const Twine &SecName, uint64_t SecAddr, uint64_t SecFlags,
+                   uint64_t SecOff) {
+    Name = SecName.str();
+    Type = ELF::SHT_PROGBITS;
+    Addr = SecAddr;
+    Flags = SecFlags;
+    OriginalOffset = SecOff;
+  }
+
+  void appendHexData(StringRef HexData);
   void accept(SectionVisitor &Sec) const override;
   void accept(MutableSectionVisitor &Visitor) override;
 };