This patch improves the MCJIT runtime dynamic loader by adding handling of
zero-initialized sections, virtual sections, and common symbols, and by
no longer loading sections that are not required for execution, such as
debug information.

Patch by Andy Kaylor!



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154610 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 80110e8..44f89cf 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -74,9 +74,9 @@
   OS.flush();
 
   // Load the object into the dynamic linker.
-  // FIXME: It would be nice to avoid making yet another copy.
-  MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
-                                                              Buffer.size()));
+  MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(),
+                                                          Buffer.size()),
+                                                "", false);
   if (Dyld.loadObject(MB))
     report_fatal_error(Dyld.getErrorString());
   // Resolve any relocations.
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 7a2b858..63cec1a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -36,10 +36,9 @@
   }
 } // end anonymous namespace
 
-
 // Resolve the relocations for all symbols we currently know about.
 void RuntimeDyldImpl::resolveRelocations() {
-  // First, resolve relocations assotiated with external symbols.
+  // First, resolve relocations associated with external symbols.
   resolveSymbols();
 
   // Just iterate over the sections we have and resolve all the relocations
@@ -63,14 +62,18 @@
 bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
   // FIXME: ObjectFile don't modify MemoryBuffer.
   //        It should use const MemoryBuffer as parameter.
-  ObjectFile *obj
-    = ObjectFile::createObjectFile(const_cast<MemoryBuffer*>(InputBuffer));
+  OwningPtr<ObjectFile> obj(ObjectFile::createObjectFile(
+                                       const_cast<MemoryBuffer*>(InputBuffer)));
+  if (!obj)
+    report_fatal_error("Unable to create object image from memory buffer!");
 
   Arch = (Triple::ArchType)obj->getArch();
 
   LocalSymbolMap LocalSymbols;     // Functions and data symbols from the
                                    // object file.
   ObjSectionToIDMap LocalSections; // Used sections from the object file
+  CommonSymbolMap   CommonSymbols; // Common symbols requiring allocation
+  uint64_t          CommonSize = 0;
 
   error_code err;
   // Parse symbols
@@ -83,36 +86,50 @@
     Check(i->getType(SymType));
     Check(i->getName(Name));
 
-    if (SymType == object::SymbolRef::ST_Function ||
-        SymType == object::SymbolRef::ST_Data) {
-      uint64_t FileOffset;
-      uint32_t flags;
-      StringRef sData;
-      section_iterator si = obj->end_sections();
-      Check(i->getFileOffset(FileOffset));
-      Check(i->getFlags(flags));
-      Check(i->getSection(si));
-      if (si == obj->end_sections()) continue;
-      Check(si->getContents(sData));
-      const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
-                              (uintptr_t)FileOffset;
-      uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
-      unsigned SectionID
-        = findOrEmitSection(*si,
-                          SymType == object::SymbolRef::ST_Function,
-                          LocalSections);
-      bool isGlobal = flags & SymbolRef::SF_Global;
-      LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
-      DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
-                   << " flags: " << flags
-                   << " SID: " << SectionID
-                   << " Offset: " << format("%p", SectOffset));
-      if (isGlobal)
-        SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+    uint32_t flags;
+    Check(i->getFlags(flags));
+
+    bool isCommon = flags & SymbolRef::SF_Common;
+    if (isCommon) {
+      // Add the common symbols to a list.  We'll allocate them all below.
+      uint64_t Size = 0;
+      Check(i->getSize(Size));
+      CommonSize += Size;
+      CommonSymbols[*i] = Size;
+    } else {
+      if (SymType == object::SymbolRef::ST_Function ||
+          SymType == object::SymbolRef::ST_Data) {
+        uint64_t FileOffset;
+        StringRef sData;
+        section_iterator si = obj->end_sections();
+        Check(i->getFileOffset(FileOffset));
+        Check(i->getSection(si));
+        if (si == obj->end_sections()) continue;
+        Check(si->getContents(sData));
+        const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+                                (uintptr_t)FileOffset;
+        uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
+        unsigned SectionID =
+          findOrEmitSection(*si,
+                            SymType == object::SymbolRef::ST_Function,
+                            LocalSections);
+        bool isGlobal = flags & SymbolRef::SF_Global;
+        LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+        DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+                     << " flags: " << flags
+                     << " SID: " << SectionID
+                     << " Offset: " << format("%p", SectOffset));
+        if (isGlobal)
+          SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+      }
     }
     DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
   }
 
+  // Allocate common symbols
+  if (CommonSize != 0)
+    emitCommonSymbols(CommonSymbols, CommonSize, LocalSymbols);
+
   // Parse and proccess relocations
   DEBUG(dbgs() << "Parse relocations:\n");
   for (section_iterator si = obj->begin_sections(),
@@ -150,6 +167,38 @@
   return false;
 }
 
+// Allocates one zero-filled data section for all symbols in Map, laid out
+// back to back, and records each symbol's (SectionID, offset) in LocalSymbols.
+unsigned RuntimeDyldImpl::emitCommonSymbols(const CommonSymbolMap &Map,
+                                            uint64_t TotalSize,
+                                            LocalSymbolMap &LocalSymbols) {
+  // Allocate memory for the section
+  unsigned SectionID = Sections.size();
+  uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
+                                              SectionID);
+  if (!Addr)
+    report_fatal_error("Unable to allocate memory for common symbols!");
+  Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0));
+  memset(Addr, 0, TotalSize);
+
+  DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
+               << " new addr: " << format("%p", Addr)
+               << " DataSize: " << TotalSize
+               << "\n");
+
+  // Assign each symbol the next free offset within the new section.
+  uint64_t Offset = 0;
+  for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end();
+       it != itEnd; ++it) {
+    StringRef Name;
+    Check(it->first.getName(Name));
+    LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset);
+    Offset += it->second;
+  }
+
+  return SectionID;
+}
+
 unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section,
                                       bool IsCode) {
 
@@ -158,7 +207,7 @@
   error_code err;
   if (StubSize > 0) {
     for (relocation_iterator i = Section.begin_relocations(),
-         e = Section.end_relocations(); i != e; i.increment(err))
+         e = Section.end_relocations(); i != e; i.increment(err), Check(err))
       StubBufSize += StubSize;
   }
   StringRef data;
@@ -167,22 +216,63 @@
   Check(Section.getAlignment(Alignment64));
 
   unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
-  unsigned DataSize = data.size();
-  unsigned Allocate = DataSize + StubBufSize;
-  unsigned SectionID = Sections.size();
-  const char *pData = data.data();
-  uint8_t *Addr = IsCode
-    ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
-    : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+  bool IsRequired;
+  bool IsVirtual;
+  bool IsZeroInit;
+  uint64_t DataSize;
+  Check(Section.isRequiredForExecution(IsRequired));
+  Check(Section.isVirtual(IsVirtual));
+  Check(Section.isZeroInit(IsZeroInit));
+  Check(Section.getSize(DataSize));
 
-  memcpy(Addr, pData, DataSize);
-  DEBUG(dbgs() << "emitSection SectionID: " << SectionID
-               << " obj addr: " << format("%p", pData)
-               << " new addr: " << format("%p", Addr)
-               << " DataSize: " << DataSize
-               << " StubBufSize: " << StubBufSize
-               << " Allocate: " << Allocate
-               << "\n");
+  unsigned Allocate;
+  unsigned SectionID = Sections.size();
+  uint8_t *Addr;
+  const char *pData = 0;
+
+  // Some sections, such as debug info, don't need to be loaded for execution.
+  // Leave those where they are.
+  if (IsRequired) {
+    Allocate = DataSize + StubBufSize;
+    Addr = IsCode
+      ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+      : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+    if (!Addr)
+      report_fatal_error("Unable to allocate section memory!");
+
+    // Virtual sections have no data in the object image, so leave pData = 0
+    if (!IsVirtual)
+      pData = data.data();
+
+    // Zero-initialize or copy the data from the image
+    if (IsZeroInit || IsVirtual)
+      memset(Addr, 0, DataSize);
+    else
+      memcpy(Addr, pData, DataSize);
+
+    DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+                 << " obj addr: " << format("%p", pData)
+                 << " new addr: " << format("%p", Addr)
+                 << " DataSize: " << DataSize
+                 << " StubBufSize: " << StubBufSize
+                 << " Allocate: " << Allocate
+                 << "\n");
+  }
+  else {
+    // Even if we didn't load the section, we need to record an entry for it
+    //   to handle later processing (and by 'handle' I mean don't do anything
+    //   with these sections).
+    Allocate = 0;
+    Addr = 0;
+    DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+                 << " obj addr: " << format("%p", data.data())
+                 << " new addr: 0"
+                 << " DataSize: " << DataSize
+                 << " StubBufSize: " << StubBufSize
+                 << " Allocate: " << Allocate
+                 << "\n");
+  }
+
   Sections.push_back(SectionEntry(Addr, Allocate, DataSize,(uintptr_t)pData));
   return SectionID;
 }
@@ -259,15 +349,18 @@
 
 void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
                                              uint64_t Value) {
-    uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
-    DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
-          << " + " << RE.Offset << " (" << format("%p", Target) << ")"
-          << " Data: " << RE.Data
-          << " Addend: " << RE.Addend
-          << "\n");
+    // Skip relocations in sections that were not loaded (Address is 0).
+    if (Sections[RE.SectionID].Address != 0) {
+      uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+      DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+            << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+            << " Data: " << RE.Data
+            << " Addend: " << RE.Addend
+            << "\n");
 
-    resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
-                      Value, RE.Data, RE.Addend);
+      resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+                        Value, RE.Data, RE.Addend);
+    }
 }
 
 void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 9d46b21..57fefee 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -71,7 +71,8 @@
   switch (Type) {
   case ELF::R_386_32: {
     uint32_t *Target = (uint32_t*)(LocalAddress);
-    *Target = Value + Addend;
+    uint32_t Placeholder = *Target;
+    *Target = Placeholder + Value + Addend;
     break;
   }
   case ELF::R_386_PC32: {
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index d6430a9..bf678af 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -110,6 +110,9 @@
   StringMap<SymbolLoc> SymbolTable;
   typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
 
+  // Keep a map of common symbols to their sizes (64-bit, as getSize reports)
+  typedef std::map<SymbolRef, uint64_t> CommonSymbolMap;
+
   // For each symbol, keep a list of relocations based on it. Anytime
   // its address is reassigned (the JIT re-compiled the function, e.g.),
   // the relocations get re-resolved.
@@ -149,6 +152,12 @@
     return (uint8_t*)Sections[SectionID].Address;
   }
 
+  /// \brief Emits one zero-filled data section holding every symbol in Map.
+  /// \return SectionID. Each symbol's location is recorded in \p Symbols.
+  unsigned emitCommonSymbols(const CommonSymbolMap &Map,
+                             uint64_t TotalSize,
+                             LocalSymbolMap &Symbols);
+
   /// \brief Emits section data from the object file to the MemoryManager.
   /// \param IsCode if it's true then allocateCodeSection() will be
   ///        used for emmits, else allocateDataSection() will be used.
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 53b15d0..bd27a56 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -356,6 +356,27 @@
   return object_error::success;
 }
 
+error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+                                                         bool &Result) const {
+  // FIXME: Unimplemented; every section is reported as required for now.
+  Result = true;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionVirtual(DataRefImpl Sec,
+                                           bool &Result) const {
+  // A section counts as virtual when flagged as holding uninitialized data.
+  Result = toSec(Sec)->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Sec,
+                                             bool &Result) const {
+  // FIXME: Unimplemented; no section is reported as zero-initialized.
+  Result = false;
+  return object_error::success;
+}
+
 error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
                                                  DataRefImpl Symb,
                                                  bool &Result) const {
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index ac90d5c..1078faa 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -175,7 +175,12 @@
     BeginOffset = Entry->Value;
     SectionIndex = Entry->SectionIndex;
     if (!SectionIndex) {
-      Result = UnknownAddressOrSize;
+      uint32_t flags = SymbolRef::SF_None;
+      getSymbolFlags(DRI, flags);
+      if (flags & SymbolRef::SF_Common)
+        Result = Entry->Value;
+      else
+        Result = UnknownAddressOrSize;
       return object_error::success;
     }
     // Unfortunately symbols are unsorted so we need to touch all
@@ -198,7 +203,12 @@
     BeginOffset = Entry->Value;
     SectionIndex = Entry->SectionIndex;
     if (!SectionIndex) {
-      Result = UnknownAddressOrSize;
+      uint32_t flags = SymbolRef::SF_None;
+      getSymbolFlags(DRI, flags);
+      if (flags & SymbolRef::SF_Common)
+        Result = Entry->Value;
+      else
+        Result = UnknownAddressOrSize;
       return object_error::success;
     }
     // Unfortunately symbols are unsorted so we need to touch all
@@ -265,19 +275,22 @@
                                            uint32_t &Result) const {
   uint16_t MachOFlags;
   uint8_t MachOType;
+  uint8_t MachOSectionIndex;
   if (MachOObj->is64Bit()) {
     InMemoryStruct<macho::Symbol64TableEntry> Entry;
     getSymbol64TableEntry(DRI, Entry);
     MachOFlags = Entry->Flags;
     MachOType = Entry->Type;
+    MachOSectionIndex = Entry->SectionIndex;
   } else {
     InMemoryStruct<macho::SymbolTableEntry> Entry;
     getSymbolTableEntry(DRI, Entry);
     MachOFlags = Entry->Flags;
     MachOType = Entry->Type;
+    MachOSectionIndex = Entry->SectionIndex;
   }
 
-  // TODO: Correctly set SF_ThreadLocal and SF_Common.
+  // TODO: Correctly set SF_ThreadLocal
   Result = SymbolRef::SF_None;
 
   if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
@@ -286,8 +299,11 @@
   if (MachOFlags & macho::STF_StabsEntryMask)
     Result |= SymbolRef::SF_FormatSpecific;
 
-  if (MachOType & MachO::NlistMaskExternal)
+  if (MachOType & MachO::NlistMaskExternal) {
     Result |= SymbolRef::SF_Global;
+    if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+      Result |= SymbolRef::SF_Common;
+  }
 
   if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
     Result |= SymbolRef::SF_Weak;
@@ -566,6 +582,37 @@
   return object_error::success;
 }
 
+error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+                                                          bool &Result) const {
+  // FIXME: Unimplemented; every section is reported as required for now.
+  Result = true;
+  return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
+                                            bool &Result) const {
+  // FIXME: Unimplemented; no section is reported as virtual for now.
+  Result = false;
+  return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
+                                              bool &Result) const {
+  uint32_t Type; // Enumerated value in the low byte of Flags, not a bit flag.
+  if (MachOObj->is64Bit()) {
+    InMemoryStruct<macho::Section64> Sect;
+    getSection64(DRI, Sect);
+    Type = Sect->Flags & MachO::SectionFlagMaskSectionType;
+  } else {
+    InMemoryStruct<macho::Section> Sect;
+    getSection(DRI, Sect);
+    Type = Sect->Flags & MachO::SectionFlagMaskSectionType;
+  }
+  Result = (Type == MachO::SectionTypeZeroFill ||
+            Type == MachO::SectionTypeZeroFillLarge);
+  return object_error::success;
+}
+
 error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
                                                   DataRefImpl Symb,
                                                   bool &Result) const {