[mach-o] Add support for LC_DATA_IN_CODE

Sometimes compilers emit data into code sections (e.g. constant pools or
jump tables). These runs of data can throw off disassemblers.  The solution
in mach-o is that ranges of data-in-code are encoded into a table pointed to
by the LC_DATA_IN_CODE load command.

The way the data-in-code information is encoded into lld's Atom model is that
that start and end of each data run is marked with a Reference whose offset
is the start/end of the data run.  For arm, the switch back to code also marks
whether it is thumb or arm code.

llvm-svn: 213901
diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
index bedfe6d..adda9b7 100644
--- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
+++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
@@ -102,6 +102,7 @@
   void      addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
   void      addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
   void      addSectionRelocs(const lld::File &, NormalizedFile &file);
+  void      buildDataInCodeArray(const lld::File &, NormalizedFile &file);
   void      addDependentDylibs(const lld::File &, NormalizedFile &file);
   void      copyEntryPointAddress(NormalizedFile &file);
 
@@ -899,6 +900,46 @@
   }
 }
 
+void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) {
+  for (SectionInfo *si : _sectionInfos) {
+    for (const AtomInfo &info : si->atomsAndOffsets) {
+      // Atoms that contain data-in-code have "transition" references
+      // which mark a point where the embedded data starts of ends.
+      // This needs to be converted to the mach-o format which is an array
+      // of data-in-code ranges.
+      uint32_t startOffset = 0;
+      DataRegionType mode = DataRegionType(0);
+      for (const Reference *ref : *info.atom) {
+        if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
+          continue;
+        if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
+          DataRegionType nextMode = (DataRegionType)ref->addend();
+          if (mode != nextMode) {
+            if (mode != 0) {
+              // Found end data range, so make range entry.
+              DataInCode entry;
+              entry.offset = si->address + info.offsetInSection + startOffset;
+              entry.length = ref->offsetInAtom() - startOffset;
+              entry.kind   = mode;
+              file.dataInCode.push_back(entry);
+            }
+          }
+          mode = nextMode;
+          startOffset = ref->offsetInAtom();
+        }
+      }
+      if (mode != 0) {
+        // Function ends with data (no end transition).
+        DataInCode entry;
+        entry.offset = si->address + info.offsetInSection + startOffset;
+        entry.length = info.atom->size() - startOffset;
+        entry.kind   = mode;
+        file.dataInCode.push_back(entry);
+      }
+    }
+  }
+}
+
 void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
                                                         NormalizedFile &nFile) {
   if (_context.outputMachOType() == llvm::MachO::MH_OBJECT)
@@ -992,6 +1033,7 @@
   util.addIndirectSymbols(atomFile, normFile);
   util.addRebaseAndBindingInfo(atomFile, normFile);
   util.addSectionRelocs(atomFile, normFile);
+  util.buildDataInCodeArray(atomFile, normFile);
   util.copyEntryPointAddress(normFile);
 
   return std::move(f);