[mach-o] Add support for LC_DATA_IN_CODE
Sometimes compilers emit data into code sections (e.g. constant pools or
jump tables). These runs of data can throw off disassemblers. The solution
in mach-o is that ranges of data-in-code are encoded into a table pointed to
by the LC_DATA_IN_CODE load command.
The way the data-in-code information is encoded into lld's Atom model is that
the start and end of each data run are each marked with a Reference whose offset
is the start/end of the data run. For arm, the switch back to code also marks
whether it is thumb or arm code.
llvm-svn: 213901
diff --git a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
index bedfe6d..adda9b7 100644
--- a/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
+++ b/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp
@@ -102,6 +102,7 @@
void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file);
void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file);
void addSectionRelocs(const lld::File &, NormalizedFile &file);
+ void buildDataInCodeArray(const lld::File &, NormalizedFile &file);
void addDependentDylibs(const lld::File &, NormalizedFile &file);
void copyEntryPointAddress(NormalizedFile &file);
@@ -899,6 +900,46 @@
}
}
+void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { // the lld::File argument is unnamed and unused here
+ for (SectionInfo *si : _sectionInfos) {
+ for (const AtomInfo &info : si->atomsAndOffsets) {
+ // Atoms that contain data-in-code have "transition" references
+ // which mark a point where the embedded data starts or ends.
+ // Convert them to the mach-o form: one DataInCode entry (offset, length,
+ // kind) per data run, appended to file.dataInCode.
+ uint32_t startOffset = 0; // atom-relative offset of the most recent transition
+ DataRegionType mode = DataRegionType(0); // 0 is treated below as "not inside a data run"
+ for (const Reference *ref : *info.atom) { // NOTE(review): length math presumably relies on increasing-offset order — confirm
+ if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
+ continue; // only mach_o-namespace references are examined
+ if (_archHandler.isDataInCodeTransition(ref->kindValue())) {
+ DataRegionType nextMode = (DataRegionType)ref->addend(); // the addend is used as the region kind being entered
+ if (mode != nextMode) {
+ if (mode != 0) {
+ // A non-zero mode is being left here, so record the run that just ended.
+ DataInCode entry;
+ entry.offset = si->address + info.offsetInSection + startOffset; // section address + atom offset in section + run start
+ entry.length = ref->offsetInAtom() - startOffset; // the run stops where this transition sits
+ entry.kind = mode;
+ file.dataInCode.push_back(entry);
+ }
+ }
+ mode = nextMode;
+ startOffset = ref->offsetInAtom(); // NOTE(review): also advances on a same-mode transition, shortening the open run — confirm intended
+ }
+ }
+ if (mode != 0) {
+ // Atom ended while still in a non-zero mode (no closing transition).
+ DataInCode entry;
+ entry.offset = si->address + info.offsetInSection + startOffset;
+ entry.length = info.atom->size() - startOffset; // the run extends to the end of the atom
+ entry.kind = mode;
+ file.dataInCode.push_back(entry);
+ }
+ }
+ }
+}
+
void Util::addRebaseAndBindingInfo(const lld::File &atomFile,
NormalizedFile &nFile) {
if (_context.outputMachOType() == llvm::MachO::MH_OBJECT)
@@ -992,6 +1033,7 @@
util.addIndirectSymbols(atomFile, normFile);
util.addRebaseAndBindingInfo(atomFile, normFile);
util.addSectionRelocs(atomFile, normFile);
+ util.buildDataInCodeArray(atomFile, normFile);
util.copyEntryPointAddress(normFile);
return std::move(f);