blob: 2cfe87f37b0c24e2b369343d940261aa3f5d8d86 [file] [log] [blame]
//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the MC-JIT runtime dynamic linker.
11//
12//===----------------------------------------------------------------------===//
13
Jim Grosbach8b54dca2011-03-23 19:52:00 +000014#define DEBUG_TYPE "dyld"
Jim Grosbach6e563312011-03-21 22:15:52 +000015#include "llvm/ADT/OwningPtr.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000016#include "llvm/ADT/SmallVector.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000017#include "llvm/ADT/StringMap.h"
18#include "llvm/ADT/StringRef.h"
Jim Grosbachc41ab782011-04-06 01:11:05 +000019#include "llvm/ADT/STLExtras.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000020#include "llvm/ADT/Twine.h"
21#include "llvm/ExecutionEngine/RuntimeDyld.h"
22#include "llvm/Object/MachOObject.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000023#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/Format.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000026#include "llvm/Support/Memory.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/system_error.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000029#include "llvm/Support/raw_ostream.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000030using namespace llvm;
31using namespace llvm::object;
32
Chandler Carruth53c5e7b2011-04-05 23:54:31 +000033// Empty out-of-line virtual destructor as the key function.
34RTDyldMemoryManager::~RTDyldMemoryManager() {}
35
Jim Grosbach6e563312011-03-21 22:15:52 +000036namespace llvm {
37class RuntimeDyldImpl {
Jim Grosbacha8287e32011-03-23 22:06:06 +000038 unsigned CPUType;
39 unsigned CPUSubtype;
40
Jim Grosbachfcbe5b72011-04-04 23:04:39 +000041 // The MemoryManager to load objects into.
42 RTDyldMemoryManager *MemMgr;
Jim Grosbach5acfa9f2011-03-29 21:03:05 +000043
Jim Grosbachf8c1c842011-04-12 21:20:41 +000044 // FIXME: This all assumes we're dealing with external symbols for anything
45 // explicitly referenced. I.e., we can index by name and things
46 // will work out. In practice, this may not be the case, so we
47 // should find a way to effectively generalize.
Jim Grosbachc41ab782011-04-06 01:11:05 +000048
49 // For each function, we have a MemoryBlock of it's instruction data.
50 StringMap<sys::MemoryBlock> Functions;
51
Jim Grosbach6e563312011-03-21 22:15:52 +000052 // Master symbol table. As modules are loaded and external symbols are
53 // resolved, their addresses are stored here.
Jim Grosbachf8c1c842011-04-12 21:20:41 +000054 StringMap<uint8_t*> SymbolTable;
Jim Grosbach6e563312011-03-21 22:15:52 +000055
Jim Grosbachf8c1c842011-04-12 21:20:41 +000056 // For each symbol, keep a list of relocations based on it. Anytime
57 // its address is reassigned (the JIT re-compiled the function, e.g.),
58 // the relocations get re-resolved.
59 struct RelocationEntry {
60 std::string Target; // Object this relocation is contained in.
61 uint64_t Offset; // Offset into the object for the relocation.
62 uint32_t Data; // Second word of the raw macho relocation entry.
63 int64_t Addend; // Addend encoded in the instruction itself, if any.
64 bool isResolved; // Has this relocation been resolved previously?
65
66 RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
67 : Target(t), Offset(offset), Data(data), Addend(addend),
68 isResolved(false) {}
69 };
70 typedef SmallVector<RelocationEntry, 4> RelocationList;
71 StringMap<RelocationList> Relocations;
72
73 // FIXME: Also keep a map of all the relocations contained in an object. Use
74 // this to dynamically answer whether all of the relocations in it have
75 // been resolved or not.
Jim Grosbach6e563312011-03-21 22:15:52 +000076
77 bool HasError;
78 std::string ErrorStr;
79
80 // Set the error state and record an error string.
81 bool Error(const Twine &Msg) {
82 ErrorStr = Msg.str();
83 HasError = true;
84 return true;
85 }
86
Jim Grosbachc41ab782011-04-06 01:11:05 +000087 void extractFunction(StringRef Name, uint8_t *StartAddress,
88 uint8_t *EndAddress);
Jim Grosbachf8c1c842011-04-12 21:20:41 +000089 bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
90 unsigned Type, unsigned Size);
91 bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
Jim Grosbacha8287e32011-03-23 22:06:06 +000092 unsigned Type, unsigned Size);
Jim Grosbachf8c1c842011-04-12 21:20:41 +000093 bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
Jim Grosbacha8287e32011-03-23 22:06:06 +000094 unsigned Type, unsigned Size);
Jim Grosbach8b54dca2011-03-23 19:52:00 +000095
Jim Grosbach6e563312011-03-21 22:15:52 +000096 bool loadSegment32(const MachOObject *Obj,
97 const MachOObject::LoadCommandInfo *SegmentLCI,
98 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
99 bool loadSegment64(const MachOObject *Obj,
100 const MachOObject::LoadCommandInfo *SegmentLCI,
101 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
102
103public:
Jim Grosbachfcbe5b72011-04-04 23:04:39 +0000104 RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
Jim Grosbach8371c892011-03-22 00:42:19 +0000105
Jim Grosbach6e563312011-03-21 22:15:52 +0000106 bool loadObject(MemoryBuffer *InputBuffer);
107
Jim Grosbachb0271052011-04-08 17:31:24 +0000108 void *getSymbolAddress(StringRef Name) {
Jim Grosbachc41ab782011-04-06 01:11:05 +0000109 // FIXME: Just look up as a function for now. Overly simple of course.
110 // Work in progress.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000111 return SymbolTable.lookup(Name);
Jim Grosbach6e563312011-03-21 22:15:52 +0000112 }
113
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000114 void resolveRelocations();
115
116 void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
117
Jim Grosbach6e563312011-03-21 22:15:52 +0000118 // Is the linker in an error state?
119 bool hasError() { return HasError; }
120
121 // Mark the error condition as handled and continue.
122 void clearError() { HasError = false; }
123
124 // Get the error message.
125 StringRef getErrorString() { return ErrorStr; }
126};
127
Jim Grosbachc41ab782011-04-06 01:11:05 +0000128void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
Jim Grosbach01ccab42011-04-06 22:13:52 +0000129 uint8_t *EndAddress) {
Jim Grosbachc41ab782011-04-06 01:11:05 +0000130 // Allocate memory for the function via the memory manager.
131 uintptr_t Size = EndAddress - StartAddress + 1;
Jim Grosbachffa62502011-05-13 20:12:14 +0000132 uintptr_t AllocSize = Size;
133 uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
Jim Grosbachc41ab782011-04-06 01:11:05 +0000134 assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
135 "Memory manager failed to allocate enough memory!");
136 // Copy the function payload into the memory block.
Jim Grosbachffa62502011-05-13 20:12:14 +0000137 memcpy(Mem, StartAddress, Size);
Jim Grosbachc41ab782011-04-06 01:11:05 +0000138 MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
139 // Remember where we put it.
140 Functions[Name] = sys::MemoryBlock(Mem, Size);
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000141 // Default the assigned address for this symbol to wherever this
142 // allocated it.
143 SymbolTable[Name] = Mem;
Jim Grosbachffa62502011-05-13 20:12:14 +0000144 DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
Jim Grosbachc41ab782011-04-06 01:11:05 +0000145}
146
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000147bool RuntimeDyldImpl::
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000148resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
149 unsigned Type, unsigned Size) {
150 // This just dispatches to the proper target specific routine.
Jim Grosbacha8287e32011-03-23 22:06:06 +0000151 switch (CPUType) {
152 default: assert(0 && "Unsupported CPU type!");
153 case mach::CTM_x86_64:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000154 return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
155 isPCRel, Type, Size);
Jim Grosbacha8287e32011-03-23 22:06:06 +0000156 case mach::CTM_ARM:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000157 return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
158 isPCRel, Type, Size);
Jim Grosbacha8287e32011-03-23 22:06:06 +0000159 }
160 llvm_unreachable("");
161}
162
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000163bool RuntimeDyldImpl::
164resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
165 bool isPCRel, unsigned Type,
166 unsigned Size) {
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000167 // If the relocation is PC-relative, the value to be encoded is the
168 // pointer difference.
169 if (isPCRel)
170 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
171 // address. Is that expected? Only for branches, perhaps?
172 Value -= Address + 4;
173
174 switch(Type) {
175 default:
176 llvm_unreachable("Invalid relocation type!");
177 case macho::RIT_X86_64_Unsigned:
178 case macho::RIT_X86_64_Branch: {
179 // Mask in the target value a byte at a time (we don't have an alignment
180 // guarantee for the target address, so this is safest).
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000181 uint8_t *p = (uint8_t*)Address;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000182 for (unsigned i = 0; i < Size; ++i) {
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000183 *p++ = (uint8_t)Value;
184 Value >>= 8;
185 }
186 return false;
187 }
188 case macho::RIT_X86_64_Signed:
189 case macho::RIT_X86_64_GOTLoad:
190 case macho::RIT_X86_64_GOT:
191 case macho::RIT_X86_64_Subtractor:
192 case macho::RIT_X86_64_Signed1:
193 case macho::RIT_X86_64_Signed2:
194 case macho::RIT_X86_64_Signed4:
195 case macho::RIT_X86_64_TLV:
196 return Error("Relocation type not implemented yet!");
197 }
198 return false;
199}
Jim Grosbach6e563312011-03-21 22:15:52 +0000200
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000201bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
Jim Grosbacha8287e32011-03-23 22:06:06 +0000202 bool isPCRel, unsigned Type,
203 unsigned Size) {
204 // If the relocation is PC-relative, the value to be encoded is the
205 // pointer difference.
206 if (isPCRel) {
207 Value -= Address;
208 // ARM PCRel relocations have an effective-PC offset of two instructions
209 // (four bytes in Thumb mode, 8 bytes in ARM mode).
210 // FIXME: For now, assume ARM mode.
211 Value -= 8;
212 }
213
214 switch(Type) {
215 default:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000216 llvm_unreachable("Invalid relocation type!");
Jim Grosbacha8287e32011-03-23 22:06:06 +0000217 case macho::RIT_Vanilla: {
218 llvm_unreachable("Invalid relocation type!");
219 // Mask in the target value a byte at a time (we don't have an alignment
220 // guarantee for the target address, so this is safest).
221 uint8_t *p = (uint8_t*)Address;
222 for (unsigned i = 0; i < Size; ++i) {
223 *p++ = (uint8_t)Value;
224 Value >>= 8;
225 }
Jim Grosbach5ffe37f2011-03-23 23:35:17 +0000226 break;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000227 }
Jim Grosbach5ffe37f2011-03-23 23:35:17 +0000228 case macho::RIT_ARM_Branch24Bit: {
229 // Mask the value into the target address. We know instructions are
230 // 32-bit aligned, so we can do it all at once.
231 uint32_t *p = (uint32_t*)Address;
232 // The low two bits of the value are not encoded.
233 Value >>= 2;
234 // Mask the value to 24 bits.
235 Value &= 0xffffff;
236 // FIXME: If the destination is a Thumb function (and the instruction
237 // is a non-predicated BL instruction), we need to change it to a BLX
238 // instruction instead.
239
240 // Insert the value into the instruction.
241 *p = (*p & ~0xffffff) | Value;
242 break;
243 }
Jim Grosbacha8287e32011-03-23 22:06:06 +0000244 case macho::RIT_ARM_ThumbBranch22Bit:
245 case macho::RIT_ARM_ThumbBranch32Bit:
246 case macho::RIT_ARM_Half:
247 case macho::RIT_ARM_HalfDifference:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000248 case macho::RIT_Pair:
249 case macho::RIT_Difference:
250 case macho::RIT_ARM_LocalDifference:
251 case macho::RIT_ARM_PreboundLazyPointer:
Jim Grosbacha8287e32011-03-23 22:06:06 +0000252 return Error("Relocation type not implemented yet!");
253 }
254 return false;
255}
256
Jim Grosbach6e563312011-03-21 22:15:52 +0000257bool RuntimeDyldImpl::
258loadSegment32(const MachOObject *Obj,
259 const MachOObject::LoadCommandInfo *SegmentLCI,
260 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000261 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
262 Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
263 if (!SegmentLC)
Jim Grosbach6e563312011-03-21 22:15:52 +0000264 return Error("unable to load segment load command");
265
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000266 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000267 InMemoryStruct<macho::Section> Sect;
Jim Grosbachb0271052011-04-08 17:31:24 +0000268 Obj->ReadSection(*SegmentLCI, SectNum, Sect);
269 if (!Sect)
270 return Error("unable to load section: '" + Twine(SectNum) + "'");
Jim Grosbach6e563312011-03-21 22:15:52 +0000271
Jim Grosbach757a1422011-05-12 21:21:16 +0000272 // FIXME: For the time being, we're only loading text segments.
Jim Grosbachb0271052011-04-08 17:31:24 +0000273 if (Sect->Flags != 0x80000400)
Jim Grosbach757a1422011-05-12 21:21:16 +0000274 continue;
Jim Grosbach6e563312011-03-21 22:15:52 +0000275
Jim Grosbachb0271052011-04-08 17:31:24 +0000276 // Address and names of symbols in the section.
277 typedef std::pair<uint64_t, StringRef> SymbolEntry;
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000278 SmallVector<SymbolEntry, 64> Symbols;
279 // Index of all the names, in this section or not. Used when we're
280 // dealing with relocation entries.
281 SmallVector<StringRef, 64> SymbolNames;
Jim Grosbachb0271052011-04-08 17:31:24 +0000282 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
283 InMemoryStruct<macho::SymbolTableEntry> STE;
284 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
285 if (!STE)
286 return Error("unable to read symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000287 if (STE->SectionIndex > SegmentLC->NumSections)
Benjamin Kramercc513e1c2011-04-09 10:10:35 +0000288 return Error("invalid section index for symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000289 // Get the symbol name.
290 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
291 SymbolNames.push_back(Name);
Jim Grosbachb0271052011-04-08 17:31:24 +0000292
293 // Just skip symbols not defined in this section.
Jim Grosbache2e777b2011-04-08 21:11:20 +0000294 if ((unsigned)STE->SectionIndex - 1 != SectNum)
Jim Grosbachb0271052011-04-08 17:31:24 +0000295 continue;
296
Jim Grosbachb0271052011-04-08 17:31:24 +0000297 // FIXME: Check the symbol type and flags.
298 if (STE->Type != 0xF) // external, defined in this section.
299 return Error("unexpected symbol type!");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000300 // Flags == 0x8 marks a thumb function for ARM, which is fine as it
301 // doesn't require any special handling here.
302 if (STE->Flags != 0x0 && STE->Flags != 0x8)
Jim Grosbachb0271052011-04-08 17:31:24 +0000303 return Error("unexpected symbol type!");
304
Jim Grosbachb0271052011-04-08 17:31:24 +0000305 // Remember the symbol.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000306 Symbols.push_back(SymbolEntry(STE->Value, Name));
Jim Grosbachb0271052011-04-08 17:31:24 +0000307
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000308 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
309 (Sect->Address + STE->Value) << "\n");
Jim Grosbachb0271052011-04-08 17:31:24 +0000310 }
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000311 // Sort the symbols by address, just in case they didn't come in that way.
Jim Grosbachb0271052011-04-08 17:31:24 +0000312 array_pod_sort(Symbols.begin(), Symbols.end());
313
314 // Extract the function data.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000315 uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
316 SegmentLC->FileSize).data();
Jim Grosbachb0271052011-04-08 17:31:24 +0000317 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000318 uint64_t StartOffset = Sect->Address + Symbols[i].first;
Jim Grosbachb0271052011-04-08 17:31:24 +0000319 uint64_t EndOffset = Symbols[i + 1].first - 1;
320 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
321 << " from [" << StartOffset << ", " << EndOffset << "]\n");
322 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
323 }
324 // The last symbol we do after since the end address is calculated
325 // differently because there is no next symbol to reference.
326 uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
327 uint64_t EndOffset = Sect->Size - 1;
328 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
329 << " from [" << StartOffset << ", " << EndOffset << "]\n");
330 extractFunction(Symbols[Symbols.size()-1].second,
331 Base + StartOffset, Base + EndOffset);
Jim Grosbach6e563312011-03-21 22:15:52 +0000332
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000333 // Now extract the relocation information for each function and process it.
334 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
335 InMemoryStruct<macho::RelocationEntry> RE;
336 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
337 if (RE->Word0 & macho::RF_Scattered)
338 return Error("NOT YET IMPLEMENTED: scattered relocations.");
339 // Word0 of the relocation is the offset into the section where the
340 // relocation should be applied. We need to translate that into an
341 // offset into a function since that's our atom.
342 uint32_t Offset = RE->Word0;
343 // Look for the function containing the address. This is used for JIT
344 // code, so the number of functions in section is almost always going
345 // to be very small (usually just one), so until we have use cases
346 // where that's not true, just use a trivial linear search.
347 unsigned SymbolNum;
348 unsigned NumSymbols = Symbols.size();
349 assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
350 "No symbol containing relocation!");
351 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
352 if (Symbols[SymbolNum + 1].first > Offset)
353 break;
354 // Adjust the offset to be relative to the symbol.
355 Offset -= Symbols[SymbolNum].first;
356 // Get the name of the symbol containing the relocation.
357 StringRef TargetName = SymbolNames[SymbolNum];
358
359 bool isExtern = (RE->Word1 >> 27) & 1;
360 // Figure out the source symbol of the relocation. If isExtern is true,
361 // this relocation references the symbol table, otherwise it references
362 // a section in the same object, numbered from 1 through NumSections
363 // (SectionBases is [0, NumSections-1]).
364 // FIXME: Some targets (ARM) use internal relocations even for
365 // externally visible symbols, if the definition is in the same
366 // file as the reference. We need to convert those back to by-name
367 // references. We can resolve the address based on the section
368 // offset and see if we have a symbol at that address. If we do,
369 // use that; otherwise, puke.
370 if (!isExtern)
371 return Error("Internal relocations not supported.");
372 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
373 StringRef SourceName = SymbolNames[SourceNum];
374
375 // FIXME: Get the relocation addend from the target address.
376
377 // Now store the relocation information. Associate it with the source
378 // symbol.
379 Relocations[SourceName].push_back(RelocationEntry(TargetName,
380 Offset,
381 RE->Word1,
382 0 /*Addend*/));
383 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
384 << " from '" << SourceName << "(Word1: "
385 << format("0x%x", RE->Word1) << ")\n");
386 }
387 }
Jim Grosbach6e563312011-03-21 22:15:52 +0000388 return false;
389}
390
391
392bool RuntimeDyldImpl::
393loadSegment64(const MachOObject *Obj,
394 const MachOObject::LoadCommandInfo *SegmentLCI,
395 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
396 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
397 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
398 if (!Segment64LC)
399 return Error("unable to load segment load command");
400
Jim Grosbachc41ab782011-04-06 01:11:05 +0000401 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000402 InMemoryStruct<macho::Section64> Sect;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000403 Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
Jim Grosbach6e563312011-03-21 22:15:52 +0000404 if (!Sect)
Jim Grosbachc41ab782011-04-06 01:11:05 +0000405 return Error("unable to load section: '" + Twine(SectNum) + "'");
Jim Grosbach6e563312011-03-21 22:15:52 +0000406
Jim Grosbach757a1422011-05-12 21:21:16 +0000407 // FIXME: For the time being, we're only loading text segments.
Jim Grosbach6e563312011-03-21 22:15:52 +0000408 if (Sect->Flags != 0x80000400)
Jim Grosbach757a1422011-05-12 21:21:16 +0000409 continue;
Jim Grosbach6e563312011-03-21 22:15:52 +0000410
Jim Grosbachc41ab782011-04-06 01:11:05 +0000411 // Address and names of symbols in the section.
412 typedef std::pair<uint64_t, StringRef> SymbolEntry;
413 SmallVector<SymbolEntry, 64> Symbols;
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000414 // Index of all the names, in this section or not. Used when we're
415 // dealing with relocation entries.
416 SmallVector<StringRef, 64> SymbolNames;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000417 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
418 InMemoryStruct<macho::Symbol64TableEntry> STE;
419 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
420 if (!STE)
421 return Error("unable to read symbol: '" + Twine(i) + "'");
422 if (STE->SectionIndex > Segment64LC->NumSections)
Benjamin Kramercc513e1c2011-04-09 10:10:35 +0000423 return Error("invalid section index for symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000424 // Get the symbol name.
425 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
426 SymbolNames.push_back(Name);
Jim Grosbachc41ab782011-04-06 01:11:05 +0000427
428 // Just skip symbols not defined in this section.
Jim Grosbache2e777b2011-04-08 21:11:20 +0000429 if ((unsigned)STE->SectionIndex - 1 != SectNum)
Jim Grosbachc41ab782011-04-06 01:11:05 +0000430 continue;
431
Jim Grosbachc41ab782011-04-06 01:11:05 +0000432 // FIXME: Check the symbol type and flags.
433 if (STE->Type != 0xF) // external, defined in this section.
434 return Error("unexpected symbol type!");
435 if (STE->Flags != 0x0)
436 return Error("unexpected symbol type!");
437
Jim Grosbachc41ab782011-04-06 01:11:05 +0000438 // Remember the symbol.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000439 Symbols.push_back(SymbolEntry(STE->Value, Name));
Jim Grosbachc41ab782011-04-06 01:11:05 +0000440
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000441 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
442 (Sect->Address + STE->Value) << "\n");
Jim Grosbachc41ab782011-04-06 01:11:05 +0000443 }
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000444 // Sort the symbols by address, just in case they didn't come in that way.
Jim Grosbachc41ab782011-04-06 01:11:05 +0000445 array_pod_sort(Symbols.begin(), Symbols.end());
446
447 // Extract the function data.
448 uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
449 Segment64LC->FileSize).data();
450 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000451 uint64_t StartOffset = Sect->Address + Symbols[i].first;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000452 uint64_t EndOffset = Symbols[i + 1].first - 1;
453 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
454 << " from [" << StartOffset << ", " << EndOffset << "]\n");
455 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
456 }
457 // The last symbol we do after since the end address is calculated
458 // differently because there is no next symbol to reference.
459 uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
460 uint64_t EndOffset = Sect->Size - 1;
461 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
462 << " from [" << StartOffset << ", " << EndOffset << "]\n");
463 extractFunction(Symbols[Symbols.size()-1].second,
464 Base + StartOffset, Base + EndOffset);
Jim Grosbach6e563312011-03-21 22:15:52 +0000465
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000466 // Now extract the relocation information for each function and process it.
467 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
468 InMemoryStruct<macho::RelocationEntry> RE;
469 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
470 if (RE->Word0 & macho::RF_Scattered)
471 return Error("NOT YET IMPLEMENTED: scattered relocations.");
472 // Word0 of the relocation is the offset into the section where the
473 // relocation should be applied. We need to translate that into an
474 // offset into a function since that's our atom.
475 uint32_t Offset = RE->Word0;
476 // Look for the function containing the address. This is used for JIT
477 // code, so the number of functions in section is almost always going
478 // to be very small (usually just one), so until we have use cases
479 // where that's not true, just use a trivial linear search.
480 unsigned SymbolNum;
481 unsigned NumSymbols = Symbols.size();
482 assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
483 "No symbol containing relocation!");
484 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
485 if (Symbols[SymbolNum + 1].first > Offset)
486 break;
487 // Adjust the offset to be relative to the symbol.
488 Offset -= Symbols[SymbolNum].first;
489 // Get the name of the symbol containing the relocation.
490 StringRef TargetName = SymbolNames[SymbolNum];
491
492 bool isExtern = (RE->Word1 >> 27) & 1;
493 // Figure out the source symbol of the relocation. If isExtern is true,
494 // this relocation references the symbol table, otherwise it references
495 // a section in the same object, numbered from 1 through NumSections
496 // (SectionBases is [0, NumSections-1]).
497 if (!isExtern)
498 return Error("Internal relocations not supported.");
499 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
500 StringRef SourceName = SymbolNames[SourceNum];
501
502 // FIXME: Get the relocation addend from the target address.
503
504 // Now store the relocation information. Associate it with the source
505 // symbol.
506 Relocations[SourceName].push_back(RelocationEntry(TargetName,
507 Offset,
508 RE->Word1,
509 0 /*Addend*/));
510 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
511 << " from '" << SourceName << "(Word1: "
512 << format("0x%x", RE->Word1) << ")\n");
513 }
514 }
Jim Grosbach6e563312011-03-21 22:15:52 +0000515 return false;
516}
517
Jim Grosbach6e563312011-03-21 22:15:52 +0000518bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
519 // If the linker is in an error state, don't do anything.
520 if (hasError())
521 return true;
522 // Load the Mach-O wrapper object.
523 std::string ErrorStr;
524 OwningPtr<MachOObject> Obj(
525 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
526 if (!Obj)
527 return Error("unable to load object: '" + ErrorStr + "'");
528
Jim Grosbacha8287e32011-03-23 22:06:06 +0000529 // Get the CPU type information from the header.
530 const macho::Header &Header = Obj->getHeader();
531
532 // FIXME: Error checking that the loaded object is compatible with
533 // the system we're running on.
534 CPUType = Header.CPUType;
535 CPUSubtype = Header.CPUSubtype;
536
Jim Grosbach6e563312011-03-21 22:15:52 +0000537 // Validate that the load commands match what we expect.
538 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
539 *DysymtabLCI = 0;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000540 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000541 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
542 switch (LCI.Command.Type) {
543 case macho::LCT_Segment:
544 case macho::LCT_Segment64:
545 if (SegmentLCI)
546 return Error("unexpected input object (multiple segments)");
547 SegmentLCI = &LCI;
548 break;
549 case macho::LCT_Symtab:
550 if (SymtabLCI)
551 return Error("unexpected input object (multiple symbol tables)");
552 SymtabLCI = &LCI;
553 break;
554 case macho::LCT_Dysymtab:
555 if (DysymtabLCI)
556 return Error("unexpected input object (multiple symbol tables)");
557 DysymtabLCI = &LCI;
558 break;
559 default:
560 return Error("unexpected input object (unexpected load command");
561 }
562 }
563
564 if (!SymtabLCI)
565 return Error("no symbol table found in object");
566 if (!SegmentLCI)
567 return Error("no symbol table found in object");
568
569 // Read and register the symbol table data.
570 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
571 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
572 if (!SymtabLC)
573 return Error("unable to load symbol table load command");
574 Obj->RegisterStringTable(*SymtabLC);
575
576 // Read the dynamic link-edit information, if present (not present in static
577 // objects).
578 if (DysymtabLCI) {
579 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
580 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
581 if (!DysymtabLC)
582 return Error("unable to load dynamic link-exit load command");
583
584 // FIXME: We don't support anything interesting yet.
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000585// if (DysymtabLC->LocalSymbolsIndex != 0)
586// return Error("NOT YET IMPLEMENTED: local symbol entries");
587// if (DysymtabLC->ExternalSymbolsIndex != 0)
588// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
589// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
590// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
Jim Grosbach6e563312011-03-21 22:15:52 +0000591 }
592
593 // Load the segment load command.
594 if (SegmentLCI->Command.Type == macho::LCT_Segment) {
595 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
596 return true;
597 } else {
598 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
599 return true;
600 }
601
602 return false;
603}
604
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000605// Resolve the relocations for all symbols we currently know about.
606void RuntimeDyldImpl::resolveRelocations() {
607 // Just iterate over the symbols in our symbol table and assign their
608 // addresses.
609 StringMap<uint8_t*>::iterator i = SymbolTable.begin();
610 StringMap<uint8_t*>::iterator e = SymbolTable.end();
611 for (;i != e; ++i)
612 reassignSymbolAddress(i->getKey(), i->getValue());
613}
614
615// Assign an address to a symbol name and resolve all the relocations
616// associated with it.
617void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
618 // Assign the address in our symbol table.
619 SymbolTable[Name] = Addr;
620
621 RelocationList &Relocs = Relocations[Name];
622 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
623 RelocationEntry &RE = Relocs[i];
624 uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
625 bool isPCRel = (RE.Data >> 24) & 1;
626 unsigned Type = (RE.Data >> 28) & 0xf;
627 unsigned Size = 1 << ((RE.Data >> 25) & 3);
628
629 DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
630 << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
631 << " from '" << Name << " (" << format("%p", Addr) << ")"
632 << "(" << (isPCRel ? "pcrel" : "absolute")
633 << ", type: " << Type << ", Size: " << Size << ").\n");
634
635 resolveRelocation(Target, Addr, isPCRel, Type, Size);
636 RE.isResolved = true;
637 }
638}
Jim Grosbach6e563312011-03-21 22:15:52 +0000639
640//===----------------------------------------------------------------------===//
641// RuntimeDyld class implementation
Jim Grosbachfcbe5b72011-04-04 23:04:39 +0000642RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
643 Dyld = new RuntimeDyldImpl(MM);
Jim Grosbach6e563312011-03-21 22:15:52 +0000644}
645
646RuntimeDyld::~RuntimeDyld() {
647 delete Dyld;
648}
649
650bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
651 return Dyld->loadObject(InputBuffer);
652}
653
Jim Grosbachb0271052011-04-08 17:31:24 +0000654void *RuntimeDyld::getSymbolAddress(StringRef Name) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000655 return Dyld->getSymbolAddress(Name);
656}
657
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000658void RuntimeDyld::resolveRelocations() {
659 Dyld->resolveRelocations();
660}
661
662void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
663 Dyld->reassignSymbolAddress(Name, Addr);
664}
665
Jim Grosbach91dde152011-03-22 18:22:27 +0000666StringRef RuntimeDyld::getErrorString() {
Jim Grosbachb3eecaf2011-03-22 18:19:42 +0000667 return Dyld->getErrorString();
668}
669
Jim Grosbach6e563312011-03-21 22:15:52 +0000670} // end namespace llvm