blob: 7548a87c9556c76eec115b7f1ca46ba4639254fe [file] [log] [blame]
//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implementation of the MC-JIT runtime dynamic linker.
11//
12//===----------------------------------------------------------------------===//
13
Jim Grosbach8b54dca2011-03-23 19:52:00 +000014#define DEBUG_TYPE "dyld"
Jim Grosbach6e563312011-03-21 22:15:52 +000015#include "llvm/ADT/OwningPtr.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000016#include "llvm/ADT/SmallVector.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000017#include "llvm/ADT/StringMap.h"
18#include "llvm/ADT/StringRef.h"
Jim Grosbachc41ab782011-04-06 01:11:05 +000019#include "llvm/ADT/STLExtras.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000020#include "llvm/ADT/Twine.h"
21#include "llvm/ExecutionEngine/RuntimeDyld.h"
22#include "llvm/Object/MachOObject.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000023#include "llvm/Support/Debug.h"
24#include "llvm/Support/ErrorHandling.h"
25#include "llvm/Support/Format.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000026#include "llvm/Support/Memory.h"
27#include "llvm/Support/MemoryBuffer.h"
28#include "llvm/Support/system_error.h"
Jim Grosbach8b54dca2011-03-23 19:52:00 +000029#include "llvm/Support/raw_ostream.h"
Jim Grosbach6e563312011-03-21 22:15:52 +000030using namespace llvm;
31using namespace llvm::object;
32
Chandler Carruth53c5e7b2011-04-05 23:54:31 +000033// Empty out-of-line virtual destructor as the key function.
34RTDyldMemoryManager::~RTDyldMemoryManager() {}
35
Jim Grosbach6e563312011-03-21 22:15:52 +000036namespace llvm {
37class RuntimeDyldImpl {
Jim Grosbacha8287e32011-03-23 22:06:06 +000038 unsigned CPUType;
39 unsigned CPUSubtype;
40
Jim Grosbachfcbe5b72011-04-04 23:04:39 +000041 // The MemoryManager to load objects into.
42 RTDyldMemoryManager *MemMgr;
Jim Grosbach5acfa9f2011-03-29 21:03:05 +000043
Jim Grosbachf8c1c842011-04-12 21:20:41 +000044 // FIXME: This all assumes we're dealing with external symbols for anything
45 // explicitly referenced. I.e., we can index by name and things
46 // will work out. In practice, this may not be the case, so we
47 // should find a way to effectively generalize.
Jim Grosbachc41ab782011-04-06 01:11:05 +000048
49 // For each function, we have a MemoryBlock of it's instruction data.
50 StringMap<sys::MemoryBlock> Functions;
51
Jim Grosbach6e563312011-03-21 22:15:52 +000052 // Master symbol table. As modules are loaded and external symbols are
53 // resolved, their addresses are stored here.
Jim Grosbachf8c1c842011-04-12 21:20:41 +000054 StringMap<uint8_t*> SymbolTable;
Jim Grosbach6e563312011-03-21 22:15:52 +000055
Jim Grosbachf8c1c842011-04-12 21:20:41 +000056 // For each symbol, keep a list of relocations based on it. Anytime
57 // its address is reassigned (the JIT re-compiled the function, e.g.),
58 // the relocations get re-resolved.
59 struct RelocationEntry {
60 std::string Target; // Object this relocation is contained in.
61 uint64_t Offset; // Offset into the object for the relocation.
62 uint32_t Data; // Second word of the raw macho relocation entry.
63 int64_t Addend; // Addend encoded in the instruction itself, if any.
64 bool isResolved; // Has this relocation been resolved previously?
65
66 RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
67 : Target(t), Offset(offset), Data(data), Addend(addend),
68 isResolved(false) {}
69 };
70 typedef SmallVector<RelocationEntry, 4> RelocationList;
71 StringMap<RelocationList> Relocations;
72
73 // FIXME: Also keep a map of all the relocations contained in an object. Use
74 // this to dynamically answer whether all of the relocations in it have
75 // been resolved or not.
Jim Grosbach6e563312011-03-21 22:15:52 +000076
77 bool HasError;
78 std::string ErrorStr;
79
80 // Set the error state and record an error string.
81 bool Error(const Twine &Msg) {
82 ErrorStr = Msg.str();
83 HasError = true;
84 return true;
85 }
86
Jim Grosbachc41ab782011-04-06 01:11:05 +000087 void extractFunction(StringRef Name, uint8_t *StartAddress,
88 uint8_t *EndAddress);
Jim Grosbachf8c1c842011-04-12 21:20:41 +000089 bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
90 unsigned Type, unsigned Size);
91 bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
Jim Grosbacha8287e32011-03-23 22:06:06 +000092 unsigned Type, unsigned Size);
Jim Grosbachf8c1c842011-04-12 21:20:41 +000093 bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
Jim Grosbacha8287e32011-03-23 22:06:06 +000094 unsigned Type, unsigned Size);
Jim Grosbach8b54dca2011-03-23 19:52:00 +000095
Jim Grosbach6e563312011-03-21 22:15:52 +000096 bool loadSegment32(const MachOObject *Obj,
97 const MachOObject::LoadCommandInfo *SegmentLCI,
98 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
99 bool loadSegment64(const MachOObject *Obj,
100 const MachOObject::LoadCommandInfo *SegmentLCI,
101 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
102
103public:
Jim Grosbachfcbe5b72011-04-04 23:04:39 +0000104 RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
Jim Grosbach8371c892011-03-22 00:42:19 +0000105
Jim Grosbach6e563312011-03-21 22:15:52 +0000106 bool loadObject(MemoryBuffer *InputBuffer);
107
Jim Grosbachb0271052011-04-08 17:31:24 +0000108 void *getSymbolAddress(StringRef Name) {
Jim Grosbachc41ab782011-04-06 01:11:05 +0000109 // FIXME: Just look up as a function for now. Overly simple of course.
110 // Work in progress.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000111 return SymbolTable.lookup(Name);
Jim Grosbach6e563312011-03-21 22:15:52 +0000112 }
113
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000114 void resolveRelocations();
115
116 void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
117
Jim Grosbach6e563312011-03-21 22:15:52 +0000118 // Is the linker in an error state?
119 bool hasError() { return HasError; }
120
121 // Mark the error condition as handled and continue.
122 void clearError() { HasError = false; }
123
124 // Get the error message.
125 StringRef getErrorString() { return ErrorStr; }
126};
127
Jim Grosbachc41ab782011-04-06 01:11:05 +0000128void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
Jim Grosbach01ccab42011-04-06 22:13:52 +0000129 uint8_t *EndAddress) {
Jim Grosbachc41ab782011-04-06 01:11:05 +0000130 // Allocate memory for the function via the memory manager.
131 uintptr_t Size = EndAddress - StartAddress + 1;
132 uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), Size);
133 assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
134 "Memory manager failed to allocate enough memory!");
135 // Copy the function payload into the memory block.
136 memcpy(Mem, StartAddress, EndAddress - StartAddress + 1);
137 MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
138 // Remember where we put it.
139 Functions[Name] = sys::MemoryBlock(Mem, Size);
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000140 // Default the assigned address for this symbol to wherever this
141 // allocated it.
142 SymbolTable[Name] = Mem;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000143 DEBUG(dbgs() << " allocated to " << Mem << "\n");
144}
145
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000146bool RuntimeDyldImpl::
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000147resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
148 unsigned Type, unsigned Size) {
149 // This just dispatches to the proper target specific routine.
Jim Grosbacha8287e32011-03-23 22:06:06 +0000150 switch (CPUType) {
151 default: assert(0 && "Unsupported CPU type!");
152 case mach::CTM_x86_64:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000153 return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
154 isPCRel, Type, Size);
Jim Grosbacha8287e32011-03-23 22:06:06 +0000155 case mach::CTM_ARM:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000156 return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
157 isPCRel, Type, Size);
Jim Grosbacha8287e32011-03-23 22:06:06 +0000158 }
159 llvm_unreachable("");
160}
161
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000162bool RuntimeDyldImpl::
163resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
164 bool isPCRel, unsigned Type,
165 unsigned Size) {
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000166 // If the relocation is PC-relative, the value to be encoded is the
167 // pointer difference.
168 if (isPCRel)
169 // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
170 // address. Is that expected? Only for branches, perhaps?
171 Value -= Address + 4;
172
173 switch(Type) {
174 default:
175 llvm_unreachable("Invalid relocation type!");
176 case macho::RIT_X86_64_Unsigned:
177 case macho::RIT_X86_64_Branch: {
178 // Mask in the target value a byte at a time (we don't have an alignment
179 // guarantee for the target address, so this is safest).
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000180 uint8_t *p = (uint8_t*)Address;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000181 for (unsigned i = 0; i < Size; ++i) {
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000182 *p++ = (uint8_t)Value;
183 Value >>= 8;
184 }
185 return false;
186 }
187 case macho::RIT_X86_64_Signed:
188 case macho::RIT_X86_64_GOTLoad:
189 case macho::RIT_X86_64_GOT:
190 case macho::RIT_X86_64_Subtractor:
191 case macho::RIT_X86_64_Signed1:
192 case macho::RIT_X86_64_Signed2:
193 case macho::RIT_X86_64_Signed4:
194 case macho::RIT_X86_64_TLV:
195 return Error("Relocation type not implemented yet!");
196 }
197 return false;
198}
Jim Grosbach6e563312011-03-21 22:15:52 +0000199
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000200bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
Jim Grosbacha8287e32011-03-23 22:06:06 +0000201 bool isPCRel, unsigned Type,
202 unsigned Size) {
203 // If the relocation is PC-relative, the value to be encoded is the
204 // pointer difference.
205 if (isPCRel) {
206 Value -= Address;
207 // ARM PCRel relocations have an effective-PC offset of two instructions
208 // (four bytes in Thumb mode, 8 bytes in ARM mode).
209 // FIXME: For now, assume ARM mode.
210 Value -= 8;
211 }
212
213 switch(Type) {
214 default:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000215 llvm_unreachable("Invalid relocation type!");
Jim Grosbacha8287e32011-03-23 22:06:06 +0000216 case macho::RIT_Vanilla: {
217 llvm_unreachable("Invalid relocation type!");
218 // Mask in the target value a byte at a time (we don't have an alignment
219 // guarantee for the target address, so this is safest).
220 uint8_t *p = (uint8_t*)Address;
221 for (unsigned i = 0; i < Size; ++i) {
222 *p++ = (uint8_t)Value;
223 Value >>= 8;
224 }
Jim Grosbach5ffe37f2011-03-23 23:35:17 +0000225 break;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000226 }
Jim Grosbach5ffe37f2011-03-23 23:35:17 +0000227 case macho::RIT_ARM_Branch24Bit: {
228 // Mask the value into the target address. We know instructions are
229 // 32-bit aligned, so we can do it all at once.
230 uint32_t *p = (uint32_t*)Address;
231 // The low two bits of the value are not encoded.
232 Value >>= 2;
233 // Mask the value to 24 bits.
234 Value &= 0xffffff;
235 // FIXME: If the destination is a Thumb function (and the instruction
236 // is a non-predicated BL instruction), we need to change it to a BLX
237 // instruction instead.
238
239 // Insert the value into the instruction.
240 *p = (*p & ~0xffffff) | Value;
241 break;
242 }
Jim Grosbacha8287e32011-03-23 22:06:06 +0000243 case macho::RIT_ARM_ThumbBranch22Bit:
244 case macho::RIT_ARM_ThumbBranch32Bit:
245 case macho::RIT_ARM_Half:
246 case macho::RIT_ARM_HalfDifference:
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000247 case macho::RIT_Pair:
248 case macho::RIT_Difference:
249 case macho::RIT_ARM_LocalDifference:
250 case macho::RIT_ARM_PreboundLazyPointer:
Jim Grosbacha8287e32011-03-23 22:06:06 +0000251 return Error("Relocation type not implemented yet!");
252 }
253 return false;
254}
255
Jim Grosbach6e563312011-03-21 22:15:52 +0000256bool RuntimeDyldImpl::
257loadSegment32(const MachOObject *Obj,
258 const MachOObject::LoadCommandInfo *SegmentLCI,
259 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000260 InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
261 Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
262 if (!SegmentLC)
Jim Grosbach6e563312011-03-21 22:15:52 +0000263 return Error("unable to load segment load command");
264
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000265 for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000266 InMemoryStruct<macho::Section> Sect;
Jim Grosbachb0271052011-04-08 17:31:24 +0000267 Obj->ReadSection(*SegmentLCI, SectNum, Sect);
268 if (!Sect)
269 return Error("unable to load section: '" + Twine(SectNum) + "'");
Jim Grosbach6e563312011-03-21 22:15:52 +0000270
Jim Grosbach757a1422011-05-12 21:21:16 +0000271 // FIXME: For the time being, we're only loading text segments.
Jim Grosbachb0271052011-04-08 17:31:24 +0000272 if (Sect->Flags != 0x80000400)
Jim Grosbach757a1422011-05-12 21:21:16 +0000273 continue;
Jim Grosbach6e563312011-03-21 22:15:52 +0000274
Jim Grosbachb0271052011-04-08 17:31:24 +0000275 // Address and names of symbols in the section.
276 typedef std::pair<uint64_t, StringRef> SymbolEntry;
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000277 SmallVector<SymbolEntry, 64> Symbols;
278 // Index of all the names, in this section or not. Used when we're
279 // dealing with relocation entries.
280 SmallVector<StringRef, 64> SymbolNames;
Jim Grosbachb0271052011-04-08 17:31:24 +0000281 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
282 InMemoryStruct<macho::SymbolTableEntry> STE;
283 Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
284 if (!STE)
285 return Error("unable to read symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000286 if (STE->SectionIndex > SegmentLC->NumSections)
Benjamin Kramercc513e1c2011-04-09 10:10:35 +0000287 return Error("invalid section index for symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000288 // Get the symbol name.
289 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
290 SymbolNames.push_back(Name);
Jim Grosbachb0271052011-04-08 17:31:24 +0000291
292 // Just skip symbols not defined in this section.
Jim Grosbache2e777b2011-04-08 21:11:20 +0000293 if ((unsigned)STE->SectionIndex - 1 != SectNum)
Jim Grosbachb0271052011-04-08 17:31:24 +0000294 continue;
295
Jim Grosbachb0271052011-04-08 17:31:24 +0000296 // FIXME: Check the symbol type and flags.
297 if (STE->Type != 0xF) // external, defined in this section.
298 return Error("unexpected symbol type!");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000299 // Flags == 0x8 marks a thumb function for ARM, which is fine as it
300 // doesn't require any special handling here.
301 if (STE->Flags != 0x0 && STE->Flags != 0x8)
Jim Grosbachb0271052011-04-08 17:31:24 +0000302 return Error("unexpected symbol type!");
303
Jim Grosbachb0271052011-04-08 17:31:24 +0000304 // Remember the symbol.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000305 Symbols.push_back(SymbolEntry(STE->Value, Name));
Jim Grosbachb0271052011-04-08 17:31:24 +0000306
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000307 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
308 (Sect->Address + STE->Value) << "\n");
Jim Grosbachb0271052011-04-08 17:31:24 +0000309 }
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000310 // Sort the symbols by address, just in case they didn't come in that way.
Jim Grosbachb0271052011-04-08 17:31:24 +0000311 array_pod_sort(Symbols.begin(), Symbols.end());
312
313 // Extract the function data.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000314 uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
315 SegmentLC->FileSize).data();
Jim Grosbachb0271052011-04-08 17:31:24 +0000316 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000317 uint64_t StartOffset = Sect->Address + Symbols[i].first;
Jim Grosbachb0271052011-04-08 17:31:24 +0000318 uint64_t EndOffset = Symbols[i + 1].first - 1;
319 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
320 << " from [" << StartOffset << ", " << EndOffset << "]\n");
321 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
322 }
323 // The last symbol we do after since the end address is calculated
324 // differently because there is no next symbol to reference.
325 uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
326 uint64_t EndOffset = Sect->Size - 1;
327 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
328 << " from [" << StartOffset << ", " << EndOffset << "]\n");
329 extractFunction(Symbols[Symbols.size()-1].second,
330 Base + StartOffset, Base + EndOffset);
Jim Grosbach6e563312011-03-21 22:15:52 +0000331
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000332 // Now extract the relocation information for each function and process it.
333 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
334 InMemoryStruct<macho::RelocationEntry> RE;
335 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
336 if (RE->Word0 & macho::RF_Scattered)
337 return Error("NOT YET IMPLEMENTED: scattered relocations.");
338 // Word0 of the relocation is the offset into the section where the
339 // relocation should be applied. We need to translate that into an
340 // offset into a function since that's our atom.
341 uint32_t Offset = RE->Word0;
342 // Look for the function containing the address. This is used for JIT
343 // code, so the number of functions in section is almost always going
344 // to be very small (usually just one), so until we have use cases
345 // where that's not true, just use a trivial linear search.
346 unsigned SymbolNum;
347 unsigned NumSymbols = Symbols.size();
348 assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
349 "No symbol containing relocation!");
350 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
351 if (Symbols[SymbolNum + 1].first > Offset)
352 break;
353 // Adjust the offset to be relative to the symbol.
354 Offset -= Symbols[SymbolNum].first;
355 // Get the name of the symbol containing the relocation.
356 StringRef TargetName = SymbolNames[SymbolNum];
357
358 bool isExtern = (RE->Word1 >> 27) & 1;
359 // Figure out the source symbol of the relocation. If isExtern is true,
360 // this relocation references the symbol table, otherwise it references
361 // a section in the same object, numbered from 1 through NumSections
362 // (SectionBases is [0, NumSections-1]).
363 // FIXME: Some targets (ARM) use internal relocations even for
364 // externally visible symbols, if the definition is in the same
365 // file as the reference. We need to convert those back to by-name
366 // references. We can resolve the address based on the section
367 // offset and see if we have a symbol at that address. If we do,
368 // use that; otherwise, puke.
369 if (!isExtern)
370 return Error("Internal relocations not supported.");
371 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
372 StringRef SourceName = SymbolNames[SourceNum];
373
374 // FIXME: Get the relocation addend from the target address.
375
376 // Now store the relocation information. Associate it with the source
377 // symbol.
378 Relocations[SourceName].push_back(RelocationEntry(TargetName,
379 Offset,
380 RE->Word1,
381 0 /*Addend*/));
382 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
383 << " from '" << SourceName << "(Word1: "
384 << format("0x%x", RE->Word1) << ")\n");
385 }
386 }
Jim Grosbach6e563312011-03-21 22:15:52 +0000387 return false;
388}
389
390
391bool RuntimeDyldImpl::
392loadSegment64(const MachOObject *Obj,
393 const MachOObject::LoadCommandInfo *SegmentLCI,
394 const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
395 InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
396 Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
397 if (!Segment64LC)
398 return Error("unable to load segment load command");
399
Jim Grosbachc41ab782011-04-06 01:11:05 +0000400 for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000401 InMemoryStruct<macho::Section64> Sect;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000402 Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
Jim Grosbach6e563312011-03-21 22:15:52 +0000403 if (!Sect)
Jim Grosbachc41ab782011-04-06 01:11:05 +0000404 return Error("unable to load section: '" + Twine(SectNum) + "'");
Jim Grosbach6e563312011-03-21 22:15:52 +0000405
Jim Grosbach757a1422011-05-12 21:21:16 +0000406 // FIXME: For the time being, we're only loading text segments.
Jim Grosbach6e563312011-03-21 22:15:52 +0000407 if (Sect->Flags != 0x80000400)
Jim Grosbach757a1422011-05-12 21:21:16 +0000408 continue;
Jim Grosbach6e563312011-03-21 22:15:52 +0000409
Jim Grosbachc41ab782011-04-06 01:11:05 +0000410 // Address and names of symbols in the section.
411 typedef std::pair<uint64_t, StringRef> SymbolEntry;
412 SmallVector<SymbolEntry, 64> Symbols;
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000413 // Index of all the names, in this section or not. Used when we're
414 // dealing with relocation entries.
415 SmallVector<StringRef, 64> SymbolNames;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000416 for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
417 InMemoryStruct<macho::Symbol64TableEntry> STE;
418 Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
419 if (!STE)
420 return Error("unable to read symbol: '" + Twine(i) + "'");
421 if (STE->SectionIndex > Segment64LC->NumSections)
Benjamin Kramercc513e1c2011-04-09 10:10:35 +0000422 return Error("invalid section index for symbol: '" + Twine(i) + "'");
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000423 // Get the symbol name.
424 StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
425 SymbolNames.push_back(Name);
Jim Grosbachc41ab782011-04-06 01:11:05 +0000426
427 // Just skip symbols not defined in this section.
Jim Grosbache2e777b2011-04-08 21:11:20 +0000428 if ((unsigned)STE->SectionIndex - 1 != SectNum)
Jim Grosbachc41ab782011-04-06 01:11:05 +0000429 continue;
430
Jim Grosbachc41ab782011-04-06 01:11:05 +0000431 // FIXME: Check the symbol type and flags.
432 if (STE->Type != 0xF) // external, defined in this section.
433 return Error("unexpected symbol type!");
434 if (STE->Flags != 0x0)
435 return Error("unexpected symbol type!");
436
Jim Grosbachc41ab782011-04-06 01:11:05 +0000437 // Remember the symbol.
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000438 Symbols.push_back(SymbolEntry(STE->Value, Name));
Jim Grosbachc41ab782011-04-06 01:11:05 +0000439
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000440 DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
441 (Sect->Address + STE->Value) << "\n");
Jim Grosbachc41ab782011-04-06 01:11:05 +0000442 }
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000443 // Sort the symbols by address, just in case they didn't come in that way.
Jim Grosbachc41ab782011-04-06 01:11:05 +0000444 array_pod_sort(Symbols.begin(), Symbols.end());
445
446 // Extract the function data.
447 uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
448 Segment64LC->FileSize).data();
449 for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000450 uint64_t StartOffset = Sect->Address + Symbols[i].first;
Jim Grosbachc41ab782011-04-06 01:11:05 +0000451 uint64_t EndOffset = Symbols[i + 1].first - 1;
452 DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
453 << " from [" << StartOffset << ", " << EndOffset << "]\n");
454 extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
455 }
456 // The last symbol we do after since the end address is calculated
457 // differently because there is no next symbol to reference.
458 uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
459 uint64_t EndOffset = Sect->Size - 1;
460 DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
461 << " from [" << StartOffset << ", " << EndOffset << "]\n");
462 extractFunction(Symbols[Symbols.size()-1].second,
463 Base + StartOffset, Base + EndOffset);
Jim Grosbach6e563312011-03-21 22:15:52 +0000464
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000465 // Now extract the relocation information for each function and process it.
466 for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
467 InMemoryStruct<macho::RelocationEntry> RE;
468 Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
469 if (RE->Word0 & macho::RF_Scattered)
470 return Error("NOT YET IMPLEMENTED: scattered relocations.");
471 // Word0 of the relocation is the offset into the section where the
472 // relocation should be applied. We need to translate that into an
473 // offset into a function since that's our atom.
474 uint32_t Offset = RE->Word0;
475 // Look for the function containing the address. This is used for JIT
476 // code, so the number of functions in section is almost always going
477 // to be very small (usually just one), so until we have use cases
478 // where that's not true, just use a trivial linear search.
479 unsigned SymbolNum;
480 unsigned NumSymbols = Symbols.size();
481 assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
482 "No symbol containing relocation!");
483 for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
484 if (Symbols[SymbolNum + 1].first > Offset)
485 break;
486 // Adjust the offset to be relative to the symbol.
487 Offset -= Symbols[SymbolNum].first;
488 // Get the name of the symbol containing the relocation.
489 StringRef TargetName = SymbolNames[SymbolNum];
490
491 bool isExtern = (RE->Word1 >> 27) & 1;
492 // Figure out the source symbol of the relocation. If isExtern is true,
493 // this relocation references the symbol table, otherwise it references
494 // a section in the same object, numbered from 1 through NumSections
495 // (SectionBases is [0, NumSections-1]).
496 if (!isExtern)
497 return Error("Internal relocations not supported.");
498 uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
499 StringRef SourceName = SymbolNames[SourceNum];
500
501 // FIXME: Get the relocation addend from the target address.
502
503 // Now store the relocation information. Associate it with the source
504 // symbol.
505 Relocations[SourceName].push_back(RelocationEntry(TargetName,
506 Offset,
507 RE->Word1,
508 0 /*Addend*/));
509 DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
510 << " from '" << SourceName << "(Word1: "
511 << format("0x%x", RE->Word1) << ")\n");
512 }
513 }
Jim Grosbach6e563312011-03-21 22:15:52 +0000514 return false;
515}
516
Jim Grosbach6e563312011-03-21 22:15:52 +0000517bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
518 // If the linker is in an error state, don't do anything.
519 if (hasError())
520 return true;
521 // Load the Mach-O wrapper object.
522 std::string ErrorStr;
523 OwningPtr<MachOObject> Obj(
524 MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
525 if (!Obj)
526 return Error("unable to load object: '" + ErrorStr + "'");
527
Jim Grosbacha8287e32011-03-23 22:06:06 +0000528 // Get the CPU type information from the header.
529 const macho::Header &Header = Obj->getHeader();
530
531 // FIXME: Error checking that the loaded object is compatible with
532 // the system we're running on.
533 CPUType = Header.CPUType;
534 CPUSubtype = Header.CPUSubtype;
535
Jim Grosbach6e563312011-03-21 22:15:52 +0000536 // Validate that the load commands match what we expect.
537 const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
538 *DysymtabLCI = 0;
Jim Grosbacha8287e32011-03-23 22:06:06 +0000539 for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000540 const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
541 switch (LCI.Command.Type) {
542 case macho::LCT_Segment:
543 case macho::LCT_Segment64:
544 if (SegmentLCI)
545 return Error("unexpected input object (multiple segments)");
546 SegmentLCI = &LCI;
547 break;
548 case macho::LCT_Symtab:
549 if (SymtabLCI)
550 return Error("unexpected input object (multiple symbol tables)");
551 SymtabLCI = &LCI;
552 break;
553 case macho::LCT_Dysymtab:
554 if (DysymtabLCI)
555 return Error("unexpected input object (multiple symbol tables)");
556 DysymtabLCI = &LCI;
557 break;
558 default:
559 return Error("unexpected input object (unexpected load command");
560 }
561 }
562
563 if (!SymtabLCI)
564 return Error("no symbol table found in object");
565 if (!SegmentLCI)
566 return Error("no symbol table found in object");
567
568 // Read and register the symbol table data.
569 InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
570 Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
571 if (!SymtabLC)
572 return Error("unable to load symbol table load command");
573 Obj->RegisterStringTable(*SymtabLC);
574
575 // Read the dynamic link-edit information, if present (not present in static
576 // objects).
577 if (DysymtabLCI) {
578 InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
579 Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
580 if (!DysymtabLC)
581 return Error("unable to load dynamic link-exit load command");
582
583 // FIXME: We don't support anything interesting yet.
Jim Grosbach8b54dca2011-03-23 19:52:00 +0000584// if (DysymtabLC->LocalSymbolsIndex != 0)
585// return Error("NOT YET IMPLEMENTED: local symbol entries");
586// if (DysymtabLC->ExternalSymbolsIndex != 0)
587// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
588// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
589// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
Jim Grosbach6e563312011-03-21 22:15:52 +0000590 }
591
592 // Load the segment load command.
593 if (SegmentLCI->Command.Type == macho::LCT_Segment) {
594 if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
595 return true;
596 } else {
597 if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
598 return true;
599 }
600
601 return false;
602}
603
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000604// Resolve the relocations for all symbols we currently know about.
605void RuntimeDyldImpl::resolveRelocations() {
606 // Just iterate over the symbols in our symbol table and assign their
607 // addresses.
608 StringMap<uint8_t*>::iterator i = SymbolTable.begin();
609 StringMap<uint8_t*>::iterator e = SymbolTable.end();
610 for (;i != e; ++i)
611 reassignSymbolAddress(i->getKey(), i->getValue());
612}
613
614// Assign an address to a symbol name and resolve all the relocations
615// associated with it.
616void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
617 // Assign the address in our symbol table.
618 SymbolTable[Name] = Addr;
619
620 RelocationList &Relocs = Relocations[Name];
621 for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
622 RelocationEntry &RE = Relocs[i];
623 uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
624 bool isPCRel = (RE.Data >> 24) & 1;
625 unsigned Type = (RE.Data >> 28) & 0xf;
626 unsigned Size = 1 << ((RE.Data >> 25) & 3);
627
628 DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
629 << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
630 << " from '" << Name << " (" << format("%p", Addr) << ")"
631 << "(" << (isPCRel ? "pcrel" : "absolute")
632 << ", type: " << Type << ", Size: " << Size << ").\n");
633
634 resolveRelocation(Target, Addr, isPCRel, Type, Size);
635 RE.isResolved = true;
636 }
637}
Jim Grosbach6e563312011-03-21 22:15:52 +0000638
639//===----------------------------------------------------------------------===//
640// RuntimeDyld class implementation
Jim Grosbachfcbe5b72011-04-04 23:04:39 +0000641RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
642 Dyld = new RuntimeDyldImpl(MM);
Jim Grosbach6e563312011-03-21 22:15:52 +0000643}
644
645RuntimeDyld::~RuntimeDyld() {
646 delete Dyld;
647}
648
649bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
650 return Dyld->loadObject(InputBuffer);
651}
652
Jim Grosbachb0271052011-04-08 17:31:24 +0000653void *RuntimeDyld::getSymbolAddress(StringRef Name) {
Jim Grosbach6e563312011-03-21 22:15:52 +0000654 return Dyld->getSymbolAddress(Name);
655}
656
Jim Grosbachf8c1c842011-04-12 21:20:41 +0000657void RuntimeDyld::resolveRelocations() {
658 Dyld->resolveRelocations();
659}
660
661void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
662 Dyld->reassignSymbolAddress(Name, Addr);
663}
664
Jim Grosbach91dde152011-03-22 18:22:27 +0000665StringRef RuntimeDyld::getErrorString() {
Jim Grosbachb3eecaf2011-03-22 18:19:42 +0000666 return Dyld->getErrorString();
667}
668
Jim Grosbach6e563312011-03-21 22:15:52 +0000669} // end namespace llvm