blob: 1969bcb473513bf71572a435a7917186c8a82c13 [file] [log] [blame]
//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
9
10#include "llvm/MC/MCObjectDisassembler.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/StringExtras.h"
14#include "llvm/ADT/StringRef.h"
15#include "llvm/ADT/Twine.h"
16#include "llvm/MC/MCAtom.h"
17#include "llvm/MC/MCDisassembler.h"
18#include "llvm/MC/MCFunction.h"
19#include "llvm/MC/MCInstrAnalysis.h"
20#include "llvm/MC/MCModule.h"
Ahmed Bougacha0e83b902013-08-21 07:28:44 +000021#include "llvm/Object/MachO.h"
Ahmed Bougachaef993562013-05-24 01:07:04 +000022#include "llvm/Object/ObjectFile.h"
Ahmed Bougacha0e83b902013-08-21 07:28:44 +000023#include "llvm/Support/Debug.h"
24#include "llvm/Support/MachO.h"
Ahmed Bougachaef993562013-05-24 01:07:04 +000025#include "llvm/Support/MemoryObject.h"
26#include "llvm/Support/StringRefMemoryObject.h"
27#include "llvm/Support/raw_ostream.h"
28#include <map>
29#include <set>
30
31using namespace llvm;
32using namespace object;
33
34MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
35 const MCDisassembler &Dis,
36 const MCInstrAnalysis &MIA)
37 : Obj(Obj), Dis(Dis), MIA(MIA) {}
38
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +000039uint64_t MCObjectDisassembler::getEntrypoint() {
40 error_code ec;
41 for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
42 SI != SE; SI.increment(ec)) {
43 if (ec)
44 break;
45 StringRef Name;
46 SI->getName(Name);
47 if (Name == "main" || Name == "_main") {
48 uint64_t Entrypoint;
49 SI->getAddress(Entrypoint);
Ahmed Bougacha484a6eb2013-08-21 07:28:37 +000050 return getEffectiveLoadAddr(Entrypoint);
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +000051 }
52 }
53 return 0;
54}
55
56ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() {
57 return ArrayRef<uint64_t>();
58}
59
60ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
61 return ArrayRef<uint64_t>();
62}
63
Ahmed Bougacha484a6eb2013-08-21 07:28:37 +000064uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
65 return Addr;
66}
67
68uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) {
69 return Addr;
70}
71
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +000072MCModule *MCObjectDisassembler::buildEmptyModule() {
Ahmed Bougachaef993562013-05-24 01:07:04 +000073 MCModule *Module = new MCModule;
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +000074 Module->Entrypoint = getEntrypoint();
75 return Module;
76}
77
78MCModule *MCObjectDisassembler::buildModule(bool withCFG) {
79 MCModule *Module = buildEmptyModule();
80
Ahmed Bougachaef993562013-05-24 01:07:04 +000081 buildSectionAtoms(Module);
82 if (withCFG)
83 buildCFG(Module);
84 return Module;
85}
86
87void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
88 error_code ec;
89 for (section_iterator SI = Obj.begin_sections(),
90 SE = Obj.end_sections();
91 SI != SE;
92 SI.increment(ec)) {
93 if (ec) break;
94
95 bool isText; SI->isText(isText);
96 bool isData; SI->isData(isData);
97 if (!isData && !isText)
98 continue;
99
100 uint64_t StartAddr; SI->getAddress(StartAddr);
101 uint64_t SecSize; SI->getSize(SecSize);
102 if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize)
103 continue;
Ahmed Bougacha484a6eb2013-08-21 07:28:37 +0000104 StartAddr = getEffectiveLoadAddr(StartAddr);
Ahmed Bougachaef993562013-05-24 01:07:04 +0000105
106 StringRef Contents; SI->getContents(Contents);
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +0000107 StringRefMemoryObject memoryObject(Contents, StartAddr);
Ahmed Bougachaef993562013-05-24 01:07:04 +0000108
109 // We don't care about things like non-file-backed sections yet.
110 if (Contents.size() != SecSize || !SecSize)
111 continue;
112 uint64_t EndAddr = StartAddr + SecSize - 1;
113
114 StringRef SecName; SI->getName(SecName);
115
116 if (isText) {
Ahmed Bougacha46937272013-08-21 07:28:32 +0000117 MCTextAtom *Text = 0;
118 MCDataAtom *InvalidData = 0;
119
Ahmed Bougachaef993562013-05-24 01:07:04 +0000120 uint64_t InstSize;
121 for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
Ahmed Bougacha46937272013-08-21 07:28:32 +0000122 const uint64_t CurAddr = StartAddr + Index;
Ahmed Bougachaef993562013-05-24 01:07:04 +0000123 MCInst Inst;
Ahmed Bougacha46937272013-08-21 07:28:32 +0000124 if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(),
125 nulls())) {
126 if (!Text) {
127 Text = Module->createTextAtom(CurAddr, CurAddr);
128 Text->setName(SecName);
129 }
Ahmed Bougachaef993562013-05-24 01:07:04 +0000130 Text->addInst(Inst, InstSize);
Ahmed Bougacha46937272013-08-21 07:28:32 +0000131 InvalidData = 0;
132 } else {
133 if (!InvalidData) {
134 Text = 0;
135 InvalidData = Module->createDataAtom(CurAddr, EndAddr);
136 }
137 InvalidData->addData(Contents[Index]);
138 }
Ahmed Bougachaef993562013-05-24 01:07:04 +0000139 }
Ahmed Bougachaef993562013-05-24 01:07:04 +0000140 } else {
141 MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr);
142 Data->setName(SecName);
143 for (uint64_t Index = 0; Index < SecSize; ++Index)
144 Data->addData(Contents[Index]);
145 }
146 }
147}
148
149namespace {
150 struct BBInfo;
151 typedef std::set<BBInfo*> BBInfoSetTy;
152
153 struct BBInfo {
154 MCTextAtom *Atom;
155 MCBasicBlock *BB;
156 BBInfoSetTy Succs;
157 BBInfoSetTy Preds;
158
Ahmed Bougacha46937272013-08-21 07:28:32 +0000159 BBInfo() : Atom(0), BB(0) {}
160
Ahmed Bougachaef993562013-05-24 01:07:04 +0000161 void addSucc(BBInfo &Succ) {
162 Succs.insert(&Succ);
163 Succ.Preds.insert(this);
164 }
165 };
166}
167
168void MCObjectDisassembler::buildCFG(MCModule *Module) {
169 typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
170 BBInfoByAddrTy BBInfos;
171 typedef std::set<uint64_t> AddressSetTy;
172 AddressSetTy Splits;
173 AddressSetTy Calls;
174
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +0000175 error_code ec;
176 for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols();
177 SI != SE; SI.increment(ec)) {
178 if (ec)
179 break;
180 SymbolRef::Type SymType;
181 SI->getType(SymType);
182 if (SymType == SymbolRef::ST_Function) {
183 uint64_t SymAddr;
184 SI->getAddress(SymAddr);
Ahmed Bougacha484a6eb2013-08-21 07:28:37 +0000185 SymAddr = getEffectiveLoadAddr(SymAddr);
Ahmed Bougacha0a30ccc2013-08-21 07:28:29 +0000186 Calls.insert(SymAddr);
187 Splits.insert(SymAddr);
188 }
189 }
190
Ahmed Bougachaef993562013-05-24 01:07:04 +0000191 assert(Module->func_begin() == Module->func_end()
192 && "Module already has a CFG!");
193
194 // First, determine the basic block boundaries and call targets.
195 for (MCModule::atom_iterator AI = Module->atom_begin(),
196 AE = Module->atom_end();
197 AI != AE; ++AI) {
198 MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
199 if (!TA) continue;
200 Calls.insert(TA->getBeginAddr());
Ahmed Bougacha7ab184a2013-06-19 20:18:59 +0000201 BBInfos[TA->getBeginAddr()].Atom = TA;
Ahmed Bougachaef993562013-05-24 01:07:04 +0000202 for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end();
203 II != IE; ++II) {
204 if (MIA.isTerminator(II->Inst))
205 Splits.insert(II->Address + II->Size);
206 uint64_t Target;
207 if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) {
208 if (MIA.isCall(II->Inst))
209 Calls.insert(Target);
210 Splits.insert(Target);
211 }
212 }
213 }
214
215 // Split text atoms into basic block atoms.
216 for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
217 SI != SE; ++SI) {
218 MCAtom *A = Module->findAtomContaining(*SI);
219 if (!A) continue;
220 MCTextAtom *TA = cast<MCTextAtom>(A);
Ahmed Bougachaef993562013-05-24 01:07:04 +0000221 if (TA->getBeginAddr() == *SI)
222 continue;
223 MCTextAtom *NewAtom = TA->split(*SI);
224 BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom;
225 StringRef BBName = TA->getName();
226 BBName = BBName.substr(0, BBName.find_last_of(':'));
227 NewAtom->setName((BBName + ":" + utohexstr(*SI)).str());
228 }
229
230 // Compute succs/preds.
231 for (MCModule::atom_iterator AI = Module->atom_begin(),
232 AE = Module->atom_end();
233 AI != AE; ++AI) {
234 MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI);
235 if (!TA) continue;
236 BBInfo &CurBB = BBInfos[TA->getBeginAddr()];
237 const MCDecodedInst &LI = TA->back();
238 if (MIA.isBranch(LI.Inst)) {
239 uint64_t Target;
240 if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target))
241 CurBB.addSucc(BBInfos[Target]);
242 if (MIA.isConditionalBranch(LI.Inst))
243 CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
244 } else if (!MIA.isTerminator(LI.Inst))
245 CurBB.addSucc(BBInfos[LI.Address + LI.Size]);
246 }
247
248
249 // Create functions and basic blocks.
250 for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
251 CI != CE; ++CI) {
252 BBInfo &BBI = BBInfos[*CI];
253 if (!BBI.Atom) continue;
254
255 MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName());
256
257 // Create MCBBs.
258 SmallSetVector<BBInfo*, 16> Worklist;
259 Worklist.insert(&BBI);
Ahmed Bougacha46937272013-08-21 07:28:32 +0000260 for (size_t wi = 0; wi < Worklist.size(); ++wi) {
261 BBInfo *BBI = Worklist[wi];
Ahmed Bougachaef993562013-05-24 01:07:04 +0000262 if (!BBI->Atom)
263 continue;
264 BBI->BB = &MCFN.createBlock(*BBI->Atom);
265 // Add all predecessors and successors to the worklist.
266 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
267 SI != SE; ++SI)
268 Worklist.insert(*SI);
269 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
270 PI != PE; ++PI)
271 Worklist.insert(*PI);
272 }
273
274 // Set preds/succs.
Ahmed Bougacha46937272013-08-21 07:28:32 +0000275 for (size_t wi = 0; wi < Worklist.size(); ++wi) {
276 BBInfo *BBI = Worklist[wi];
Ahmed Bougachaef993562013-05-24 01:07:04 +0000277 MCBasicBlock *MCBB = BBI->BB;
278 if (!MCBB)
279 continue;
280 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
Ahmed Bougacha46937272013-08-21 07:28:32 +0000281 SI != SE; ++SI)
282 if ((*SI)->BB)
283 MCBB->addSuccessor((*SI)->BB);
Ahmed Bougachaef993562013-05-24 01:07:04 +0000284 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
Ahmed Bougacha46937272013-08-21 07:28:32 +0000285 PI != PE; ++PI)
286 if ((*PI)->BB)
287 MCBB->addPredecessor((*PI)->BB);
Ahmed Bougachaef993562013-05-24 01:07:04 +0000288 }
289 }
290}
Ahmed Bougacha0e83b902013-08-21 07:28:44 +0000291
292// MachO MCObjectDisassembler implementation.
293
294MCMachOObjectDisassembler::MCMachOObjectDisassembler(
295 const MachOObjectFile &MOOF, const MCDisassembler &Dis,
296 const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
297 uint64_t HeaderLoadAddress)
298 : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF),
299 VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
300
301 error_code ec;
302 for (section_iterator SI = MOOF.begin_sections(), SE = MOOF.end_sections();
303 SI != SE; SI.increment(ec)) {
304 if (ec)
305 break;
306 StringRef Name;
307 SI->getName(Name);
308 // FIXME: We should use the S_ section type instead of the name.
309 if (Name == "__mod_init_func") {
310 DEBUG(dbgs() << "Found __mod_init_func section!\n");
311 SI->getContents(ModInitContents);
312 } else if (Name == "__mod_exit_func") {
313 DEBUG(dbgs() << "Found __mod_exit_func section!\n");
314 SI->getContents(ModExitContents);
315 }
316 }
317}
318
319// FIXME: Only do the translations for addresses actually inside the object.
320uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
321 return Addr + VMAddrSlide;
322}
323
324uint64_t
325MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) {
326 return EffectiveAddr - VMAddrSlide;
327}
328
329uint64_t MCMachOObjectDisassembler::getEntrypoint() {
330 uint64_t EntryFileOffset = 0;
331
332 // Look for LC_MAIN.
333 {
334 uint32_t LoadCommandCount = MOOF.getHeader().NumLoadCommands;
335 MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo();
336 for (unsigned I = 0;; ++I) {
337 if (Load.C.Type == MachO::LoadCommandMain) {
338 EntryFileOffset =
339 ((const MachO::entry_point_command *)Load.Ptr)->entryoff;
340 break;
341 }
342
343 if (I == LoadCommandCount - 1)
344 break;
345 else
346 Load = MOOF.getNextLoadCommandInfo(Load);
347 }
348 }
349
350 // If we didn't find anything, default to the common implementation.
351 // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends?
352 if (EntryFileOffset)
353 return MCObjectDisassembler::getEntrypoint();
354
355 return EntryFileOffset + HeaderLoadAddress;
356}
357
358ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() {
359 // FIXME: We only handle 64bit mach-o
360 assert(MOOF.is64Bit());
361
362 size_t EntrySize = 8;
363 size_t EntryCount = ModInitContents.size() / EntrySize;
364 return ArrayRef<uint64_t>(
365 reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount);
366}
367
368ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() {
369 // FIXME: We only handle 64bit mach-o
370 assert(MOOF.is64Bit());
371
372 size_t EntrySize = 8;
373 size_t EntryCount = ModExitContents.size() / EntrySize;
374 return ArrayRef<uint64_t>(
375 reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount);
376}