blob: 5a59e34f5764623aa818961046271b9831f726b9 [file] [log] [blame]
//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Enhanced Disassembly library's disassembler class.
// The disassembler is responsible for vending individual instructions according
// to a given architecture and disassembly syntax.
//
//===----------------------------------------------------------------------===//
15
Sean Callanan8f993b82010-04-08 00:48:21 +000016#include "EDDisassembler.h"
17#include "EDInst.h"
18
Sean Callananee5dfd42010-02-01 08:49:35 +000019#include "llvm/ADT/OwningPtr.h"
20#include "llvm/ADT/SmallVector.h"
Sean Callanan9899f702010-04-13 21:21:57 +000021#include "llvm/MC/EDInstInfo.h"
Sean Callananee5dfd42010-02-01 08:49:35 +000022#include "llvm/MC/MCAsmInfo.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCDisassembler.h"
25#include "llvm/MC/MCExpr.h"
26#include "llvm/MC/MCInst.h"
27#include "llvm/MC/MCInstPrinter.h"
28#include "llvm/MC/MCStreamer.h"
29#include "llvm/MC/MCParser/AsmLexer.h"
Sean Callananee5dfd42010-02-01 08:49:35 +000030#include "llvm/MC/MCParser/MCAsmParser.h"
31#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/MemoryObject.h"
34#include "llvm/Support/SourceMgr.h"
35#include "llvm/Target/TargetAsmLexer.h"
36#include "llvm/Target/TargetAsmParser.h"
37#include "llvm/Target/TargetRegistry.h"
38#include "llvm/Target/TargetMachine.h"
39#include "llvm/Target/TargetRegisterInfo.h"
40#include "llvm/Target/TargetSelect.h"
41
Sean Callananee5dfd42010-02-01 08:49:35 +000042using namespace llvm;
43
44bool EDDisassembler::sInitialized = false;
45EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
46
Sean Callanan9899f702010-04-13 21:21:57 +000047struct TripleMap {
Sean Callananee5dfd42010-02-01 08:49:35 +000048 Triple::ArchType Arch;
49 const char *String;
Sean Callananee5dfd42010-02-01 08:49:35 +000050};
51
Sean Callanan9899f702010-04-13 21:21:57 +000052static struct TripleMap triplemap[] = {
53 { Triple::x86, "i386-unknown-unknown" },
54 { Triple::x86_64, "x86_64-unknown-unknown" },
55 { Triple::arm, "arm-unknown-unknown" },
56 { Triple::thumb, "thumb-unknown-unknown" },
57 { Triple::InvalidArch, NULL, }
Sean Callananee5dfd42010-02-01 08:49:35 +000058};
59
Sean Callanan9899f702010-04-13 21:21:57 +000060/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
Sean Callananee5dfd42010-02-01 08:49:35 +000061/// or NULL if there is an error
62///
63/// @arg arch - The Triple::ArchType for the desired architecture
Sean Callanan9899f702010-04-13 21:21:57 +000064static const char *tripleFromArch(Triple::ArchType arch) {
Sean Callananee5dfd42010-02-01 08:49:35 +000065 unsigned int infoIndex;
66
Sean Callanan9899f702010-04-13 21:21:57 +000067 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
68 if (arch == triplemap[infoIndex].Arch)
69 return triplemap[infoIndex].String;
Sean Callananee5dfd42010-02-01 08:49:35 +000070 }
71
72 return NULL;
73}
74
75/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
76/// for the desired assembly syntax, suitable for passing to
77/// Target::createMCInstPrinter()
78///
79/// @arg arch - The target architecture
80/// @arg syntax - The assembly syntax in sd form
81static int getLLVMSyntaxVariant(Triple::ArchType arch,
82 EDAssemblySyntax_t syntax) {
83 switch (syntax) {
84 default:
85 return -1;
86 // Mappings below from X86AsmPrinter.cpp
87 case kEDAssemblySyntaxX86ATT:
88 if (arch == Triple::x86 || arch == Triple::x86_64)
89 return 0;
90 else
91 return -1;
92 case kEDAssemblySyntaxX86Intel:
93 if (arch == Triple::x86 || arch == Triple::x86_64)
94 return 1;
95 else
96 return -1;
Sean Callanan8f993b82010-04-08 00:48:21 +000097 case kEDAssemblySyntaxARMUAL:
98 if (arch == Triple::arm || arch == Triple::thumb)
99 return 0;
100 else
101 return -1;
Sean Callananee5dfd42010-02-01 08:49:35 +0000102 }
103}
104
Sean Callananee5dfd42010-02-01 08:49:35 +0000105void EDDisassembler::initialize() {
106 if (sInitialized)
107 return;
108
109 sInitialized = true;
110
Sean Callanan9899f702010-04-13 21:21:57 +0000111 InitializeAllTargetInfos();
112 InitializeAllTargets();
113 InitializeAllAsmPrinters();
114 InitializeAllAsmParsers();
115 InitializeAllDisassemblers();
Sean Callananee5dfd42010-02-01 08:49:35 +0000116}
117
118#undef BRINGUP_TARGET
119
120EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
121 EDAssemblySyntax_t syntax) {
122 CPUKey key;
123 key.Arch = arch;
124 key.Syntax = syntax;
125
126 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
127
128 if (i != sDisassemblers.end()) {
129 return i->second;
Sean Callanan8f993b82010-04-08 00:48:21 +0000130 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000131 EDDisassembler* sdd = new EDDisassembler(key);
Sean Callanan8f993b82010-04-08 00:48:21 +0000132 if (!sdd->valid()) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000133 delete sdd;
134 return NULL;
135 }
136
137 sDisassemblers[key] = sdd;
138
139 return sdd;
140 }
141
142 return NULL;
143}
144
145EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
146 EDAssemblySyntax_t syntax) {
147 Triple triple(str);
148
149 return getDisassembler(triple.getArch(), syntax);
150}
151
Sean Callananee5dfd42010-02-01 08:49:35 +0000152EDDisassembler::EDDisassembler(CPUKey &key) :
Sean Callanan8f993b82010-04-08 00:48:21 +0000153 Valid(false),
154 HasSemantics(false),
155 ErrorStream(nulls()),
156 Key(key) {
Sean Callanan9899f702010-04-13 21:21:57 +0000157 const char *triple = tripleFromArch(key.Arch);
Sean Callanan8f993b82010-04-08 00:48:21 +0000158
Sean Callanan9899f702010-04-13 21:21:57 +0000159 if (!triple)
160 return;
Sean Callananee5dfd42010-02-01 08:49:35 +0000161
Sean Callanan4285b292010-04-09 00:11:15 +0000162 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
Sean Callananee5dfd42010-02-01 08:49:35 +0000163
Sean Callanan4285b292010-04-09 00:11:15 +0000164 if (LLVMSyntaxVariant < 0)
Sean Callananee5dfd42010-02-01 08:49:35 +0000165 return;
166
167 std::string tripleString(triple);
168 std::string errorString;
169
170 Tgt = TargetRegistry::lookupTarget(tripleString,
171 errorString);
172
173 if (!Tgt)
174 return;
175
176 std::string featureString;
177
178 OwningPtr<const TargetMachine>
179 targetMachine(Tgt->createTargetMachine(tripleString,
180 featureString));
181
182 const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo();
183
184 if (!registerInfo)
185 return;
Sean Callanan8f993b82010-04-08 00:48:21 +0000186
187 initMaps(*registerInfo);
Sean Callananee5dfd42010-02-01 08:49:35 +0000188
189 AsmInfo.reset(Tgt->createAsmInfo(tripleString));
190
191 if (!AsmInfo)
192 return;
193
194 Disassembler.reset(Tgt->createMCDisassembler());
195
196 if (!Disassembler)
197 return;
Sean Callanan9899f702010-04-13 21:21:57 +0000198
199 InstInfos = Disassembler->getEDInfo();
Sean Callananee5dfd42010-02-01 08:49:35 +0000200
201 InstString.reset(new std::string);
202 InstStream.reset(new raw_string_ostream(*InstString));
Sean Callanan4285b292010-04-09 00:11:15 +0000203 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
Sean Callananee5dfd42010-02-01 08:49:35 +0000204
205 if (!InstPrinter)
206 return;
207
208 GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
209 SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
210 SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
Sean Callanand74667e2010-02-02 02:18:20 +0000211
212 initMaps(*targetMachine->getRegisterInfo());
Sean Callananee5dfd42010-02-01 08:49:35 +0000213
214 Valid = true;
215}
216
217EDDisassembler::~EDDisassembler() {
Sean Callanan8f993b82010-04-08 00:48:21 +0000218 if (!valid())
Sean Callananee5dfd42010-02-01 08:49:35 +0000219 return;
220}
221
222namespace {
223 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
224 /// as provided by the sd interface. See MemoryObject.
225 class EDMemoryObject : public llvm::MemoryObject {
226 private:
227 EDByteReaderCallback Callback;
228 void *Arg;
229 public:
230 EDMemoryObject(EDByteReaderCallback callback,
231 void *arg) : Callback(callback), Arg(arg) { }
232 ~EDMemoryObject() { }
233 uint64_t getBase() const { return 0x0; }
234 uint64_t getExtent() const { return (uint64_t)-1; }
235 int readByte(uint64_t address, uint8_t *ptr) const {
Sean Callanan8f993b82010-04-08 00:48:21 +0000236 if (!Callback)
Sean Callananee5dfd42010-02-01 08:49:35 +0000237 return -1;
238
Sean Callanan8f993b82010-04-08 00:48:21 +0000239 if (Callback(ptr, address, Arg))
Sean Callananee5dfd42010-02-01 08:49:35 +0000240 return -1;
241
242 return 0;
243 }
244 };
245}
246
247EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
248 uint64_t address,
249 void *arg) {
250 EDMemoryObject memoryObject(byteReader, arg);
251
252 MCInst* inst = new MCInst;
253 uint64_t byteSize;
254
255 if (!Disassembler->getInstruction(*inst,
256 byteSize,
257 memoryObject,
258 address,
259 ErrorStream)) {
260 delete inst;
261 return NULL;
Sean Callanan8f993b82010-04-08 00:48:21 +0000262 } else {
Sean Callanan9899f702010-04-13 21:21:57 +0000263 const llvm::EDInstInfo *thisInstInfo;
Sean Callanan8f993b82010-04-08 00:48:21 +0000264
265 thisInstInfo = &InstInfos[inst->getOpcode()];
Sean Callananee5dfd42010-02-01 08:49:35 +0000266
267 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
268 return sdInst;
269 }
270}
271
272void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
273 unsigned numRegisters = registerInfo.getNumRegs();
274 unsigned registerIndex;
275
276 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
277 const char* registerName = registerInfo.get(registerIndex).Name;
278
279 RegVec.push_back(registerName);
280 RegRMap[registerName] = registerIndex;
281 }
282
Sean Callanan8f993b82010-04-08 00:48:21 +0000283 switch (Key.Arch) {
284 default:
285 break;
286 case Triple::x86:
287 case Triple::x86_64:
Sean Callananee5dfd42010-02-01 08:49:35 +0000288 stackPointers.insert(registerIDWithName("SP"));
289 stackPointers.insert(registerIDWithName("ESP"));
290 stackPointers.insert(registerIDWithName("RSP"));
291
292 programCounters.insert(registerIDWithName("IP"));
293 programCounters.insert(registerIDWithName("EIP"));
294 programCounters.insert(registerIDWithName("RIP"));
Sean Callanan8f993b82010-04-08 00:48:21 +0000295 break;
296 case Triple::arm:
297 case Triple::thumb:
298 stackPointers.insert(registerIDWithName("SP"));
299
300 programCounters.insert(registerIDWithName("PC"));
301 break;
Sean Callananee5dfd42010-02-01 08:49:35 +0000302 }
303}
304
305const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
306 if (registerID >= RegVec.size())
307 return NULL;
308 else
309 return RegVec[registerID].c_str();
310}
311
312unsigned EDDisassembler::registerIDWithName(const char *name) const {
313 regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
314 if (iter == RegRMap.end())
315 return 0;
316 else
317 return (*iter).second;
318}
319
320bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
321 return (stackPointers.find(registerID) != stackPointers.end());
322}
323
324bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
325 return (programCounters.find(registerID) != programCounters.end());
326}
327
Chris Lattnerd3740872010-04-04 05:04:31 +0000328int EDDisassembler::printInst(std::string &str, MCInst &inst) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000329 PrinterMutex.acquire();
330
Chris Lattnerd3740872010-04-04 05:04:31 +0000331 InstPrinter->printInst(&inst, *InstStream);
Sean Callananee5dfd42010-02-01 08:49:35 +0000332 InstStream->flush();
333 str = *InstString;
334 InstString->clear();
335
336 PrinterMutex.release();
337
338 return 0;
339}
340
341int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
342 SmallVectorImpl<AsmToken> &tokens,
343 const std::string &str) {
344 int ret = 0;
345
Sean Callanan8f993b82010-04-08 00:48:21 +0000346 switch (Key.Arch) {
347 default:
348 return -1;
349 case Triple::x86:
350 case Triple::x86_64:
351 case Triple::arm:
352 case Triple::thumb:
353 break;
354 }
355
Sean Callananee5dfd42010-02-01 08:49:35 +0000356 const char *cStr = str.c_str();
357 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
358
359 StringRef instName;
360 SMLoc instLoc;
361
362 SourceMgr sourceMgr;
363 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
Chris Lattnerc18409a2010-03-11 22:53:35 +0000364 MCContext context(*AsmInfo);
365 OwningPtr<MCStreamer> streamer(createNullStreamer(context));
Daniel Dunbar9fbb37e2010-07-18 18:31:33 +0000366 OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
367 context, *streamer,
368 *AsmInfo));
369 OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser));
Sean Callananee5dfd42010-02-01 08:49:35 +0000370
Daniel Dunbar9fbb37e2010-07-18 18:31:33 +0000371 AsmToken OpcodeToken = genericParser->Lex();
372 AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
Sean Callanana8702562010-04-24 01:00:16 +0000373
Sean Callanan8f993b82010-04-08 00:48:21 +0000374 if (OpcodeToken.is(AsmToken::Identifier)) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000375 instName = OpcodeToken.getString();
376 instLoc = OpcodeToken.getLoc();
Sean Callanana8702562010-04-24 01:00:16 +0000377
378 if (NextToken.isNot(AsmToken::Eof) &&
379 TargetParser->ParseInstruction(instName, instLoc, operands))
Sean Callananee5dfd42010-02-01 08:49:35 +0000380 ret = -1;
Sean Callanan8f993b82010-04-08 00:48:21 +0000381 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000382 ret = -1;
383 }
384
Sean Callananee5dfd42010-02-01 08:49:35 +0000385 ParserMutex.acquire();
386
387 if (!ret) {
388 GenericAsmLexer->setBuffer(buf);
389
390 while (SpecificAsmLexer->Lex(),
391 SpecificAsmLexer->isNot(AsmToken::Eof) &&
392 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
393 if (SpecificAsmLexer->is(AsmToken::Error)) {
394 ret = -1;
395 break;
396 }
397 tokens.push_back(SpecificAsmLexer->getTok());
398 }
399 }
400
401 ParserMutex.release();
402
403 return ret;
404}
405
406int EDDisassembler::llvmSyntaxVariant() const {
407 return LLVMSyntaxVariant;
408}