blob: a766d2fc2b4eb6657b3e953c8530fcc588afcbe9 [file] [log] [blame]
//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Enhanced Disassembly library's disassembler class.
// The disassembler is responsible for vending individual instructions according
// to a given architecture and disassembly syntax.
//
//===----------------------------------------------------------------------===//
15
Sean Callanan8f993b82010-04-08 00:48:21 +000016#include "EDDisassembler.h"
17#include "EDInst.h"
18
Sean Callananee5dfd42010-02-01 08:49:35 +000019#include "llvm/ADT/OwningPtr.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/MC/MCAsmInfo.h"
22#include "llvm/MC/MCContext.h"
23#include "llvm/MC/MCDisassembler.h"
24#include "llvm/MC/MCExpr.h"
25#include "llvm/MC/MCInst.h"
26#include "llvm/MC/MCInstPrinter.h"
27#include "llvm/MC/MCStreamer.h"
28#include "llvm/MC/MCParser/AsmLexer.h"
29#include "llvm/MC/MCParser/AsmParser.h"
30#include "llvm/MC/MCParser/MCAsmParser.h"
31#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32#include "llvm/Support/MemoryBuffer.h"
33#include "llvm/Support/MemoryObject.h"
34#include "llvm/Support/SourceMgr.h"
35#include "llvm/Target/TargetAsmLexer.h"
36#include "llvm/Target/TargetAsmParser.h"
37#include "llvm/Target/TargetRegistry.h"
38#include "llvm/Target/TargetMachine.h"
39#include "llvm/Target/TargetRegisterInfo.h"
40#include "llvm/Target/TargetSelect.h"
41
Sean Callanan35a3d3f2010-04-10 00:48:10 +000042#ifdef EDIS_X86
Sean Callananee5dfd42010-02-01 08:49:35 +000043#include "../../lib/Target/X86/X86GenEDInfo.inc"
Sean Callanan35a3d3f2010-04-10 00:48:10 +000044#endif
45
46#ifdef EDIS_ARM
Sean Callanan8f993b82010-04-08 00:48:21 +000047#include "../../lib/Target/ARM/ARMGenEDInfo.inc"
Sean Callanan35a3d3f2010-04-10 00:48:10 +000048#endif
Sean Callananee5dfd42010-02-01 08:49:35 +000049
50using namespace llvm;
51
52bool EDDisassembler::sInitialized = false;
53EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
54
55struct InfoMap {
56 Triple::ArchType Arch;
57 const char *String;
58 const InstInfo *Info;
59};
60
61static struct InfoMap infomap[] = {
Sean Callanan35a3d3f2010-04-10 00:48:10 +000062#ifdef EDIS_X86
Sean Callananee5dfd42010-02-01 08:49:35 +000063 { Triple::x86, "i386-unknown-unknown", instInfoX86 },
64 { Triple::x86_64, "x86_64-unknown-unknown", instInfoX86 },
Sean Callanan35a3d3f2010-04-10 00:48:10 +000065#endif
66#ifdef EDIS_ARM
Sean Callanan8f993b82010-04-08 00:48:21 +000067 { Triple::arm, "arm-unknown-unknown", instInfoARM },
68 { Triple::thumb, "thumb-unknown-unknown", instInfoARM },
Sean Callanan35a3d3f2010-04-10 00:48:10 +000069#endif
Sean Callananee5dfd42010-02-01 08:49:35 +000070 { Triple::InvalidArch, NULL, NULL }
71};
72
73/// infoFromArch - Returns the InfoMap corresponding to a given architecture,
74/// or NULL if there is an error
75///
76/// @arg arch - The Triple::ArchType for the desired architecture
77static const InfoMap *infoFromArch(Triple::ArchType arch) {
78 unsigned int infoIndex;
79
80 for (infoIndex = 0; infomap[infoIndex].String != NULL; ++infoIndex) {
Sean Callanan8f993b82010-04-08 00:48:21 +000081 if (arch == infomap[infoIndex].Arch)
Sean Callananee5dfd42010-02-01 08:49:35 +000082 return &infomap[infoIndex];
83 }
84
85 return NULL;
86}
87
88/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
89/// for the desired assembly syntax, suitable for passing to
90/// Target::createMCInstPrinter()
91///
92/// @arg arch - The target architecture
93/// @arg syntax - The assembly syntax in sd form
94static int getLLVMSyntaxVariant(Triple::ArchType arch,
95 EDAssemblySyntax_t syntax) {
96 switch (syntax) {
97 default:
98 return -1;
99 // Mappings below from X86AsmPrinter.cpp
100 case kEDAssemblySyntaxX86ATT:
101 if (arch == Triple::x86 || arch == Triple::x86_64)
102 return 0;
103 else
104 return -1;
105 case kEDAssemblySyntaxX86Intel:
106 if (arch == Triple::x86 || arch == Triple::x86_64)
107 return 1;
108 else
109 return -1;
Sean Callanan8f993b82010-04-08 00:48:21 +0000110 case kEDAssemblySyntaxARMUAL:
111 if (arch == Triple::arm || arch == Triple::thumb)
112 return 0;
113 else
114 return -1;
Sean Callananee5dfd42010-02-01 08:49:35 +0000115 }
116}
117
118#define BRINGUP_TARGET(tgt) \
119 LLVMInitialize##tgt##TargetInfo(); \
120 LLVMInitialize##tgt##Target(); \
121 LLVMInitialize##tgt##AsmPrinter(); \
122 LLVMInitialize##tgt##AsmParser(); \
123 LLVMInitialize##tgt##Disassembler();
124
125void EDDisassembler::initialize() {
126 if (sInitialized)
127 return;
128
129 sInitialized = true;
130
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000131#ifdef EDIS_X86
Sean Callananee5dfd42010-02-01 08:49:35 +0000132 BRINGUP_TARGET(X86)
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000133#endif
134#ifdef EDIS_ARM
Sean Callanan8f993b82010-04-08 00:48:21 +0000135 BRINGUP_TARGET(ARM)
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000136#endif
Sean Callananee5dfd42010-02-01 08:49:35 +0000137}
138
139#undef BRINGUP_TARGET
140
141EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
142 EDAssemblySyntax_t syntax) {
143 CPUKey key;
144 key.Arch = arch;
145 key.Syntax = syntax;
146
147 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
148
149 if (i != sDisassemblers.end()) {
150 return i->second;
Sean Callanan8f993b82010-04-08 00:48:21 +0000151 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000152 EDDisassembler* sdd = new EDDisassembler(key);
Sean Callanan8f993b82010-04-08 00:48:21 +0000153 if (!sdd->valid()) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000154 delete sdd;
155 return NULL;
156 }
157
158 sDisassemblers[key] = sdd;
159
160 return sdd;
161 }
162
163 return NULL;
164}
165
166EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
167 EDAssemblySyntax_t syntax) {
168 Triple triple(str);
169
170 return getDisassembler(triple.getArch(), syntax);
171}
172
Sean Callananee5dfd42010-02-01 08:49:35 +0000173EDDisassembler::EDDisassembler(CPUKey &key) :
Sean Callanan8f993b82010-04-08 00:48:21 +0000174 Valid(false),
175 HasSemantics(false),
176 ErrorStream(nulls()),
177 Key(key) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000178 const InfoMap *infoMap = infoFromArch(key.Arch);
179
180 if (!infoMap)
181 return;
Sean Callanan8f993b82010-04-08 00:48:21 +0000182
183 InstInfos = infoMap->Info;
Sean Callananee5dfd42010-02-01 08:49:35 +0000184
185 const char *triple = infoMap->String;
186
Sean Callanan4285b292010-04-09 00:11:15 +0000187 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
Sean Callananee5dfd42010-02-01 08:49:35 +0000188
Sean Callanan4285b292010-04-09 00:11:15 +0000189 if (LLVMSyntaxVariant < 0)
Sean Callananee5dfd42010-02-01 08:49:35 +0000190 return;
191
192 std::string tripleString(triple);
193 std::string errorString;
194
195 Tgt = TargetRegistry::lookupTarget(tripleString,
196 errorString);
197
198 if (!Tgt)
199 return;
200
201 std::string featureString;
202
203 OwningPtr<const TargetMachine>
204 targetMachine(Tgt->createTargetMachine(tripleString,
205 featureString));
206
207 const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo();
208
209 if (!registerInfo)
210 return;
Sean Callanan8f993b82010-04-08 00:48:21 +0000211
212 initMaps(*registerInfo);
Sean Callananee5dfd42010-02-01 08:49:35 +0000213
214 AsmInfo.reset(Tgt->createAsmInfo(tripleString));
215
216 if (!AsmInfo)
217 return;
218
219 Disassembler.reset(Tgt->createMCDisassembler());
220
221 if (!Disassembler)
222 return;
223
224 InstString.reset(new std::string);
225 InstStream.reset(new raw_string_ostream(*InstString));
Sean Callanan4285b292010-04-09 00:11:15 +0000226 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
Sean Callananee5dfd42010-02-01 08:49:35 +0000227
228 if (!InstPrinter)
229 return;
230
231 GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
232 SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
233 SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
234
235 InstInfos = infoMap->Info;
Sean Callanand74667e2010-02-02 02:18:20 +0000236
237 initMaps(*targetMachine->getRegisterInfo());
Sean Callananee5dfd42010-02-01 08:49:35 +0000238
239 Valid = true;
240}
241
242EDDisassembler::~EDDisassembler() {
Sean Callanan8f993b82010-04-08 00:48:21 +0000243 if (!valid())
Sean Callananee5dfd42010-02-01 08:49:35 +0000244 return;
245}
246
247namespace {
248 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
249 /// as provided by the sd interface. See MemoryObject.
250 class EDMemoryObject : public llvm::MemoryObject {
251 private:
252 EDByteReaderCallback Callback;
253 void *Arg;
254 public:
255 EDMemoryObject(EDByteReaderCallback callback,
256 void *arg) : Callback(callback), Arg(arg) { }
257 ~EDMemoryObject() { }
258 uint64_t getBase() const { return 0x0; }
259 uint64_t getExtent() const { return (uint64_t)-1; }
260 int readByte(uint64_t address, uint8_t *ptr) const {
Sean Callanan8f993b82010-04-08 00:48:21 +0000261 if (!Callback)
Sean Callananee5dfd42010-02-01 08:49:35 +0000262 return -1;
263
Sean Callanan8f993b82010-04-08 00:48:21 +0000264 if (Callback(ptr, address, Arg))
Sean Callananee5dfd42010-02-01 08:49:35 +0000265 return -1;
266
267 return 0;
268 }
269 };
270}
271
272EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
273 uint64_t address,
274 void *arg) {
275 EDMemoryObject memoryObject(byteReader, arg);
276
277 MCInst* inst = new MCInst;
278 uint64_t byteSize;
279
280 if (!Disassembler->getInstruction(*inst,
281 byteSize,
282 memoryObject,
283 address,
284 ErrorStream)) {
285 delete inst;
286 return NULL;
Sean Callanan8f993b82010-04-08 00:48:21 +0000287 } else {
288 const InstInfo *thisInstInfo;
289
290 thisInstInfo = &InstInfos[inst->getOpcode()];
Sean Callananee5dfd42010-02-01 08:49:35 +0000291
292 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
293 return sdInst;
294 }
295}
296
297void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
298 unsigned numRegisters = registerInfo.getNumRegs();
299 unsigned registerIndex;
300
301 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
302 const char* registerName = registerInfo.get(registerIndex).Name;
303
304 RegVec.push_back(registerName);
305 RegRMap[registerName] = registerIndex;
306 }
307
Sean Callanan8f993b82010-04-08 00:48:21 +0000308 switch (Key.Arch) {
309 default:
310 break;
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000311#ifdef EDIS_X86
Sean Callanan8f993b82010-04-08 00:48:21 +0000312 case Triple::x86:
313 case Triple::x86_64:
Sean Callananee5dfd42010-02-01 08:49:35 +0000314 stackPointers.insert(registerIDWithName("SP"));
315 stackPointers.insert(registerIDWithName("ESP"));
316 stackPointers.insert(registerIDWithName("RSP"));
317
318 programCounters.insert(registerIDWithName("IP"));
319 programCounters.insert(registerIDWithName("EIP"));
320 programCounters.insert(registerIDWithName("RIP"));
Sean Callanan8f993b82010-04-08 00:48:21 +0000321 break;
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000322#endif
323#ifdef EDIS_ARM
Sean Callanan8f993b82010-04-08 00:48:21 +0000324 case Triple::arm:
325 case Triple::thumb:
326 stackPointers.insert(registerIDWithName("SP"));
327
328 programCounters.insert(registerIDWithName("PC"));
329 break;
Sean Callanan35a3d3f2010-04-10 00:48:10 +0000330#endif
Sean Callananee5dfd42010-02-01 08:49:35 +0000331 }
332}
333
334const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
335 if (registerID >= RegVec.size())
336 return NULL;
337 else
338 return RegVec[registerID].c_str();
339}
340
341unsigned EDDisassembler::registerIDWithName(const char *name) const {
342 regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
343 if (iter == RegRMap.end())
344 return 0;
345 else
346 return (*iter).second;
347}
348
349bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
350 return (stackPointers.find(registerID) != stackPointers.end());
351}
352
353bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
354 return (programCounters.find(registerID) != programCounters.end());
355}
356
Chris Lattnerd3740872010-04-04 05:04:31 +0000357int EDDisassembler::printInst(std::string &str, MCInst &inst) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000358 PrinterMutex.acquire();
359
Chris Lattnerd3740872010-04-04 05:04:31 +0000360 InstPrinter->printInst(&inst, *InstStream);
Sean Callananee5dfd42010-02-01 08:49:35 +0000361 InstStream->flush();
362 str = *InstString;
363 InstString->clear();
364
365 PrinterMutex.release();
366
367 return 0;
368}
369
370int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
371 SmallVectorImpl<AsmToken> &tokens,
372 const std::string &str) {
373 int ret = 0;
374
Sean Callanan8f993b82010-04-08 00:48:21 +0000375 switch (Key.Arch) {
376 default:
377 return -1;
378 case Triple::x86:
379 case Triple::x86_64:
380 case Triple::arm:
381 case Triple::thumb:
382 break;
383 }
384
Sean Callananee5dfd42010-02-01 08:49:35 +0000385 const char *cStr = str.c_str();
386 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
387
388 StringRef instName;
389 SMLoc instLoc;
390
391 SourceMgr sourceMgr;
392 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
Chris Lattnerc18409a2010-03-11 22:53:35 +0000393 MCContext context(*AsmInfo);
394 OwningPtr<MCStreamer> streamer(createNullStreamer(context));
Sean Callananee5dfd42010-02-01 08:49:35 +0000395 AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo);
Chris Lattnerc18409a2010-03-11 22:53:35 +0000396 OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
Sean Callananee5dfd42010-02-01 08:49:35 +0000397
398 AsmToken OpcodeToken = genericParser.Lex();
Sean Callanan8f993b82010-04-08 00:48:21 +0000399 genericParser.Lex(); // consume next token, because specificParser expects us to
Sean Callananee5dfd42010-02-01 08:49:35 +0000400
Sean Callanan8f993b82010-04-08 00:48:21 +0000401 if (OpcodeToken.is(AsmToken::Identifier)) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000402 instName = OpcodeToken.getString();
403 instLoc = OpcodeToken.getLoc();
Chris Lattnerc18409a2010-03-11 22:53:35 +0000404 if (TargetParser->ParseInstruction(instName, instLoc, operands))
Sean Callananee5dfd42010-02-01 08:49:35 +0000405 ret = -1;
Sean Callanan8f993b82010-04-08 00:48:21 +0000406 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000407 ret = -1;
408 }
409
Sean Callananee5dfd42010-02-01 08:49:35 +0000410 ParserMutex.acquire();
411
412 if (!ret) {
413 GenericAsmLexer->setBuffer(buf);
414
415 while (SpecificAsmLexer->Lex(),
416 SpecificAsmLexer->isNot(AsmToken::Eof) &&
417 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
418 if (SpecificAsmLexer->is(AsmToken::Error)) {
419 ret = -1;
420 break;
421 }
422 tokens.push_back(SpecificAsmLexer->getTok());
423 }
424 }
425
426 ParserMutex.release();
427
428 return ret;
429}
430
431int EDDisassembler::llvmSyntaxVariant() const {
432 return LLVMSyntaxVariant;
433}