//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Enhanced Disassembly library's disassembler class.
// The disassembler is responsible for vending individual instructions according
// to a given architecture and disassembly syntax.
//
//===----------------------------------------------------------------------===//
15
Sean Callanan8f993b82010-04-08 00:48:21 +000016#include "EDDisassembler.h"
17#include "EDInst.h"
18
Sean Callananee5dfd42010-02-01 08:49:35 +000019#include "llvm/ADT/OwningPtr.h"
20#include "llvm/ADT/SmallVector.h"
Sean Callanan9899f702010-04-13 21:21:57 +000021#include "llvm/MC/EDInstInfo.h"
Sean Callananee5dfd42010-02-01 08:49:35 +000022#include "llvm/MC/MCAsmInfo.h"
23#include "llvm/MC/MCContext.h"
24#include "llvm/MC/MCDisassembler.h"
25#include "llvm/MC/MCExpr.h"
26#include "llvm/MC/MCInst.h"
27#include "llvm/MC/MCInstPrinter.h"
28#include "llvm/MC/MCStreamer.h"
29#include "llvm/MC/MCParser/AsmLexer.h"
30#include "llvm/MC/MCParser/AsmParser.h"
31#include "llvm/MC/MCParser/MCAsmParser.h"
32#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33#include "llvm/Support/MemoryBuffer.h"
34#include "llvm/Support/MemoryObject.h"
35#include "llvm/Support/SourceMgr.h"
36#include "llvm/Target/TargetAsmLexer.h"
37#include "llvm/Target/TargetAsmParser.h"
38#include "llvm/Target/TargetRegistry.h"
39#include "llvm/Target/TargetMachine.h"
40#include "llvm/Target/TargetRegisterInfo.h"
41#include "llvm/Target/TargetSelect.h"
42
Sean Callananee5dfd42010-02-01 08:49:35 +000043using namespace llvm;
44
45bool EDDisassembler::sInitialized = false;
46EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
47
Sean Callanan9899f702010-04-13 21:21:57 +000048struct TripleMap {
Sean Callananee5dfd42010-02-01 08:49:35 +000049 Triple::ArchType Arch;
50 const char *String;
Sean Callananee5dfd42010-02-01 08:49:35 +000051};
52
Sean Callanan9899f702010-04-13 21:21:57 +000053static struct TripleMap triplemap[] = {
54 { Triple::x86, "i386-unknown-unknown" },
55 { Triple::x86_64, "x86_64-unknown-unknown" },
56 { Triple::arm, "arm-unknown-unknown" },
57 { Triple::thumb, "thumb-unknown-unknown" },
58 { Triple::InvalidArch, NULL, }
Sean Callananee5dfd42010-02-01 08:49:35 +000059};
60
Sean Callanan9899f702010-04-13 21:21:57 +000061/// infoFromArch - Returns the TripleMap corresponding to a given architecture,
Sean Callananee5dfd42010-02-01 08:49:35 +000062/// or NULL if there is an error
63///
64/// @arg arch - The Triple::ArchType for the desired architecture
Sean Callanan9899f702010-04-13 21:21:57 +000065static const char *tripleFromArch(Triple::ArchType arch) {
Sean Callananee5dfd42010-02-01 08:49:35 +000066 unsigned int infoIndex;
67
Sean Callanan9899f702010-04-13 21:21:57 +000068 for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
69 if (arch == triplemap[infoIndex].Arch)
70 return triplemap[infoIndex].String;
Sean Callananee5dfd42010-02-01 08:49:35 +000071 }
72
73 return NULL;
74}
75
76/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
77/// for the desired assembly syntax, suitable for passing to
78/// Target::createMCInstPrinter()
79///
80/// @arg arch - The target architecture
81/// @arg syntax - The assembly syntax in sd form
82static int getLLVMSyntaxVariant(Triple::ArchType arch,
83 EDAssemblySyntax_t syntax) {
84 switch (syntax) {
85 default:
86 return -1;
87 // Mappings below from X86AsmPrinter.cpp
88 case kEDAssemblySyntaxX86ATT:
89 if (arch == Triple::x86 || arch == Triple::x86_64)
90 return 0;
91 else
92 return -1;
93 case kEDAssemblySyntaxX86Intel:
94 if (arch == Triple::x86 || arch == Triple::x86_64)
95 return 1;
96 else
97 return -1;
Sean Callanan8f993b82010-04-08 00:48:21 +000098 case kEDAssemblySyntaxARMUAL:
99 if (arch == Triple::arm || arch == Triple::thumb)
100 return 0;
101 else
102 return -1;
Sean Callananee5dfd42010-02-01 08:49:35 +0000103 }
104}
105
Sean Callananee5dfd42010-02-01 08:49:35 +0000106void EDDisassembler::initialize() {
107 if (sInitialized)
108 return;
109
110 sInitialized = true;
111
Sean Callanan9899f702010-04-13 21:21:57 +0000112 InitializeAllTargetInfos();
113 InitializeAllTargets();
114 InitializeAllAsmPrinters();
115 InitializeAllAsmParsers();
116 InitializeAllDisassemblers();
Sean Callananee5dfd42010-02-01 08:49:35 +0000117}
118
119#undef BRINGUP_TARGET
120
121EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
122 EDAssemblySyntax_t syntax) {
123 CPUKey key;
124 key.Arch = arch;
125 key.Syntax = syntax;
126
127 EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
128
129 if (i != sDisassemblers.end()) {
130 return i->second;
Sean Callanan8f993b82010-04-08 00:48:21 +0000131 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000132 EDDisassembler* sdd = new EDDisassembler(key);
Sean Callanan8f993b82010-04-08 00:48:21 +0000133 if (!sdd->valid()) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000134 delete sdd;
135 return NULL;
136 }
137
138 sDisassemblers[key] = sdd;
139
140 return sdd;
141 }
142
143 return NULL;
144}
145
146EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
147 EDAssemblySyntax_t syntax) {
148 Triple triple(str);
149
150 return getDisassembler(triple.getArch(), syntax);
151}
152
Sean Callananee5dfd42010-02-01 08:49:35 +0000153EDDisassembler::EDDisassembler(CPUKey &key) :
Sean Callanan8f993b82010-04-08 00:48:21 +0000154 Valid(false),
155 HasSemantics(false),
156 ErrorStream(nulls()),
157 Key(key) {
Sean Callanan9899f702010-04-13 21:21:57 +0000158 const char *triple = tripleFromArch(key.Arch);
Sean Callanan8f993b82010-04-08 00:48:21 +0000159
Sean Callanan9899f702010-04-13 21:21:57 +0000160 if (!triple)
161 return;
Sean Callananee5dfd42010-02-01 08:49:35 +0000162
Sean Callanan4285b292010-04-09 00:11:15 +0000163 LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
Sean Callananee5dfd42010-02-01 08:49:35 +0000164
Sean Callanan4285b292010-04-09 00:11:15 +0000165 if (LLVMSyntaxVariant < 0)
Sean Callananee5dfd42010-02-01 08:49:35 +0000166 return;
167
168 std::string tripleString(triple);
169 std::string errorString;
170
171 Tgt = TargetRegistry::lookupTarget(tripleString,
172 errorString);
173
174 if (!Tgt)
175 return;
176
177 std::string featureString;
178
179 OwningPtr<const TargetMachine>
180 targetMachine(Tgt->createTargetMachine(tripleString,
181 featureString));
182
183 const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo();
184
185 if (!registerInfo)
186 return;
Sean Callanan8f993b82010-04-08 00:48:21 +0000187
188 initMaps(*registerInfo);
Sean Callananee5dfd42010-02-01 08:49:35 +0000189
190 AsmInfo.reset(Tgt->createAsmInfo(tripleString));
191
192 if (!AsmInfo)
193 return;
194
195 Disassembler.reset(Tgt->createMCDisassembler());
196
197 if (!Disassembler)
198 return;
Sean Callanan9899f702010-04-13 21:21:57 +0000199
200 InstInfos = Disassembler->getEDInfo();
Sean Callananee5dfd42010-02-01 08:49:35 +0000201
202 InstString.reset(new std::string);
203 InstStream.reset(new raw_string_ostream(*InstString));
Sean Callanan4285b292010-04-09 00:11:15 +0000204 InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
Sean Callananee5dfd42010-02-01 08:49:35 +0000205
206 if (!InstPrinter)
207 return;
208
209 GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
210 SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
211 SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
Sean Callanand74667e2010-02-02 02:18:20 +0000212
213 initMaps(*targetMachine->getRegisterInfo());
Sean Callananee5dfd42010-02-01 08:49:35 +0000214
215 Valid = true;
216}
217
218EDDisassembler::~EDDisassembler() {
Sean Callanan8f993b82010-04-08 00:48:21 +0000219 if (!valid())
Sean Callananee5dfd42010-02-01 08:49:35 +0000220 return;
221}
222
223namespace {
224 /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
225 /// as provided by the sd interface. See MemoryObject.
226 class EDMemoryObject : public llvm::MemoryObject {
227 private:
228 EDByteReaderCallback Callback;
229 void *Arg;
230 public:
231 EDMemoryObject(EDByteReaderCallback callback,
232 void *arg) : Callback(callback), Arg(arg) { }
233 ~EDMemoryObject() { }
234 uint64_t getBase() const { return 0x0; }
235 uint64_t getExtent() const { return (uint64_t)-1; }
236 int readByte(uint64_t address, uint8_t *ptr) const {
Sean Callanan8f993b82010-04-08 00:48:21 +0000237 if (!Callback)
Sean Callananee5dfd42010-02-01 08:49:35 +0000238 return -1;
239
Sean Callanan8f993b82010-04-08 00:48:21 +0000240 if (Callback(ptr, address, Arg))
Sean Callananee5dfd42010-02-01 08:49:35 +0000241 return -1;
242
243 return 0;
244 }
245 };
246}
247
248EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
249 uint64_t address,
250 void *arg) {
251 EDMemoryObject memoryObject(byteReader, arg);
252
253 MCInst* inst = new MCInst;
254 uint64_t byteSize;
255
256 if (!Disassembler->getInstruction(*inst,
257 byteSize,
258 memoryObject,
259 address,
260 ErrorStream)) {
261 delete inst;
262 return NULL;
Sean Callanan8f993b82010-04-08 00:48:21 +0000263 } else {
Sean Callanan9899f702010-04-13 21:21:57 +0000264 const llvm::EDInstInfo *thisInstInfo;
Sean Callanan8f993b82010-04-08 00:48:21 +0000265
266 thisInstInfo = &InstInfos[inst->getOpcode()];
Sean Callananee5dfd42010-02-01 08:49:35 +0000267
268 EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
269 return sdInst;
270 }
271}
272
273void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
274 unsigned numRegisters = registerInfo.getNumRegs();
275 unsigned registerIndex;
276
277 for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
278 const char* registerName = registerInfo.get(registerIndex).Name;
279
280 RegVec.push_back(registerName);
281 RegRMap[registerName] = registerIndex;
282 }
283
Sean Callanan8f993b82010-04-08 00:48:21 +0000284 switch (Key.Arch) {
285 default:
286 break;
287 case Triple::x86:
288 case Triple::x86_64:
Sean Callananee5dfd42010-02-01 08:49:35 +0000289 stackPointers.insert(registerIDWithName("SP"));
290 stackPointers.insert(registerIDWithName("ESP"));
291 stackPointers.insert(registerIDWithName("RSP"));
292
293 programCounters.insert(registerIDWithName("IP"));
294 programCounters.insert(registerIDWithName("EIP"));
295 programCounters.insert(registerIDWithName("RIP"));
Sean Callanan8f993b82010-04-08 00:48:21 +0000296 break;
297 case Triple::arm:
298 case Triple::thumb:
299 stackPointers.insert(registerIDWithName("SP"));
300
301 programCounters.insert(registerIDWithName("PC"));
302 break;
Sean Callananee5dfd42010-02-01 08:49:35 +0000303 }
304}
305
306const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
307 if (registerID >= RegVec.size())
308 return NULL;
309 else
310 return RegVec[registerID].c_str();
311}
312
313unsigned EDDisassembler::registerIDWithName(const char *name) const {
314 regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
315 if (iter == RegRMap.end())
316 return 0;
317 else
318 return (*iter).second;
319}
320
321bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
322 return (stackPointers.find(registerID) != stackPointers.end());
323}
324
325bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
326 return (programCounters.find(registerID) != programCounters.end());
327}
328
Chris Lattnerd3740872010-04-04 05:04:31 +0000329int EDDisassembler::printInst(std::string &str, MCInst &inst) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000330 PrinterMutex.acquire();
331
Chris Lattnerd3740872010-04-04 05:04:31 +0000332 InstPrinter->printInst(&inst, *InstStream);
Sean Callananee5dfd42010-02-01 08:49:35 +0000333 InstStream->flush();
334 str = *InstString;
335 InstString->clear();
336
337 PrinterMutex.release();
338
339 return 0;
340}
341
342int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
343 SmallVectorImpl<AsmToken> &tokens,
344 const std::string &str) {
345 int ret = 0;
346
Sean Callanan8f993b82010-04-08 00:48:21 +0000347 switch (Key.Arch) {
348 default:
349 return -1;
350 case Triple::x86:
351 case Triple::x86_64:
352 case Triple::arm:
353 case Triple::thumb:
354 break;
355 }
356
Sean Callananee5dfd42010-02-01 08:49:35 +0000357 const char *cStr = str.c_str();
358 MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
359
360 StringRef instName;
361 SMLoc instLoc;
362
363 SourceMgr sourceMgr;
364 sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
Chris Lattnerc18409a2010-03-11 22:53:35 +0000365 MCContext context(*AsmInfo);
366 OwningPtr<MCStreamer> streamer(createNullStreamer(context));
Sean Callananee5dfd42010-02-01 08:49:35 +0000367 AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo);
Chris Lattnerc18409a2010-03-11 22:53:35 +0000368 OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
Sean Callananee5dfd42010-02-01 08:49:35 +0000369
370 AsmToken OpcodeToken = genericParser.Lex();
Sean Callanana8702562010-04-24 01:00:16 +0000371 AsmToken NextToken = genericParser.Lex(); // consume next token, because specificParser expects us to
372
Sean Callanan8f993b82010-04-08 00:48:21 +0000373 if (OpcodeToken.is(AsmToken::Identifier)) {
Sean Callananee5dfd42010-02-01 08:49:35 +0000374 instName = OpcodeToken.getString();
375 instLoc = OpcodeToken.getLoc();
Sean Callanana8702562010-04-24 01:00:16 +0000376
377 if (NextToken.isNot(AsmToken::Eof) &&
378 TargetParser->ParseInstruction(instName, instLoc, operands))
Sean Callananee5dfd42010-02-01 08:49:35 +0000379 ret = -1;
Sean Callanan8f993b82010-04-08 00:48:21 +0000380 } else {
Sean Callananee5dfd42010-02-01 08:49:35 +0000381 ret = -1;
382 }
383
Sean Callananee5dfd42010-02-01 08:49:35 +0000384 ParserMutex.acquire();
385
386 if (!ret) {
387 GenericAsmLexer->setBuffer(buf);
388
389 while (SpecificAsmLexer->Lex(),
390 SpecificAsmLexer->isNot(AsmToken::Eof) &&
391 SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
392 if (SpecificAsmLexer->is(AsmToken::Error)) {
393 ret = -1;
394 break;
395 }
396 tokens.push_back(SpecificAsmLexer->getTok());
397 }
398 }
399
400 ParserMutex.release();
401
402 return ret;
403}
404
405int EDDisassembler::llvmSyntaxVariant() const {
406 return LLVMSyntaxVariant;
407}