| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 1 | //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===// | 
|  | 2 | // | 
|  | 3 | //                     The LLVM Compiler Infrastructure | 
|  | 4 | // | 
|  | 5 | // This file is distributed under the University of Illinois Open Source | 
|  | 6 | // License. See LICENSE.TXT for details. | 
|  | 7 | // | 
|  | 8 | //===----------------------------------------------------------------------===// | 
|  | 9 | // | 
|  | 10 | // This file implements the Enhanced Disassembly library's  disassembler class. | 
|  | 11 | // The disassembler is responsible for vending individual instructions according | 
|  | 12 | // to a given architecture and disassembly syntax. | 
|  | 13 | // | 
|  | 14 | //===----------------------------------------------------------------------===// | 
|  | 15 |  | 
|  | 16 | #include "llvm/ADT/OwningPtr.h" | 
|  | 17 | #include "llvm/ADT/SmallVector.h" | 
|  | 18 | #include "llvm/MC/MCAsmInfo.h" | 
|  | 19 | #include "llvm/MC/MCContext.h" | 
|  | 20 | #include "llvm/MC/MCDisassembler.h" | 
|  | 21 | #include "llvm/MC/MCExpr.h" | 
|  | 22 | #include "llvm/MC/MCInst.h" | 
|  | 23 | #include "llvm/MC/MCInstPrinter.h" | 
|  | 24 | #include "llvm/MC/MCStreamer.h" | 
|  | 25 | #include "llvm/MC/MCParser/AsmLexer.h" | 
|  | 26 | #include "llvm/MC/MCParser/AsmParser.h" | 
|  | 27 | #include "llvm/MC/MCParser/MCAsmParser.h" | 
|  | 28 | #include "llvm/MC/MCParser/MCParsedAsmOperand.h" | 
|  | 29 | #include "llvm/Support/MemoryBuffer.h" | 
|  | 30 | #include "llvm/Support/MemoryObject.h" | 
|  | 31 | #include "llvm/Support/SourceMgr.h" | 
|  | 32 | #include "llvm/Target/TargetAsmLexer.h" | 
|  | 33 | #include "llvm/Target/TargetAsmParser.h" | 
|  | 34 | #include "llvm/Target/TargetRegistry.h" | 
|  | 35 | #include "llvm/Target/TargetMachine.h" | 
|  | 36 | #include "llvm/Target/TargetRegisterInfo.h" | 
|  | 37 | #include "llvm/Target/TargetSelect.h" | 
|  | 38 |  | 
|  | 39 | #include "EDDisassembler.h" | 
|  | 40 | #include "EDInst.h" | 
|  | 41 |  | 
|  | 42 | #include "../../lib/Target/X86/X86GenEDInfo.inc" | 
|  | 43 |  | 
|  | 44 | using namespace llvm; | 
|  | 45 |  | 
|  | 46 | bool EDDisassembler::sInitialized = false; | 
|  | 47 | EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; | 
|  | 48 |  | 
|  | 49 | struct InfoMap { | 
|  | 50 | Triple::ArchType Arch; | 
|  | 51 | const char *String; | 
|  | 52 | const InstInfo *Info; | 
|  | 53 | }; | 
|  | 54 |  | 
|  | 55 | static struct InfoMap infomap[] = { | 
|  | 56 | { Triple::x86,          "i386-unknown-unknown",   instInfoX86 }, | 
|  | 57 | { Triple::x86_64,       "x86_64-unknown-unknown", instInfoX86 }, | 
|  | 58 | { Triple::InvalidArch,  NULL,                     NULL        } | 
|  | 59 | }; | 
|  | 60 |  | 
|  | 61 | /// infoFromArch - Returns the InfoMap corresponding to a given architecture, | 
|  | 62 | ///   or NULL if there is an error | 
|  | 63 | /// | 
|  | 64 | /// @arg arch - The Triple::ArchType for the desired architecture | 
|  | 65 | static const InfoMap *infoFromArch(Triple::ArchType arch) { | 
|  | 66 | unsigned int infoIndex; | 
|  | 67 |  | 
|  | 68 | for (infoIndex = 0; infomap[infoIndex].String != NULL; ++infoIndex) { | 
|  | 69 | if(arch == infomap[infoIndex].Arch) | 
|  | 70 | return &infomap[infoIndex]; | 
|  | 71 | } | 
|  | 72 |  | 
|  | 73 | return NULL; | 
|  | 74 | } | 
|  | 75 |  | 
|  | 76 | /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer | 
|  | 77 | ///   for the desired assembly syntax, suitable for passing to | 
|  | 78 | ///   Target::createMCInstPrinter() | 
|  | 79 | /// | 
|  | 80 | /// @arg arch   - The target architecture | 
|  | 81 | /// @arg syntax - The assembly syntax in sd form | 
|  | 82 | static int getLLVMSyntaxVariant(Triple::ArchType arch, | 
|  | 83 | EDAssemblySyntax_t syntax) { | 
|  | 84 | switch (syntax) { | 
|  | 85 | default: | 
|  | 86 | return -1; | 
|  | 87 | // Mappings below from X86AsmPrinter.cpp | 
|  | 88 | case kEDAssemblySyntaxX86ATT: | 
|  | 89 | if (arch == Triple::x86 || arch == Triple::x86_64) | 
|  | 90 | return 0; | 
|  | 91 | else | 
|  | 92 | return -1; | 
|  | 93 | case kEDAssemblySyntaxX86Intel: | 
|  | 94 | if (arch == Triple::x86 || arch == Triple::x86_64) | 
|  | 95 | return 1; | 
|  | 96 | else | 
|  | 97 | return -1; | 
|  | 98 | } | 
|  | 99 | } | 
|  | 100 |  | 
|  | 101 | #define BRINGUP_TARGET(tgt)           \ | 
|  | 102 | LLVMInitialize##tgt##TargetInfo();  \ | 
|  | 103 | LLVMInitialize##tgt##Target();      \ | 
|  | 104 | LLVMInitialize##tgt##AsmPrinter();  \ | 
|  | 105 | LLVMInitialize##tgt##AsmParser();   \ | 
|  | 106 | LLVMInitialize##tgt##Disassembler(); | 
|  | 107 |  | 
|  | 108 | void EDDisassembler::initialize() { | 
|  | 109 | if (sInitialized) | 
|  | 110 | return; | 
|  | 111 |  | 
|  | 112 | sInitialized = true; | 
|  | 113 |  | 
|  | 114 | BRINGUP_TARGET(X86) | 
|  | 115 | } | 
|  | 116 |  | 
|  | 117 | #undef BRINGUP_TARGET | 
|  | 118 |  | 
|  | 119 | EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, | 
|  | 120 | EDAssemblySyntax_t syntax) { | 
|  | 121 | CPUKey key; | 
|  | 122 | key.Arch = arch; | 
|  | 123 | key.Syntax = syntax; | 
|  | 124 |  | 
|  | 125 | EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); | 
|  | 126 |  | 
|  | 127 | if (i != sDisassemblers.end()) { | 
|  | 128 | return i->second; | 
|  | 129 | } | 
|  | 130 | else { | 
|  | 131 | EDDisassembler* sdd = new EDDisassembler(key); | 
|  | 132 | if(!sdd->valid()) { | 
|  | 133 | delete sdd; | 
|  | 134 | return NULL; | 
|  | 135 | } | 
|  | 136 |  | 
|  | 137 | sDisassemblers[key] = sdd; | 
|  | 138 |  | 
|  | 139 | return sdd; | 
|  | 140 | } | 
|  | 141 |  | 
|  | 142 | return NULL; | 
|  | 143 | } | 
|  | 144 |  | 
|  | 145 | EDDisassembler *EDDisassembler::getDisassembler(StringRef str, | 
|  | 146 | EDAssemblySyntax_t syntax) { | 
|  | 147 | Triple triple(str); | 
|  | 148 |  | 
|  | 149 | return getDisassembler(triple.getArch(), syntax); | 
|  | 150 | } | 
|  | 151 |  | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 152 | EDDisassembler::EDDisassembler(CPUKey &key) : | 
|  | 153 | Valid(false), ErrorString(), ErrorStream(ErrorString), Key(key) { | 
|  | 154 | const InfoMap *infoMap = infoFromArch(key.Arch); | 
|  | 155 |  | 
|  | 156 | if (!infoMap) | 
|  | 157 | return; | 
|  | 158 |  | 
|  | 159 | const char *triple = infoMap->String; | 
|  | 160 |  | 
|  | 161 | int syntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); | 
|  | 162 |  | 
|  | 163 | if (syntaxVariant < 0) | 
|  | 164 | return; | 
|  | 165 |  | 
|  | 166 | std::string tripleString(triple); | 
|  | 167 | std::string errorString; | 
|  | 168 |  | 
|  | 169 | Tgt = TargetRegistry::lookupTarget(tripleString, | 
|  | 170 | errorString); | 
|  | 171 |  | 
|  | 172 | if (!Tgt) | 
|  | 173 | return; | 
|  | 174 |  | 
|  | 175 | std::string featureString; | 
|  | 176 |  | 
|  | 177 | OwningPtr<const TargetMachine> | 
|  | 178 | targetMachine(Tgt->createTargetMachine(tripleString, | 
|  | 179 | featureString)); | 
|  | 180 |  | 
|  | 181 | const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo(); | 
|  | 182 |  | 
|  | 183 | if (!registerInfo) | 
|  | 184 | return; | 
|  | 185 |  | 
|  | 186 | AsmInfo.reset(Tgt->createAsmInfo(tripleString)); | 
|  | 187 |  | 
|  | 188 | if (!AsmInfo) | 
|  | 189 | return; | 
|  | 190 |  | 
|  | 191 | Disassembler.reset(Tgt->createMCDisassembler()); | 
|  | 192 |  | 
|  | 193 | if (!Disassembler) | 
|  | 194 | return; | 
|  | 195 |  | 
|  | 196 | InstString.reset(new std::string); | 
|  | 197 | InstStream.reset(new raw_string_ostream(*InstString)); | 
|  | 198 |  | 
|  | 199 | InstPrinter.reset(Tgt->createMCInstPrinter(syntaxVariant, | 
|  | 200 | *AsmInfo, | 
|  | 201 | *InstStream)); | 
|  | 202 |  | 
|  | 203 | if (!InstPrinter) | 
|  | 204 | return; | 
|  | 205 |  | 
|  | 206 | GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); | 
|  | 207 | SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); | 
|  | 208 | SpecificAsmLexer->InstallLexer(*GenericAsmLexer); | 
|  | 209 |  | 
|  | 210 | InstInfos = infoMap->Info; | 
| Sean Callanan | d74667e | 2010-02-02 02:18:20 +0000 | [diff] [blame] | 211 |  | 
|  | 212 | initMaps(*targetMachine->getRegisterInfo()); | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 213 |  | 
|  | 214 | Valid = true; | 
|  | 215 | } | 
|  | 216 |  | 
|  | 217 | EDDisassembler::~EDDisassembler() { | 
|  | 218 | if(!valid()) | 
|  | 219 | return; | 
|  | 220 | } | 
|  | 221 |  | 
|  | 222 | namespace { | 
|  | 223 | /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback | 
|  | 224 | ///   as provided by the sd interface.  See MemoryObject. | 
|  | 225 | class EDMemoryObject : public llvm::MemoryObject { | 
|  | 226 | private: | 
|  | 227 | EDByteReaderCallback Callback; | 
|  | 228 | void *Arg; | 
|  | 229 | public: | 
|  | 230 | EDMemoryObject(EDByteReaderCallback callback, | 
|  | 231 | void *arg) : Callback(callback), Arg(arg) { } | 
|  | 232 | ~EDMemoryObject() { } | 
|  | 233 | uint64_t getBase() const { return 0x0; } | 
|  | 234 | uint64_t getExtent() const { return (uint64_t)-1; } | 
|  | 235 | int readByte(uint64_t address, uint8_t *ptr) const { | 
|  | 236 | if(!Callback) | 
|  | 237 | return -1; | 
|  | 238 |  | 
|  | 239 | if(Callback(ptr, address, Arg)) | 
|  | 240 | return -1; | 
|  | 241 |  | 
|  | 242 | return 0; | 
|  | 243 | } | 
|  | 244 | }; | 
|  | 245 | } | 
|  | 246 |  | 
|  | 247 | EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, | 
|  | 248 | uint64_t address, | 
|  | 249 | void *arg) { | 
|  | 250 | EDMemoryObject memoryObject(byteReader, arg); | 
|  | 251 |  | 
|  | 252 | MCInst* inst = new MCInst; | 
|  | 253 | uint64_t byteSize; | 
|  | 254 |  | 
|  | 255 | if (!Disassembler->getInstruction(*inst, | 
|  | 256 | byteSize, | 
|  | 257 | memoryObject, | 
|  | 258 | address, | 
|  | 259 | ErrorStream)) { | 
|  | 260 | delete inst; | 
|  | 261 | return NULL; | 
|  | 262 | } | 
|  | 263 | else { | 
|  | 264 | const InstInfo *thisInstInfo = &InstInfos[inst->getOpcode()]; | 
|  | 265 |  | 
|  | 266 | EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); | 
|  | 267 | return sdInst; | 
|  | 268 | } | 
|  | 269 | } | 
|  | 270 |  | 
|  | 271 | void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { | 
|  | 272 | unsigned numRegisters = registerInfo.getNumRegs(); | 
|  | 273 | unsigned registerIndex; | 
|  | 274 |  | 
|  | 275 | for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { | 
|  | 276 | const char* registerName = registerInfo.get(registerIndex).Name; | 
|  | 277 |  | 
|  | 278 | RegVec.push_back(registerName); | 
|  | 279 | RegRMap[registerName] = registerIndex; | 
|  | 280 | } | 
|  | 281 |  | 
|  | 282 | if (Key.Arch == Triple::x86 || | 
|  | 283 | Key.Arch == Triple::x86_64) { | 
|  | 284 | stackPointers.insert(registerIDWithName("SP")); | 
|  | 285 | stackPointers.insert(registerIDWithName("ESP")); | 
|  | 286 | stackPointers.insert(registerIDWithName("RSP")); | 
|  | 287 |  | 
|  | 288 | programCounters.insert(registerIDWithName("IP")); | 
|  | 289 | programCounters.insert(registerIDWithName("EIP")); | 
|  | 290 | programCounters.insert(registerIDWithName("RIP")); | 
|  | 291 | } | 
|  | 292 | } | 
|  | 293 |  | 
|  | 294 | const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { | 
|  | 295 | if (registerID >= RegVec.size()) | 
|  | 296 | return NULL; | 
|  | 297 | else | 
|  | 298 | return RegVec[registerID].c_str(); | 
|  | 299 | } | 
|  | 300 |  | 
|  | 301 | unsigned EDDisassembler::registerIDWithName(const char *name) const { | 
|  | 302 | regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); | 
|  | 303 | if (iter == RegRMap.end()) | 
|  | 304 | return 0; | 
|  | 305 | else | 
|  | 306 | return (*iter).second; | 
|  | 307 | } | 
|  | 308 |  | 
|  | 309 | bool EDDisassembler::registerIsStackPointer(unsigned registerID) { | 
|  | 310 | return (stackPointers.find(registerID) != stackPointers.end()); | 
|  | 311 | } | 
|  | 312 |  | 
|  | 313 | bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { | 
|  | 314 | return (programCounters.find(registerID) != programCounters.end()); | 
|  | 315 | } | 
|  | 316 |  | 
|  | 317 | int EDDisassembler::printInst(std::string& str, | 
|  | 318 | MCInst& inst) { | 
|  | 319 | PrinterMutex.acquire(); | 
|  | 320 |  | 
|  | 321 | InstPrinter->printInst(&inst); | 
|  | 322 | InstStream->flush(); | 
|  | 323 | str = *InstString; | 
|  | 324 | InstString->clear(); | 
|  | 325 |  | 
|  | 326 | PrinterMutex.release(); | 
|  | 327 |  | 
|  | 328 | return 0; | 
|  | 329 | } | 
|  | 330 |  | 
|  | 331 | int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands, | 
|  | 332 | SmallVectorImpl<AsmToken> &tokens, | 
|  | 333 | const std::string &str) { | 
|  | 334 | int ret = 0; | 
|  | 335 |  | 
|  | 336 | const char *cStr = str.c_str(); | 
|  | 337 | MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); | 
|  | 338 |  | 
|  | 339 | StringRef instName; | 
|  | 340 | SMLoc instLoc; | 
|  | 341 |  | 
|  | 342 | SourceMgr sourceMgr; | 
|  | 343 | sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over | 
| Chris Lattner | c18409a | 2010-03-11 22:53:35 +0000 | [diff] [blame] | 344 | MCContext context(*AsmInfo); | 
|  | 345 | OwningPtr<MCStreamer> streamer(createNullStreamer(context)); | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 346 | AsmParser genericParser(sourceMgr, context, *streamer, *AsmInfo); | 
| Chris Lattner | c18409a | 2010-03-11 22:53:35 +0000 | [diff] [blame] | 347 | OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser)); | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 348 |  | 
|  | 349 | AsmToken OpcodeToken = genericParser.Lex(); | 
|  | 350 |  | 
|  | 351 | if(OpcodeToken.is(AsmToken::Identifier)) { | 
|  | 352 | instName = OpcodeToken.getString(); | 
|  | 353 | instLoc = OpcodeToken.getLoc(); | 
| Chris Lattner | c18409a | 2010-03-11 22:53:35 +0000 | [diff] [blame] | 354 | if (TargetParser->ParseInstruction(instName, instLoc, operands)) | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 355 | ret = -1; | 
|  | 356 | } | 
|  | 357 | else { | 
|  | 358 | ret = -1; | 
|  | 359 | } | 
|  | 360 |  | 
| Sean Callanan | ee5dfd4 | 2010-02-01 08:49:35 +0000 | [diff] [blame] | 361 | ParserMutex.acquire(); | 
|  | 362 |  | 
|  | 363 | if (!ret) { | 
|  | 364 | GenericAsmLexer->setBuffer(buf); | 
|  | 365 |  | 
|  | 366 | while (SpecificAsmLexer->Lex(), | 
|  | 367 | SpecificAsmLexer->isNot(AsmToken::Eof) && | 
|  | 368 | SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { | 
|  | 369 | if (SpecificAsmLexer->is(AsmToken::Error)) { | 
|  | 370 | ret = -1; | 
|  | 371 | break; | 
|  | 372 | } | 
|  | 373 | tokens.push_back(SpecificAsmLexer->getTok()); | 
|  | 374 | } | 
|  | 375 | } | 
|  | 376 |  | 
|  | 377 | ParserMutex.release(); | 
|  | 378 |  | 
|  | 379 | return ret; | 
|  | 380 | } | 
|  | 381 |  | 
|  | 382 | int EDDisassembler::llvmSyntaxVariant() const { | 
|  | 383 | return LLVMSyntaxVariant; | 
|  | 384 | } |