Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 1 | /*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\ |
| 2 | |* *| |
| 3 | |* The LLVM Compiler Infrastructure *| |
| 4 | |* *| |
| 5 | |* This file is distributed under the University of Illinois Open Source *| |
| 6 | |* License. See LICENSE.TXT for details. *| |
| 7 | |* *| |
| 8 | |*===----------------------------------------------------------------------===*| |
| 9 | |* *| |
| 10 | |* This header declares the C interface to EnhancedDisassembly.so, which *| |
| 11 | |* implements a disassembler with the ability to extract operand values and *| |
| 12 | |* individual tokens from assembly instructions. *| |
| 13 | |* *| |
| 14 | |* The header declares additional interfaces if the host compiler supports *| |
| 15 | |* the blocks API. *| |
| 16 | |* *| |
| 17 | \*===----------------------------------------------------------------------===*/ |
| 18 | |
| 19 | #ifndef LLVM_C_ENHANCEDDISASSEMBLY_H |
| 20 | #define LLVM_C_ENHANCEDDISASSEMBLY_H |
| 21 | |
Sean Callanan | 8f993b8 | 2010-04-08 00:48:21 +0000 | [diff] [blame^] | 22 | #include <inttypes.h> |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 23 | |
| 24 | #ifdef __cplusplus |
| 25 | extern "C" { |
| 26 | #endif |
| 27 | |
| 28 | /*! |
| 29 | @typedef EDByteReaderCallback |
| 30 | Interface to memory from which instructions may be read. |
| 31 | @param byte A pointer whose target should be filled in with the data returned. |
| 32 | @param address The address of the byte to be read. |
| 33 | @param arg An anonymous argument for client use. |
| 34 | @result 0 on success; -1 otherwise. |
| 35 | */ |
| 36 | typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); |
| 37 | |
| 38 | /*! |
| 39 | @typedef EDRegisterReaderCallback |
| 40 | Interface to a register state from which register values may be read. |
| 41 | @param value A pointer whose target should be filled in with the value of the |
| 42 | register. |
| 43 | @param regID The LLVM register identifier for the register to read. |
| 44 | @param arg An anonymous argument for client use. |
| 45 | @result 0 if the register could be read; -1 otherwise. |
| 46 | */ |
| 47 | typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, |
| 48 | void* arg); |
| 49 | |
| 50 | /*! |
| 51 | @typedef EDAssemblySyntax_t |
| 52 | An assembly syntax for use in tokenizing instructions. |
| 53 | */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 54 | typedef enum { |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 55 | /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 56 | kEDAssemblySyntaxX86Intel = 0, |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 57 | /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ |
Sean Callanan | 8f993b8 | 2010-04-08 00:48:21 +0000 | [diff] [blame^] | 58 | kEDAssemblySyntaxX86ATT = 1, |
| 59 | kEDAssemblySyntaxARMUAL = 2 /*! @constant kEDAssemblySyntaxARMUAL UAL syntax for ARM. NOTE(review): presumably ARM's Unified Assembler Language; confirm. */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 60 | } EDAssemblySyntax_t; |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 61 | |
| 62 | /*! |
| 63 | @typedef EDDisassemblerRef |
| 64 | Encapsulates a disassembler for a single CPU architecture. |
| 65 | */ |
| 66 | struct EDDisassembler; |
| 67 | typedef struct EDDisassembler *EDDisassemblerRef; |
| 68 | |
| 69 | /*! |
| 70 | @typedef EDInstRef |
| 71 | Encapsulates a single disassembled instruction in one assembly syntax. |
| 72 | */ |
| 73 | struct EDInst; |
| 74 | typedef struct EDInst *EDInstRef; |
| 75 | |
| 76 | /*! |
| 77 | @typedef EDTokenRef |
| 78 | Encapsulates a token from the disassembly of an instruction. |
| 79 | */ |
| 80 | struct EDToken; |
| 81 | typedef struct EDToken *EDTokenRef; |
| 82 | |
| 83 | /*! |
| 84 | @typedef EDOperandRef |
| 85 | Encapsulates an operand of an instruction. |
| 86 | */ |
| 87 | struct EDOperand; |
| 88 | typedef struct EDOperand *EDOperandRef; |
| 89 | |
| 90 | /*! |
| 91 | @functiongroup Getting a disassembler |
| 92 | */ |
| 93 | |
| 94 | /*! |
| 95 | @function EDGetDisassembler |
| 96 | Gets the disassembler for a given target. |
| 97 | @param disassembler A pointer whose target will be filled in with the |
| 98 | disassembler. |
| 99 | @param triple Identifies the target. Example: "x86_64-apple-darwin10" |
| 100 | @param syntax The assembly syntax to use when decoding instructions. |
| 101 | @result 0 on success; -1 otherwise. |
| 102 | */ |
| 103 | int EDGetDisassembler(EDDisassemblerRef *disassembler, |
| 104 | const char *triple, |
| 105 | EDAssemblySyntax_t syntax); |
| 106 | |
| 107 | /*! |
| 108 | @functiongroup Generic architectural queries |
| 109 | */ |
| 110 | |
| 111 | /*! |
| 112 | @function EDGetRegisterName |
| 113 | Gets the human-readable name for a given register. |
| 114 | @param regName A pointer whose target will be pointed at the name of the |
| 115 | register. The name does not need to be deallocated by the caller. (NOTE(review): the original sentence was cut off at this point; confirm how long the name remains valid.) |
| 116 | @param disassembler The disassembler to query for the name. |
| 117 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 118 | @result 0 on success; -1 otherwise. |
| 119 | */ |
| 120 | int EDGetRegisterName(const char** regName, |
| 121 | EDDisassemblerRef disassembler, |
| 122 | unsigned regID); |
| 123 | |
| 124 | /*! |
| 125 | @function EDRegisterIsStackPointer |
| 126 | Determines if a register is one of the platform's stack-pointer registers. |
| 127 | @param disassembler The disassembler to query. |
| 128 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 129 | @result 1 if true; 0 otherwise. |
| 130 | */ |
| 131 | int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, |
| 132 | unsigned regID); |
| 133 | |
| 134 | /*! |
| 135 | @function EDRegisterIsProgramCounter |
| 136 | Determines if a register is one of the platform's program-counter registers. |
| 137 | @param disassembler The disassembler to query. |
| 138 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 139 | @result 1 if true; 0 otherwise. |
| 140 | */ |
| 141 | int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, |
| 142 | unsigned regID); |
| 143 | |
| 144 | /*! |
| 145 | @functiongroup Creating and querying instructions |
| 146 | */ |
| 147 | |
| 148 | /*! |
| 149 | @function EDCreateInsts |
| 150 | Gets a set of contiguous instructions from a disassembler. |
| 151 | @param insts A pointer to an array that will be filled in with the |
| 152 | instructions. Must have at least count entries. Entries not filled in will |
| 153 | be set to NULL. |
| 154 | @param count The maximum number of instructions to fill in. |
| 155 | @param disassembler The disassembler to use when decoding the instructions. |
| 156 | @param byteReader The function to use when reading the instructions' machine |
| 157 | code. |
| 158 | @param address The address of the first byte of the first instruction. |
| 159 | @param arg An anonymous argument to be passed to byteReader. |
| 160 | @result The number of instructions read on success; 0 otherwise. |
| 161 | */ |
| 162 | unsigned int EDCreateInsts(EDInstRef *insts, |
| 163 | unsigned int count, |
| 164 | EDDisassemblerRef disassembler, |
| 165 | EDByteReaderCallback byteReader, |
| 166 | uint64_t address, |
| 167 | void *arg); |
| 168 | |
| 169 | /*! |
| 170 | @function EDReleaseInst |
| 171 | Frees the memory for an instruction. The instruction can no longer be accessed |
| 172 | after this call. |
| 173 | @param inst The instruction to be freed. |
| 174 | */ |
| 175 | void EDReleaseInst(EDInstRef inst); |
| 176 | |
| 177 | /*! |
| 178 | @function EDInstByteSize |
| 179 | @param inst The instruction to be queried. |
Sean Callanan | 7670658 | 2010-02-04 01:43:08 +0000 | [diff] [blame] | 180 | @result The number of bytes in the instruction's machine-code representation. |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 181 | */ |
| 182 | int EDInstByteSize(EDInstRef inst); |
| 183 | |
| 184 | /*! |
| 185 | @function EDGetInstString |
| 186 | Gets the disassembled text equivalent of the instruction. |
| 187 | @param buf A pointer whose target will be filled in with a pointer to the |
| 188 | string. (The string becomes invalid when the instruction is released.) |
| 189 | @param inst The instruction to be queried. |
| 190 | @result 0 on success; -1 otherwise. |
| 191 | */ |
| 192 | int EDGetInstString(const char **buf, |
| 193 | EDInstRef inst); |
| 194 | |
| 195 | /*! |
| 196 | @function EDInstID |
| 197 | @param instID A pointer whose target will be filled in with the LLVM identifier |
| 198 | for the instruction. |
| 199 | @param inst The instruction to be queried. |
| 200 | @result 0 on success; -1 otherwise. |
| 201 | */ |
| 202 | int EDInstID(unsigned *instID, EDInstRef inst); |
| 203 | |
| 204 | /*! |
| 205 | @function EDInstIsBranch |
| 206 | @param inst The instruction to be queried. |
| 207 | @result 1 if the instruction is a branch instruction; 0 if it is some other |
| 208 | type of instruction; -1 if there was an error. |
| 209 | */ |
| 210 | int EDInstIsBranch(EDInstRef inst); |
| 211 | |
| 212 | /*! |
| 213 | @function EDInstIsMove |
| 214 | @param inst The instruction to be queried. |
| 215 | @result 1 if the instruction is a move instruction; 0 if it is some other |
| 216 | type of instruction; -1 if there was an error. |
| 217 | */ |
| 218 | int EDInstIsMove(EDInstRef inst); |
| 219 | |
| 220 | /*! |
| 221 | @function EDBranchTargetID |
| 222 | @param inst The instruction to be queried. |
| 223 | @result The ID of the branch target operand, suitable for use with |
| 224 | EDGetOperand. -1 if no such operand exists. |
| 225 | */ |
| 226 | int EDBranchTargetID(EDInstRef inst); |
| 227 | |
| 228 | /*! |
| 229 | @function EDMoveSourceID |
| 230 | @param inst The instruction to be queried. |
| 231 | @result The ID of the move source operand, suitable for use with |
| 232 | EDGetOperand. -1 if no such operand exists. |
| 233 | */ |
| 234 | int EDMoveSourceID(EDInstRef inst); |
| 235 | |
| 236 | /*! |
| 237 | @function EDMoveTargetID |
| 238 | @param inst The instruction to be queried. |
| 239 | @result The ID of the move target operand, suitable for use with |
| 240 | EDGetOperand. -1 if no such operand exists. |
| 241 | */ |
| 242 | int EDMoveTargetID(EDInstRef inst); |
| 243 | |
| 244 | /*! |
| 245 | @functiongroup Creating and querying tokens |
| 246 | */ |
| 247 | |
| 248 | /*! |
| 249 | @function EDNumTokens |
| 250 | @param inst The instruction to be queried. |
| 251 | @result The number of tokens in the instruction, or -1 on error. |
| 252 | */ |
| 253 | int EDNumTokens(EDInstRef inst); |
| 254 | |
| 255 | /*! |
| 256 | @function EDGetToken |
| 257 | Retrieves a token from an instruction. The token is valid until the |
| 258 | instruction is released. |
| 259 | @param token A pointer to be filled in with the token. |
| 260 | @param inst The instruction to be queried. |
| 261 | @param index The index of the token in the instruction. |
| 262 | @result 0 on success; -1 otherwise. |
| 263 | */ |
| 264 | int EDGetToken(EDTokenRef *token, |
| 265 | EDInstRef inst, |
| 266 | int index); |
| 267 | |
| 268 | /*! |
| 269 | @function EDGetTokenString |
| 270 | Gets the disassembled text for a token. |
| 271 | @param buf A pointer whose target will be filled in with a pointer to the |
| 272 | string. (The string becomes invalid when the token does; per EDGetToken, a token is valid until its instruction is released.) |
| 273 | @param token The token to be queried. |
| 274 | @result 0 on success; -1 otherwise. |
| 275 | */ |
| 276 | int EDGetTokenString(const char **buf, |
| 277 | EDTokenRef token); |
| 278 | |
| 279 | /*! |
| 280 | @function EDOperandIndexForToken |
| 281 | Returns the index of the operand to which a token belongs. |
| 282 | @param token The token to be queried. |
| 283 | @result The operand index on success; -1 otherwise. |
| 284 | */ |
| 285 | int EDOperandIndexForToken(EDTokenRef token); |
| 286 | |
| 287 | /*! |
| 288 | @function EDTokenIsWhitespace |
| 289 | @param token The token to be queried. |
| 290 | @result 1 if the token is whitespace; 0 if not; -1 on error. |
| 291 | */ |
| 292 | int EDTokenIsWhitespace(EDTokenRef token); |
| 293 | |
| 294 | /*! |
| 295 | @function EDTokenIsPunctuation |
| 296 | @param token The token to be queried. |
| 297 | @result 1 if the token is punctuation; 0 if not; -1 on error. |
| 298 | */ |
| 299 | int EDTokenIsPunctuation(EDTokenRef token); |
| 300 | |
| 301 | /*! |
| 302 | @function EDTokenIsOpcode |
| 303 | @param token The token to be queried. |
| 304 | @result 1 if the token is an opcode; 0 if not; -1 on error. |
| 305 | */ |
| 306 | int EDTokenIsOpcode(EDTokenRef token); |
| 307 | |
| 308 | /*! |
| 309 | @function EDTokenIsLiteral |
| 310 | @param token The token to be queried. |
| 311 | @result 1 if the token is a numeric literal; 0 if not; -1 on error. |
| 312 | */ |
| 313 | int EDTokenIsLiteral(EDTokenRef token); |
| 314 | |
| 315 | /*! |
| 316 | @function EDTokenIsRegister |
| 317 | @param token The token to be queried. |
| 318 | @result 1 if the token identifies a register; 0 if not; -1 on error. |
| 319 | */ |
| 320 | int EDTokenIsRegister(EDTokenRef token); |
| 321 | |
| 322 | /*! |
| 323 | @function EDTokenIsNegativeLiteral |
| 324 | @param token The token to be queried. |
| 325 | @result 1 if the token is a negative signed literal; 0 if not; -1 on error. |
| 326 | */ |
| 327 | int EDTokenIsNegativeLiteral(EDTokenRef token); |
| 328 | |
| 329 | /*! |
| 330 | @function EDLiteralTokenAbsoluteValue |
| 331 | @param value A pointer whose target will be filled in with the absolute value |
| 332 | of the literal. |
| 333 | @param token The token to be queried. |
| 334 | @result 0 on success; -1 otherwise. |
| 335 | */ |
| 336 | int EDLiteralTokenAbsoluteValue(uint64_t *value, |
| 337 | EDTokenRef token); |
| 338 | |
| 339 | /*! |
| 340 | @function EDRegisterTokenValue |
| 341 | @param registerID A pointer whose target will be filled in with the LLVM |
| 342 | register identifier for the token. |
| 343 | @param token The token to be queried. |
| 344 | @result 0 on success; -1 otherwise. |
| 345 | */ |
| 346 | int EDRegisterTokenValue(unsigned *registerID, |
| 347 | EDTokenRef token); |
| 348 | |
| 349 | /*! |
| 350 | @functiongroup Creating and querying operands |
| 351 | */ |
| 352 | |
| 353 | /*! |
| 354 | @function EDNumOperands |
| 355 | @param inst The instruction to be queried. |
| 356 | @result The number of operands in the instruction, or -1 on error. |
| 357 | */ |
| 358 | int EDNumOperands(EDInstRef inst); |
| 359 | |
| 360 | /*! |
| 361 | @function EDGetOperand |
| 362 | Retrieves an operand from an instruction. The operand is valid until the |
| 363 | instruction is released. |
| 364 | @param operand A pointer to be filled in with the operand. |
| 365 | @param inst The instruction to be queried. |
| 366 | @param index The index of the operand in the instruction. |
| 367 | @result 0 on success; -1 otherwise. |
| 368 | */ |
| 369 | int EDGetOperand(EDOperandRef *operand, |
| 370 | EDInstRef inst, |
| 371 | int index); |
Sean Callanan | 01cd79f | 2010-02-08 23:34:25 +0000 | [diff] [blame] | 372 | |
| 373 | /*! |
| 374 | @function EDOperandIsRegister |
| 375 | @param operand The operand to be queried. |
| 376 | @result 1 if the operand names a register; 0 if not; -1 on error. |
| 377 | */ |
| 378 | int EDOperandIsRegister(EDOperandRef operand); |
| 379 | |
| 380 | /*! |
| 381 | @function EDOperandIsImmediate |
| 382 | @param operand The operand to be queried. |
| 383 | @result 1 if the operand specifies an immediate value; 0 if not; -1 on error. |
| 384 | */ |
| 385 | int EDOperandIsImmediate(EDOperandRef operand); |
| 386 | |
| 387 | /*! |
| 388 | @function EDOperandIsMemory |
| 389 | @param operand The operand to be queried. |
| 390 | @result 1 if the operand specifies a location in memory; 0 if not; -1 on error. |
| 391 | */ |
| 392 | int EDOperandIsMemory(EDOperandRef operand); |
| 393 | |
| 394 | /*! |
| 395 | @function EDRegisterOperandValue |
| 396 | @param value A pointer whose target will be filled in with the LLVM register ID |
| 397 | of the register named by the operand. |
| 398 | @param operand The operand to be queried. |
| 399 | @result 0 on success; -1 otherwise. |
| 400 | */ |
| 401 | int EDRegisterOperandValue(unsigned *value, |
| 402 | EDOperandRef operand); |
| 403 | |
| 404 | /*! |
| 405 | @function EDImmediateOperandValue |
| 406 | @param value A pointer whose target will be filled in with the value of the |
| 407 | immediate. |
| 408 | @param operand The operand to be queried. |
| 409 | @result 0 on success; -1 otherwise. |
| 410 | */ |
| 411 | int EDImmediateOperandValue(uint64_t *value, |
| 412 | EDOperandRef operand); |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 413 | |
| 414 | /*! |
| 415 | @function EDEvaluateOperand |
Sean Callanan | 01cd79f | 2010-02-08 23:34:25 +0000 | [diff] [blame] | 416 | Evaluates an operand using a client-supplied register state accessor. Register |
| 417 | operands are evaluated by reading the value of the register; immediate operands |
| 418 | are evaluated by reporting the immediate value; memory operands are evaluated |
| 419 | by computing the target address (with only those relocations applied that were |
| 420 | already applied to the original bytes). |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 421 | @param result A pointer whose target is to be filled with the result of |
| 422 | evaluating the operand. |
| 423 | @param operand The operand to be evaluated. |
| 424 | @param regReader The function to use when reading registers from the register |
| 425 | state. |
| 426 | @param arg An anonymous argument for client use. |
| 427 | @result 0 if the operand could be evaluated; -1 otherwise. |
| 428 | */ |
| 429 | int EDEvaluateOperand(uint64_t *result, |
| 430 | EDOperandRef operand, |
| 431 | EDRegisterReaderCallback regReader, |
| 432 | void *arg); |
| 433 | |
| 434 | #ifdef __BLOCKS__ |
| 435 | |
| 436 | /*! |
| 437 | @typedef EDByteBlock_t |
| 438 | Block-based interface to memory from which instructions may be read. |
| 439 | @param byte A pointer whose target should be filled in with the data returned. |
| 440 | @param address The address of the byte to be read. |
| 441 | @result 0 on success; -1 otherwise. |
| 442 | */ |
| 443 | typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address); |
| 444 | |
| 445 | /*! |
| 446 | @typedef EDRegisterBlock_t |
| 447 | Block-based interface to a register state from which register values may be read. |
| 448 | @param value A pointer whose target should be filled in with the value of the |
| 449 | register. |
| 450 | @param regID The LLVM register identifier for the register to read. |
| 451 | @result 0 if the register could be read; -1 otherwise. |
| 452 | */ |
| 453 | typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); |
| 454 | |
| 455 | /*! |
| 456 | @typedef EDTokenVisitor_t |
| 457 | Block-based handler for individual tokens. |
| 458 | @param token The current token being read. |
| 459 | @result 0 to continue; 1 to stop normally; -1 on error. |
| 460 | */ |
| 461 | typedef int (^EDTokenVisitor_t)(EDTokenRef token); |
| 462 | |
| 463 | /*! @functiongroup Block-based interfaces */ |
| 464 | |
| 465 | /*! |
| 466 | @function EDBlockCreateInsts |
| 467 | Gets a set of contiguous instructions from a disassembler, using a block to |
| 468 | read memory. |
| 469 | @param insts A pointer to an array that will be filled in with the |
| 470 | instructions. Must have at least count entries. Entries not filled in will |
| 471 | be set to NULL. |
| 472 | @param count The maximum number of instructions to fill in. (Declared as int here, whereas EDCreateInsts takes unsigned int.) |
| 473 | @param disassembler The disassembler to use when decoding the instructions. |
| 474 | @param byteBlock The block to use when reading the instructions' machine |
| 475 | code. |
| 476 | @param address The address of the first byte of the first instruction. |
| 477 | @result The number of instructions read on success; 0 otherwise. |
| 478 | */ |
| 479 | unsigned int EDBlockCreateInsts(EDInstRef *insts, |
| 480 | int count, |
| 481 | EDDisassemblerRef disassembler, |
| 482 | EDByteBlock_t byteBlock, |
| 483 | uint64_t address); |
| 484 | |
| 485 | /*! |
| 486 | @function EDBlockEvaluateOperand |
| 487 | Evaluates an operand using a block to read registers. |
| 488 | @param result A pointer whose target is to be filled with the result of |
| 489 | evaluating the operand. |
| 490 | @param operand The operand to be evaluated. |
| 491 | @param regBlock The block to use when reading registers from the register |
| 492 | state. |
| 493 | @result 0 if the operand could be evaluated; -1 otherwise. |
| 494 | */ |
| 495 | int EDBlockEvaluateOperand(uint64_t *result, |
| 496 | EDOperandRef operand, |
| 497 | EDRegisterBlock_t regBlock); |
| 498 | |
| 499 | /*! |
| 500 | @function EDBlockVisitTokens |
| 501 | Visits every token with a visitor. |
| 502 | @param inst The instruction with the tokens to be visited. |
| 503 | @param visitor The visitor. |
| 504 | @result 0 if the visit ended normally; -1 if the visitor encountered an error |
| 505 | or there was some other error. |
| 506 | */ |
| 507 | int EDBlockVisitTokens(EDInstRef inst, |
| 508 | EDTokenVisitor_t visitor); |
| 509 | |
| 510 | #endif |
| 511 | |
| 512 | #ifdef __cplusplus |
| 513 | } |
| 514 | #endif |
| 515 | |
| 516 | #endif |