Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 1 | /*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\ |
| 2 | |* *| |
| 3 | |* The LLVM Compiler Infrastructure *| |
| 4 | |* *| |
| 5 | |* This file is distributed under the University of Illinois Open Source *| |
| 6 | |* License. See LICENSE.TXT for details. *| |
| 7 | |* *| |
| 8 | |*===----------------------------------------------------------------------===*| |
| 9 | |* *| |
| 10 | |* This header declares the C interface to EnhancedDisassembly.so, which *| |
| 11 | |* implements a disassembler with the ability to extract operand values and *| |
| 12 | |* individual tokens from assembly instructions. *| |
| 13 | |* *| |
| 14 | |* The header declares additional interfaces if the host compiler supports *| |
| 15 | |* the blocks API. *| |
| 16 | |* *| |
| 17 | \*===----------------------------------------------------------------------===*/ |
| 18 | |
| 19 | #ifndef LLVM_C_ENHANCEDDISASSEMBLY_H |
| 20 | #define LLVM_C_ENHANCEDDISASSEMBLY_H |
| 21 | |
| 22 | #include "llvm/System/DataTypes.h" |
| 23 | |
| 24 | #ifdef __cplusplus |
| 25 | extern "C" { |
| 26 | #endif |
| 27 | |
| 28 | /*! |
| 29 | @typedef EDByteReaderCallback |
| 30 | Interface to memory from which instructions may be read. |
| 31 | @param byte A pointer whose target should be filled in with the data returned. |
| 32 | @param address The address of the byte to be read. |
| 33 | @param arg An anonymous argument for client use. |
| 34 | @result 0 on success; -1 otherwise. |
| 35 | */ |
| 36 | typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); |
| 37 | |
| 38 | /*! |
| 39 | @typedef EDRegisterReaderCallback |
| 40 | Interface to the register state from which register values may be read. |
| 41 | @param value A pointer whose target should be filled in with the value of the |
| 42 | register. |
| 43 | @param regID The LLVM register identifier for the register to read. |
| 44 | @param arg An anonymous argument for client use. |
| 45 | @result 0 if the register could be read; -1 otherwise. |
| 46 | */ |
| 47 | typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, |
| 48 | void* arg); |
| 49 | |
| 50 | /*! |
| 51 | @typedef EDAssemblySyntax_t |
| 52 | An assembly syntax for use in tokenizing instructions. |
| 53 | */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 54 | typedef enum { |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 55 | /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 56 | kEDAssemblySyntaxX86Intel = 0, |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 57 | /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */ |
Sean Callanan | e274901 | 2010-01-27 23:20:51 +0000 | [diff] [blame] | 58 | kEDAssemblySyntaxX86ATT = 1 |
| 59 | } EDAssemblySyntax_t; |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 60 | |
| 61 | /*! |
| 62 | @typedef EDDisassemblerRef |
| 63 | Encapsulates a disassembler for a single CPU architecture. |
| 64 | */ |
| 65 | struct EDDisassembler; |
| 66 | typedef struct EDDisassembler *EDDisassemblerRef; |
| 67 | |
| 68 | /*! |
| 69 | @typedef EDInstRef |
| 70 | Encapsulates a single disassembled instruction in one assembly syntax. |
| 71 | */ |
| 72 | struct EDInst; |
| 73 | typedef struct EDInst *EDInstRef; |
| 74 | |
| 75 | /*! |
| 76 | @typedef EDTokenRef |
| 77 | Encapsulates a token from the disassembly of an instruction. |
| 78 | */ |
| 79 | struct EDToken; |
| 80 | typedef struct EDToken *EDTokenRef; |
| 81 | |
| 82 | /*! |
| 83 | @typedef EDOperandRef |
| 84 | Encapsulates an operand of an instruction. |
| 85 | */ |
| 86 | struct EDOperand; |
| 87 | typedef struct EDOperand *EDOperandRef; |
| 88 | |
| 89 | /*! |
| 90 | @functiongroup Getting a disassembler |
| 91 | */ |
| 92 | |
| 93 | /*! |
| 94 | @function EDGetDisassembler |
| 95 | Gets the disassembler for a given target. |
| 96 | @param disassembler A pointer whose target will be filled in with the |
| 97 | disassembler. |
| 98 | @param triple Identifies the target. Example: "x86_64-apple-darwin10" |
| 99 | @param syntax The assembly syntax to use when decoding instructions. |
| 100 | @result 0 on success; -1 otherwise. |
| 101 | */ |
| 102 | int EDGetDisassembler(EDDisassemblerRef *disassembler, |
| 103 | const char *triple, |
| 104 | EDAssemblySyntax_t syntax); |
| 105 | |
| 106 | /*! |
| 107 | @functiongroup Generic architectural queries |
| 108 | */ |
| 109 | |
| 110 | /*! |
| 111 | @function EDGetRegisterName |
| 112 | Gets the human-readable name for a given register. |
| 113 | @param regName A pointer whose target will be pointed at the name of the |
| 114 | register. The name does not need to be deallocated by the caller. NOTE(review): the original sentence broke off after "and will be"; the lifetime of the name is not stated here -- confirm. |
| 115 | @param disassembler The disassembler to query for the name. |
| 116 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 117 | @result 0 on success; -1 otherwise. |
| 118 | */ |
| 119 | int EDGetRegisterName(const char** regName, |
| 120 | EDDisassemblerRef disassembler, |
| 121 | unsigned regID); |
| 122 | |
| 123 | /*! |
| 124 | @function EDRegisterIsStackPointer |
| 125 | Determines if a register is one of the platform's stack-pointer registers. |
| 126 | @param disassembler The disassembler to query. |
| 127 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 128 | @result 1 if true; 0 otherwise. |
| 129 | */ |
| 130 | int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, |
| 131 | unsigned regID); |
| 132 | |
| 133 | /*! |
| 134 | @function EDRegisterIsProgramCounter |
| 135 | Determines if a register is one of the platform's program-counter registers. |
| 136 | @param disassembler The disassembler to query. |
| 137 | @param regID The register identifier, as returned by EDRegisterTokenValue. |
| 138 | @result 1 if true; 0 otherwise. |
| 139 | */ |
| 140 | int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, |
| 141 | unsigned regID); |
| 142 | |
| 143 | /*! |
| 144 | @functiongroup Creating and querying instructions |
| 145 | */ |
| 146 | |
| 147 | /*! |
| 148 | @function EDCreateInsts |
| 149 | Gets a set of contiguous instructions from a disassembler. |
| 150 | @param insts A pointer to an array that will be filled in with the |
| 151 | instructions. Must have at least count entries. Entries not filled in will |
| 152 | be set to NULL. |
| 153 | @param count The maximum number of instructions to fill in. |
| 154 | @param disassembler The disassembler to use when decoding the instructions. |
| 155 | @param byteReader The function to use when reading the instructions' machine |
| 156 | code. |
| 157 | @param address The address of the first byte of the first instruction. |
| 158 | @param arg An anonymous argument to be passed to byteReader. |
| 159 | @result The number of instructions read on success; 0 otherwise. |
| 160 | */ |
| 161 | unsigned int EDCreateInsts(EDInstRef *insts, |
| 162 | unsigned int count, |
| 163 | EDDisassemblerRef disassembler, |
| 164 | EDByteReaderCallback byteReader, |
| 165 | uint64_t address, |
| 166 | void *arg); |
| 167 | |
| 168 | /*! |
| 169 | @function EDReleaseInst |
| 170 | Frees the memory for an instruction. The instruction can no longer be accessed |
| 171 | after this call. |
| 172 | @param inst The instruction to be freed. |
| 173 | */ |
| 174 | void EDReleaseInst(EDInstRef inst); |
| 175 | |
| 176 | /*! |
| 177 | @function EDInstByteSize |
| 178 | @param inst The instruction to be queried. |
Sean Callanan | 7670658 | 2010-02-04 01:43:08 +0000 | [diff] [blame] | 179 | @result The number of bytes in the instruction's machine-code representation. |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 180 | */ |
| 181 | int EDInstByteSize(EDInstRef inst); |
| 182 | |
| 183 | /*! |
| 184 | @function EDGetInstString |
| 185 | Gets the disassembled text equivalent of the instruction. |
| 186 | @param buf A pointer whose target will be filled in with a pointer to the |
| 187 | string. (The string becomes invalid when the instruction is released.) |
| 188 | @param inst The instruction to be queried. |
| 189 | @result 0 on success; -1 otherwise. |
| 190 | */ |
| 191 | int EDGetInstString(const char **buf, |
| 192 | EDInstRef inst); |
| 193 | |
| 194 | /*! |
| 195 | @function EDInstID |
| 196 | @param instID A pointer whose target will be filled in with the LLVM identifier |
| 197 | for the instruction. |
| 198 | @param inst The instruction to be queried. |
| 199 | @result 0 on success; -1 otherwise. |
| 200 | */ |
| 201 | int EDInstID(unsigned *instID, EDInstRef inst); |
| 202 | |
| 203 | /*! |
| 204 | @function EDInstIsBranch |
| 205 | @param inst The instruction to be queried. |
| 206 | @result 1 if the instruction is a branch instruction; 0 if it is some other |
| 207 | type of instruction; -1 if there was an error. |
| 208 | */ |
| 209 | int EDInstIsBranch(EDInstRef inst); |
| 210 | |
| 211 | /*! |
| 212 | @function EDInstIsMove |
| 213 | @param inst The instruction to be queried. |
| 214 | @result 1 if the instruction is a move instruction; 0 if it is some other |
| 215 | type of instruction; -1 if there was an error. |
| 216 | */ |
| 217 | int EDInstIsMove(EDInstRef inst); |
| 218 | |
| 219 | /*! |
| 220 | @function EDBranchTargetID |
| 221 | @param inst The instruction to be queried. |
| 222 | @result The ID of the branch target operand, suitable for use as the index |
| 223 | passed to EDGetOperand. -1 if no such operand exists. |
| 224 | */ |
| 225 | int EDBranchTargetID(EDInstRef inst); |
| 226 | |
| 227 | /*! |
| 228 | @function EDMoveSourceID |
| 229 | @param inst The instruction to be queried. |
| 230 | @result The ID of the move source operand, suitable for use as the index |
| 231 | passed to EDGetOperand. -1 if no such operand exists. |
| 232 | */ |
| 233 | int EDMoveSourceID(EDInstRef inst); |
| 234 | |
| 235 | /*! |
| 236 | @function EDMoveTargetID |
| 237 | @param inst The instruction to be queried. |
| 238 | @result The ID of the move target operand, suitable for use as the index |
| 239 | passed to EDGetOperand. -1 if no such operand exists. |
| 240 | */ |
| 241 | int EDMoveTargetID(EDInstRef inst); |
| 242 | |
| 243 | /*! |
| 244 | @functiongroup Creating and querying tokens |
| 245 | */ |
| 246 | |
| 247 | /*! |
| 248 | @function EDNumTokens |
| 249 | @param inst The instruction to be queried. |
| 250 | @result The number of tokens in the instruction, or -1 on error. |
| 251 | */ |
| 252 | int EDNumTokens(EDInstRef inst); |
| 253 | |
| 254 | /*! |
| 255 | @function EDGetToken |
| 256 | Retrieves a token from an instruction. The token is valid until the |
| 257 | instruction is released. |
| 258 | @param token A pointer to be filled in with the token. |
| 259 | @param inst The instruction to be queried. |
| 260 | @param index The index of the token in the instruction. |
| 261 | @result 0 on success; -1 otherwise. |
| 262 | */ |
| 263 | int EDGetToken(EDTokenRef *token, |
| 264 | EDInstRef inst, |
| 265 | int index); |
| 266 | |
| 267 | /*! |
| 268 | @function EDGetTokenString |
| 269 | Gets the disassembled text for a token. |
| 270 | @param buf A pointer whose target will be filled in with a pointer to the |
| 271 | string. (The string becomes invalid when the token does, i.e. when the instruction that owns the token is released.) |
| 272 | @param token The token to be queried. |
| 273 | @result 0 on success; -1 otherwise. |
| 274 | */ |
| 275 | int EDGetTokenString(const char **buf, |
| 276 | EDTokenRef token); |
| 277 | |
| 278 | /*! |
| 279 | @function EDOperandIndexForToken |
| 280 | Returns the index of the operand to which a token belongs. |
| 281 | @param token The token to be queried. |
| 282 | @result The operand index on success; -1 otherwise |
| 283 | */ |
| 284 | int EDOperandIndexForToken(EDTokenRef token); |
| 285 | |
| 286 | /*! |
| 287 | @function EDTokenIsWhitespace |
| 288 | @param token The token to be queried. |
| 289 | @result 1 if the token is whitespace; 0 if not; -1 on error. |
| 290 | */ |
| 291 | int EDTokenIsWhitespace(EDTokenRef token); |
| 292 | |
| 293 | /*! |
| 294 | @function EDTokenIsPunctuation |
| 295 | @param token The token to be queried. |
| 296 | @result 1 if the token is punctuation; 0 if not; -1 on error. |
| 297 | */ |
| 298 | int EDTokenIsPunctuation(EDTokenRef token); |
| 299 | |
| 300 | /*! |
| 301 | @function EDTokenIsOpcode |
| 302 | @param token The token to be queried. |
| 303 | @result 1 if the token is an opcode; 0 if not; -1 on error. |
| 304 | */ |
| 305 | int EDTokenIsOpcode(EDTokenRef token); |
| 306 | |
| 307 | /*! |
| 308 | @function EDTokenIsLiteral |
| 309 | @param token The token to be queried. |
| 310 | @result 1 if the token is a numeric literal; 0 if not; -1 on error. |
| 311 | */ |
| 312 | int EDTokenIsLiteral(EDTokenRef token); |
| 313 | |
| 314 | /*! |
| 315 | @function EDTokenIsRegister |
| 316 | @param token The token to be queried. |
| 317 | @result 1 if the token identifies a register; 0 if not; -1 on error. |
| 318 | */ |
| 319 | int EDTokenIsRegister(EDTokenRef token); |
| 320 | |
| 321 | /*! |
| 322 | @function EDTokenIsNegativeLiteral |
| 323 | @param token The token to be queried. |
| 324 | @result 1 if the token is a negative signed literal; 0 if not; -1 on error. |
| 325 | */ |
| 326 | int EDTokenIsNegativeLiteral(EDTokenRef token); |
| 327 | |
| 328 | /*! |
| 329 | @function EDLiteralTokenAbsoluteValue |
| 330 | @param value A pointer whose target will be filled in with the absolute value |
| 331 | of the literal. |
| 332 | @param token The token to be queried. |
| 333 | @result 0 on success; -1 otherwise. |
| 334 | */ |
| 335 | int EDLiteralTokenAbsoluteValue(uint64_t *value, |
| 336 | EDTokenRef token); |
| 337 | |
| 338 | /*! |
| 339 | @function EDRegisterTokenValue |
| 340 | @param registerID A pointer whose target will be filled in with the LLVM |
| 341 | register identifier for the token. |
| 342 | @param token The token to be queried. |
| 343 | @result 0 on success; -1 otherwise. |
| 344 | */ |
| 345 | int EDRegisterTokenValue(unsigned *registerID, |
| 346 | EDTokenRef token); |
| 347 | |
| 348 | /*! |
| 349 | @functiongroup Creating and querying operands |
| 350 | */ |
| 351 | |
| 352 | /*! |
| 353 | @function EDNumOperands |
| 354 | @param inst The instruction to be queried. |
| 355 | @result The number of operands in the instruction, or -1 on error. |
| 356 | */ |
| 357 | int EDNumOperands(EDInstRef inst); |
| 358 | |
| 359 | /*! |
| 360 | @function EDGetOperand |
| 361 | Retrieves an operand from an instruction. The operand is valid until the |
| 362 | instruction is released. |
| 363 | @param operand A pointer to be filled in with the operand. |
| 364 | @param inst The instruction to be queried. |
| 365 | @param index The index of the operand in the instruction. |
| 366 | @result 0 on success; -1 otherwise. |
| 367 | */ |
| 368 | int EDGetOperand(EDOperandRef *operand, |
| 369 | EDInstRef inst, |
| 370 | int index); |
Sean Callanan | 01cd79f | 2010-02-08 23:34:25 +0000 | [diff] [blame^] | 371 | |
| 372 | /*! |
| 373 | @function EDOperandIsRegister |
| 374 | @param operand The operand to be queried. |
| 375 | @result 1 if the operand names a register; 0 if not; -1 on error. |
| 376 | */ |
| 377 | int EDOperandIsRegister(EDOperandRef operand); |
| 378 | |
| 379 | /*! |
| 380 | @function EDOperandIsImmediate |
| 381 | @param operand The operand to be queried. |
| 382 | @result 1 if the operand specifies an immediate value; 0 if not; -1 on error. |
| 383 | */ |
| 384 | int EDOperandIsImmediate(EDOperandRef operand); |
| 385 | |
| 386 | /*! |
| 387 | @function EDOperandIsMemory |
| 388 | @param operand The operand to be queried. |
| 389 | @result 1 if the operand specifies a location in memory; 0 if not; -1 on error. |
| 390 | */ |
| 391 | int EDOperandIsMemory(EDOperandRef operand); |
| 392 | |
| 393 | /*! |
| 394 | @function EDRegisterOperandValue |
| 395 | @param value A pointer whose target will be filled in with the LLVM register ID |
| 396 | of the register named by the operand. |
| 397 | @param operand The operand to be queried. |
| 398 | @result 0 on success; -1 otherwise. |
| 399 | */ |
| 400 | int EDRegisterOperandValue(unsigned *value, |
| 401 | EDOperandRef operand); |
| 402 | |
| 403 | /*! |
| 404 | @function EDImmediateOperandValue |
| 405 | @param value A pointer whose target will be filled in with the value of the |
| 406 | immediate. |
| 407 | @param operand The operand to be queried. |
| 408 | @result 0 on success; -1 otherwise. |
| 409 | */ |
| 410 | int EDImmediateOperandValue(uint64_t *value, |
| 411 | EDOperandRef operand); |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 412 | |
| 413 | /*! |
| 414 | @function EDEvaluateOperand |
Sean Callanan | 01cd79f | 2010-02-08 23:34:25 +0000 | [diff] [blame^] | 415 | Evaluates an operand using a client-supplied register state accessor. Register |
| 416 | operands are evaluated by reading the value of the register; immediate operands |
| 417 | are evaluated by reporting the immediate value; memory operands are evaluated |
| 418 | by computing the target address (with only those relocations applied that were |
| 419 | already applied to the original bytes). |
Sean Callanan | c3fbf91 | 2010-01-27 23:03:46 +0000 | [diff] [blame] | 420 | @param result A pointer whose target is to be filled with the result of |
| 421 | evaluating the operand. |
| 422 | @param operand The operand to be evaluated. |
| 423 | @param regReader The function to use when reading registers from the register |
| 424 | state. |
| 425 | @param arg An anonymous argument for client use. |
| 426 | @result 0 if the operand could be evaluated; -1 otherwise. |
| 427 | */ |
| 428 | int EDEvaluateOperand(uint64_t *result, |
| 429 | EDOperandRef operand, |
| 430 | EDRegisterReaderCallback regReader, |
| 431 | void *arg); |
| 432 | |
| 433 | #ifdef __BLOCKS__ |
| 434 | |
| 435 | /*! |
| 436 | @typedef EDByteBlock_t |
| 437 | Block-based interface to memory from which instructions may be read. |
| 438 | @param byte A pointer whose target should be filled in with the data returned. |
| 439 | @param address The address of the byte to be read. |
| 440 | @result 0 on success; -1 otherwise. |
| 441 | */ |
| 442 | typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address); |
| 443 | |
| 444 | /*! |
| 445 | @typedef EDRegisterBlock_t |
| 446 | Block-based interface to the register state from which register values may be read. |
| 447 | @param value A pointer whose target should be filled in with the value of the |
| 448 | register. |
| 449 | @param regID The LLVM register identifier for the register to read. |
| 450 | @result 0 if the register could be read; -1 otherwise. |
| 451 | */ |
| 452 | typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); |
| 453 | |
| 454 | /*! |
| 455 | @typedef EDTokenVisitor_t |
| 456 | Block-based handler for individual tokens. |
| 457 | @param token The current token being read. |
| 458 | @result 0 to continue; 1 to stop normally; -1 on error. |
| 459 | */ |
| 460 | typedef int (^EDTokenVisitor_t)(EDTokenRef token); |
| 461 | |
| 462 | /*! @functiongroup Block-based interfaces */ |
| 463 | |
| 464 | /*! |
| 465 | @function EDBlockCreateInsts |
| 466 | Gets a set of contiguous instructions from a disassembler, using a block to |
| 467 | read memory. |
| 468 | @param insts A pointer to an array that will be filled in with the |
| 469 | instructions. Must have at least count entries. Entries not filled in will |
| 470 | be set to NULL. |
| 471 | @param count The maximum number of instructions to fill in. Note that this is declared as int here, whereas EDCreateInsts takes an unsigned int count. |
| 472 | @param disassembler The disassembler to use when decoding the instructions. |
| 473 | @param byteBlock The block to use when reading the instructions' machine |
| 474 | code. |
| 475 | @param address The address of the first byte of the first instruction. |
| 476 | @result The number of instructions read on success; 0 otherwise. |
| 477 | */ |
| 478 | unsigned int EDBlockCreateInsts(EDInstRef *insts, |
| 479 | int count, |
| 480 | EDDisassemblerRef disassembler, |
| 481 | EDByteBlock_t byteBlock, |
| 482 | uint64_t address); |
| 483 | |
| 484 | /*! |
| 485 | @function EDBlockEvaluateOperand |
| 486 | Evaluates an operand using a block to read registers. |
| 487 | @param result A pointer whose target is to be filled with the result of |
| 488 | evaluating the operand. |
| 489 | @param operand The operand to be evaluated. |
| 490 | @param regBlock The block to use when reading registers from the register |
| 491 | state. |
| 492 | @result 0 if the operand could be evaluated; -1 otherwise. |
| 493 | */ |
| 494 | int EDBlockEvaluateOperand(uint64_t *result, |
| 495 | EDOperandRef operand, |
| 496 | EDRegisterBlock_t regBlock); |
| 497 | |
| 498 | /*! |
| 499 | @function EDBlockVisitTokens |
| 500 | Visits every token with a visitor. |
| 501 | @param inst The instruction with the tokens to be visited. |
| 502 | @param visitor The visitor. |
| 503 | @result 0 if the visit ended normally; -1 if the visitor encountered an error |
| 504 | or there was some other error. |
| 505 | */ |
| 506 | int EDBlockVisitTokens(EDInstRef inst, |
| 507 | EDTokenVisitor_t visitor); |
| 508 | |
| 509 | #endif |
| 510 | |
| 511 | #ifdef __cplusplus |
| 512 | } |
| 513 | #endif |
| 514 | |
| 515 | #endif |