blob: 9cd1e1f5f3cfe72aaf744a650246447ed88572d7 [file] [log] [blame]
Sean Callananc3fbf912010-01-27 23:03:46 +00001/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
2|* *|
3|* The LLVM Compiler Infrastructure *|
4|* *|
5|* This file is distributed under the University of Illinois Open Source *|
6|* License. See LICENSE.TXT for details. *|
7|* *|
8|*===----------------------------------------------------------------------===*|
9|* *|
10|* This header declares the C interface to EnhancedDisassembly.so, which *|
11|* implements a disassembler with the ability to extract operand values and *|
12|* individual tokens from assembly instructions. *|
13|* *|
14|* The header declares additional interfaces if the host compiler supports *|
15|* the blocks API. *|
16|* *|
17\*===----------------------------------------------------------------------===*/
18
19#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
20#define LLVM_C_ENHANCEDDISASSEMBLY_H
21
22#include "llvm/System/DataTypes.h"
23
24#ifdef __cplusplus
25extern "C" {
26#endif
27
28/*!
29 @typedef EDByteReaderCallback
30 Interface to memory from which instructions may be read.
31 @param byte A pointer whose target should be filled in with the data returned.
32 @param address The address of the byte to be read.
33 @param arg An anonymous argument for client use.
34 @result 0 on success; -1 otherwise.
35 */
36typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
37
38/*!
39 @typedef EDRegisterReaderCallback
40 Interface to the register state from which register values may be read.
41 @param value A pointer whose target should be filled in with the value of the
42 register.
43 @param regID The LLVM register identifier for the register to read.
44 @param arg An anonymous argument for client use.
45 @result 0 if the register could be read; -1 otherwise.
46 */
47typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
48 void* arg);
49
50/*!
51 @typedef EDAssemblySyntax_t
52 An assembly syntax for use in tokenizing instructions.
53 */
Sean Callanane2749012010-01-27 23:20:51 +000054typedef enum {
Sean Callananc3fbf912010-01-27 23:03:46 +000055/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
Sean Callanane2749012010-01-27 23:20:51 +000056 kEDAssemblySyntaxX86Intel = 0,
Sean Callananc3fbf912010-01-27 23:03:46 +000057/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
Sean Callanane2749012010-01-27 23:20:51 +000058 kEDAssemblySyntaxX86ATT = 1
59} EDAssemblySyntax_t;
Sean Callananc3fbf912010-01-27 23:03:46 +000060
61/*!
62 @typedef EDDisassemblerRef
63 Encapsulates a disassembler for a single CPU architecture.
64 */
65struct EDDisassembler;
66typedef struct EDDisassembler *EDDisassemblerRef;
67
68/*!
69 @typedef EDInstRef
70 Encapsulates a single disassembled instruction in one assembly syntax.
71 */
72struct EDInst;
73typedef struct EDInst *EDInstRef;
74
75/*!
76 @typedef EDTokenRef
77 Encapsulates a token from the disassembly of an instruction.
78 */
79struct EDToken;
80typedef struct EDToken *EDTokenRef;
81
82/*!
83 @typedef EDOperandRef
84 Encapsulates an operand of an instruction.
85 */
86struct EDOperand;
87typedef struct EDOperand *EDOperandRef;
88
89/*!
90 @functiongroup Getting a disassembler
91 */
92
93/*!
94 @function EDGetDisassembler
95 Gets the disassembler for a given target.
96 @param disassembler A pointer whose target will be filled in with the
97 disassembler.
98 @param triple Identifies the target. Example: "x86_64-apple-darwin10"
99 @param syntax The assembly syntax to use when decoding instructions.
100 @result 0 on success; -1 otherwise.
101 */
102int EDGetDisassembler(EDDisassemblerRef *disassembler,
103 const char *triple,
104 EDAssemblySyntax_t syntax);
105
106/*!
107 @functiongroup Generic architectural queries
108 */
109
110/*!
111 @function EDGetRegisterName
112 Gets the human-readable name for a given register.
113 @param regName A pointer whose target will be pointed at the name of the
114 register. The name does not need to be deallocated.
115 @param disassembler The disassembler to query for the name.
116 @param regID The register identifier, as returned by EDRegisterTokenValue.
117 @result 0 on success; -1 otherwise.
118 */
119int EDGetRegisterName(const char** regName,
120 EDDisassemblerRef disassembler,
121 unsigned regID);
122
123/*!
124 @function EDRegisterIsStackPointer
125 Determines if a register is one of the platform's stack-pointer registers.
126 @param disassembler The disassembler to query.
127 @param regID The register identifier, as returned by EDRegisterTokenValue.
128 @result 1 if true; 0 otherwise.
129 */
130int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
131 unsigned regID);
132
133/*!
134 @function EDRegisterIsProgramCounter
135 Determines if a register is one of the platform's program-counter registers.
136 @param disassembler The disassembler to query.
137 @param regID The register identifier, as returned by EDRegisterTokenValue.
138 @result 1 if true; 0 otherwise.
139 */
140int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
141 unsigned regID);
142
143/*!
144 @functiongroup Creating and querying instructions
145 */
146
147/*!
148 @function EDCreateInsts
149 Gets a set of contiguous instructions from a disassembler.
150 @param insts A pointer to an array that will be filled in with the
151 instructions. Must have at least count entries. Entries not filled in will
152 be set to NULL.
153 @param count The maximum number of instructions to fill in.
154 @param disassembler The disassembler to use when decoding the instructions.
155 @param byteReader The function to use when reading the instructions' machine
156 code.
157 @param address The address of the first byte of the first instruction.
158 @param arg An anonymous argument to be passed to byteReader.
159 @result The number of instructions read on success; 0 otherwise.
160 */
161unsigned int EDCreateInsts(EDInstRef *insts,
162 unsigned int count,
163 EDDisassemblerRef disassembler,
164 EDByteReaderCallback byteReader,
165 uint64_t address,
166 void *arg);
167
168/*!
169 @function EDReleaseInst
170 Frees the memory for an instruction. The instruction can no longer be accessed
171 after this call.
172 @param inst The instruction to be freed.
173 */
174void EDReleaseInst(EDInstRef inst);
175
176/*!
177 @function EDInstByteSize
178 @param inst The instruction to be queried.
Sean Callanan76706582010-02-04 01:43:08 +0000179 @result The number of bytes in the instruction's machine-code representation.
Sean Callananc3fbf912010-01-27 23:03:46 +0000180 */
181int EDInstByteSize(EDInstRef inst);
182
183/*!
184 @function EDGetInstString
185 Gets the disassembled text equivalent of the instruction.
186 @param buf A pointer whose target will be filled in with a pointer to the
187 string. (The string becomes invalid when the instruction is released.)
188 @param inst The instruction to be queried.
189 @result 0 on success; -1 otherwise.
190 */
191int EDGetInstString(const char **buf,
192 EDInstRef inst);
193
194/*!
195 @function EDInstID
196 @param instID A pointer whose target will be filled in with the LLVM identifier
197 for the instruction.
198 @param inst The instruction to be queried.
199 @result 0 on success; -1 otherwise.
200 */
201int EDInstID(unsigned *instID, EDInstRef inst);
202
203/*!
204 @function EDInstIsBranch
205 @param inst The instruction to be queried.
206 @result 1 if the instruction is a branch instruction; 0 if it is some other
207 type of instruction; -1 if there was an error.
208 */
209int EDInstIsBranch(EDInstRef inst);
210
211/*!
212 @function EDInstIsMove
213 @param inst The instruction to be queried.
214 @result 1 if the instruction is a move instruction; 0 if it is some other
215 type of instruction; -1 if there was an error.
216 */
217int EDInstIsMove(EDInstRef inst);
218
219/*!
220 @function EDBranchTargetID
221 @param inst The instruction to be queried.
222 @result The ID of the branch target operand, suitable for use with
223 EDGetOperand. -1 if no such operand exists.
224 */
225int EDBranchTargetID(EDInstRef inst);
226
227/*!
228 @function EDMoveSourceID
229 @param inst The instruction to be queried.
230 @result The ID of the move source operand, suitable for use with
231 EDGetOperand. -1 if no such operand exists.
232 */
233int EDMoveSourceID(EDInstRef inst);
234
235/*!
236 @function EDMoveTargetID
237 @param inst The instruction to be queried.
238 @result The ID of the move target operand, suitable for use with
239 EDGetOperand. -1 if no such operand exists.
240 */
241int EDMoveTargetID(EDInstRef inst);
242
243/*!
244 @functiongroup Creating and querying tokens
245 */
246
247/*!
248 @function EDNumTokens
249 @param inst The instruction to be queried.
250 @result The number of tokens in the instruction, or -1 on error.
251 */
252int EDNumTokens(EDInstRef inst);
253
254/*!
255 @function EDGetToken
256 Retrieves a token from an instruction. The token is valid until the
257 instruction is released.
258 @param token A pointer to be filled in with the token.
259 @param inst The instruction to be queried.
260 @param index The index of the token in the instruction.
261 @result 0 on success; -1 otherwise.
262 */
263int EDGetToken(EDTokenRef *token,
264 EDInstRef inst,
265 int index);
266
267/*!
268 @function EDGetTokenString
269 Gets the disassembled text for a token.
270 @param buf A pointer whose target will be filled in with a pointer to the
271 string. (The string becomes invalid when the token is released.)
272 @param token The token to be queried.
273 @result 0 on success; -1 otherwise.
274 */
275int EDGetTokenString(const char **buf,
276 EDTokenRef token);
277
278/*!
279 @function EDOperandIndexForToken
280 Returns the index of the operand to which a token belongs.
281 @param token The token to be queried.
282 @result The operand index on success; -1 otherwise.
283 */
284int EDOperandIndexForToken(EDTokenRef token);
285
286/*!
287 @function EDTokenIsWhitespace
288 @param token The token to be queried.
289 @result 1 if the token is whitespace; 0 if not; -1 on error.
290 */
291int EDTokenIsWhitespace(EDTokenRef token);
292
293/*!
294 @function EDTokenIsPunctuation
295 @param token The token to be queried.
296 @result 1 if the token is punctuation; 0 if not; -1 on error.
297 */
298int EDTokenIsPunctuation(EDTokenRef token);
299
300/*!
301 @function EDTokenIsOpcode
302 @param token The token to be queried.
303 @result 1 if the token is an opcode; 0 if not; -1 on error.
304 */
305int EDTokenIsOpcode(EDTokenRef token);
306
307/*!
308 @function EDTokenIsLiteral
309 @param token The token to be queried.
310 @result 1 if the token is a numeric literal; 0 if not; -1 on error.
311 */
312int EDTokenIsLiteral(EDTokenRef token);
313
314/*!
315 @function EDTokenIsRegister
316 @param token The token to be queried.
317 @result 1 if the token identifies a register; 0 if not; -1 on error.
318 */
319int EDTokenIsRegister(EDTokenRef token);
320
321/*!
322 @function EDTokenIsNegativeLiteral
323 @param token The token to be queried.
324 @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
325 */
326int EDTokenIsNegativeLiteral(EDTokenRef token);
327
328/*!
329 @function EDLiteralTokenAbsoluteValue
330 @param value A pointer whose target will be filled in with the absolute value
331 of the literal.
332 @param token The token to be queried.
333 @result 0 on success; -1 otherwise.
334 */
335int EDLiteralTokenAbsoluteValue(uint64_t *value,
336 EDTokenRef token);
337
338/*!
339 @function EDRegisterTokenValue
340 @param registerID A pointer whose target will be filled in with the LLVM
341 register identifier for the token.
342 @param token The token to be queried.
343 @result 0 on success; -1 otherwise.
344 */
345int EDRegisterTokenValue(unsigned *registerID,
346 EDTokenRef token);
347
348/*!
349 @functiongroup Creating and querying operands
350 */
351
352/*!
353 @function EDNumOperands
354 @param inst The instruction to be queried.
355 @result The number of operands in the instruction, or -1 on error.
356 */
357int EDNumOperands(EDInstRef inst);
358
359/*!
360 @function EDGetOperand
361 Retrieves an operand from an instruction. The operand is valid until the
362 instruction is released.
363 @param operand A pointer to be filled in with the operand.
364 @param inst The instruction to be queried.
365 @param index The index of the operand in the instruction.
366 @result 0 on success; -1 otherwise.
367 */
368int EDGetOperand(EDOperandRef *operand,
369 EDInstRef inst,
370 int index);
Sean Callanan01cd79f2010-02-08 23:34:25 +0000371
372/*!
373 @function EDOperandIsRegister
374 @param operand The operand to be queried.
375 @result 1 if the operand names a register; 0 if not; -1 on error.
376 */
377int EDOperandIsRegister(EDOperandRef operand);
378
379/*!
380 @function EDOperandIsImmediate
381 @param operand The operand to be queried.
382 @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
383 */
384int EDOperandIsImmediate(EDOperandRef operand);
385
386/*!
387 @function EDOperandIsMemory
388 @param operand The operand to be queried.
389 @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
390 */
391int EDOperandIsMemory(EDOperandRef operand);
392
393/*!
394 @function EDRegisterOperandValue
395 @param value A pointer whose target will be filled in with the LLVM register ID
396 of the register named by the operand.
397 @param operand The operand to be queried.
398 @result 0 on success; -1 otherwise.
399 */
400int EDRegisterOperandValue(unsigned *value,
401 EDOperandRef operand);
402
403/*!
404 @function EDImmediateOperandValue
405 @param value A pointer whose target will be filled in with the value of the
406 immediate.
407 @param operand The operand to be queried.
408 @result 0 on success; -1 otherwise.
409 */
410int EDImmediateOperandValue(uint64_t *value,
411 EDOperandRef operand);
Sean Callananc3fbf912010-01-27 23:03:46 +0000412
413/*!
414 @function EDEvaluateOperand
Sean Callanan01cd79f2010-02-08 23:34:25 +0000415 Evaluates an operand using a client-supplied register state accessor. Register
416 operands are evaluated by reading the value of the register; immediate operands
417 are evaluated by reporting the immediate value; memory operands are evaluated
418 by computing the target address (with only those relocations applied that were
419 already applied to the original bytes).
Sean Callananc3fbf912010-01-27 23:03:46 +0000420 @param result A pointer whose target is to be filled with the result of
421 evaluating the operand.
422 @param operand The operand to be evaluated.
423 @param regReader The function to use when reading registers from the register
424 state.
425 @param arg An anonymous argument for client use.
426 @result 0 if the operand could be evaluated; -1 otherwise.
427 */
428int EDEvaluateOperand(uint64_t *result,
429 EDOperandRef operand,
430 EDRegisterReaderCallback regReader,
431 void *arg);
432
433#ifdef __BLOCKS__
434
435/*!
436 @typedef EDByteBlock_t
437 Block-based interface to memory from which instructions may be read.
438 @param byte A pointer whose target should be filled in with the data returned.
439 @param address The address of the byte to be read.
440 @result 0 on success; -1 otherwise.
441 */
442typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);
443
444/*!
445 @typedef EDRegisterBlock_t
446 Block-based interface to the register state from which register values may be read.
447 @param value A pointer whose target should be filled in with the value of the
448 register.
449 @param regID The LLVM register identifier for the register to read.
450 @result 0 if the register could be read; -1 otherwise.
451 */
452typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
453
454/*!
455 @typedef EDTokenVisitor_t
456 Block-based handler for individual tokens.
457 @param token The current token being read.
458 @result 0 to continue; 1 to stop normally; -1 on error.
459 */
460typedef int (^EDTokenVisitor_t)(EDTokenRef token);
461
462/*! @functiongroup Block-based interfaces */
463
464/*!
465 @function EDBlockCreateInsts
466 Gets a set of contiguous instructions from a disassembler, using a block to
467 read memory.
468 @param insts A pointer to an array that will be filled in with the
469 instructions. Must have at least count entries. Entries not filled in will
470 be set to NULL.
471 @param count The maximum number of instructions to fill in.
472 @param disassembler The disassembler to use when decoding the instructions.
473 @param byteBlock The block to use when reading the instructions' machine
474 code.
475 @param address The address of the first byte of the first instruction.
476 @result The number of instructions read on success; 0 otherwise.
477 */
478unsigned int EDBlockCreateInsts(EDInstRef *insts,
479 int count,
480 EDDisassemblerRef disassembler,
481 EDByteBlock_t byteBlock,
482 uint64_t address);
483
484/*!
485 @function EDBlockEvaluateOperand
486 Evaluates an operand using a block to read registers.
487 @param result A pointer whose target is to be filled with the result of
488 evaluating the operand.
489 @param operand The operand to be evaluated.
490 @param regBlock The block to use when reading registers from the register
491 state.
492 @result 0 if the operand could be evaluated; -1 otherwise.
493 */
494int EDBlockEvaluateOperand(uint64_t *result,
495 EDOperandRef operand,
496 EDRegisterBlock_t regBlock);
497
498/*!
499 @function EDBlockVisitTokens
500 Visits every token with a visitor.
501 @param inst The instruction with the tokens to be visited.
502 @param visitor The visitor.
503 @result 0 if the visit ended normally; -1 if the visitor encountered an error
504 or there was some other error.
505 */
506int EDBlockVisitTokens(EDInstRef inst,
507 EDTokenVisitor_t visitor);
508
509#endif
510
511#ifdef __cplusplus
512}
513#endif
514
515#endif