Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 1 | /*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\ |
| 2 | |* *| |
| 3 | |* The LLVM Compiler Infrastructure *| |
| 4 | |* *| |
| 5 | |* This file is distributed under the University of Illinois Open Source *| |
| 6 | |* License. See LICENSE.TXT for details. *| |
| 7 | |* *| |
| 8 | |*===----------------------------------------------------------------------===*| |
| 9 | |* *| |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 10 | |* This header provides a public interface to a disassembler library. *| |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 11 | |* LLVM provides an implementation of this interface. *| |
| 12 | |* *| |
| 13 | \*===----------------------------------------------------------------------===*/ |
| 14 | |
| 15 | #ifndef LLVM_C_DISASSEMBLER_H |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 16 | #define LLVM_C_DISASSEMBLER_H |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 17 | |
Daniel Dunbar | 8470475 | 2011-03-29 02:30:34 +0000 | [diff] [blame] | 18 | #include "llvm/Support/DataTypes.h" |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 19 | #include <stddef.h> |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 20 | |
| 21 | /** |
Gregory Szorc | 6244b51 | 2012-03-21 03:54:29 +0000 | [diff] [blame^] | 22 | * @defgroup LLVMCDisassembler Disassembler |
| 23 | * @ingroup LLVMC |
| 24 | * |
| 25 | * @{ |
| 26 | */ |
| 27 | |
/**
 * An opaque reference to a disassembler context.
 *
 * A context is obtained from LLVMCreateDisasm() and handed back to
 * LLVMDisasmDispose() and LLVMDisasmInstruction().
 */
typedef void *LLVMDisasmContextRef;
| 32 | |
/**
 * The type for the operand information call back function.  This is called to
 * get the symbolic information for an operand of an instruction.  Typically
 * this is from the relocation information, symbol table, etc.
 *
 * For some instruction sets, there can be more than one operand with symbolic
 * information.  To determine the symbolic operand information for each
 * operand, the bytes for the specific operand in the instruction are specified
 * by the Offset parameter and its byte width is the Size parameter.  For
 * instruction sets with fixed widths and one symbolic operand per instruction,
 * the Offset parameter will be zero and the Size parameter will be the
 * instruction width.
 *
 * @param DisInfo  The block of information that was saved when the
 *                 disassembler context was created.
 * @param PC       The address of the instruction containing the operand.
 * @param Offset   Where the bytes of the operand start within the instruction.
 * @param Size     The byte width of the operand.
 * @param TagType  Selects the Triple specific layout of TagBuf.
 * @param TagBuf   Where the information is returned; its contents are Triple
 *                 specific and defined by the value of TagType for that
 *                 Triple.
 *
 * @return 1 if symbolic information is returned, otherwise 0.
 */
typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
                                  uint64_t Offset, uint64_t Size,
                                  int TagType, void *TagBuf);
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 53 | |
/**
 * The initial support in LLVM MC for the most general form of a relocatable
 * expression is "AddSymbol - SubtractSymbol + Offset".  For some Darwin targets
 * this full form is encoded in the relocation information so that AddSymbol and
 * SubtractSymbol can be link edited independent of each other.  Many other
 * platforms only allow a relocatable expression of the form AddSymbol + Offset
 * to be encoded.
 *
 * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct
 * LLVMOpInfo1.  The value of the relocatable expression for the operand,
 * including any PC adjustment, is passed in to the call back in the Value
 * field.  The symbolic information about the operand is returned using all
 * the fields of the structure with the Offset of the relocatable expression
 * returned in the Value field.  It is possible that some symbols in the
 * relocatable expression were assembly temporary symbols, for example
 * "Ldata - LpicBase + constant", and only the Values of the symbols without
 * symbol names are present in the relocation information.  The VariantKind
 * type is one of the Target specific LLVMDisassembler_VariantKind_* #defines
 * and is used to print operands like "_foo@GOT", ":lower16:_foo", etc.
 */
struct LLVMOpInfoSymbol1 {
  uint64_t Present;  /* 1 if this symbol is present */
  const char *Name;  /* symbol name if not NULL */
  uint64_t Value;    /* symbol value if name is NULL */
};

struct LLVMOpInfo1 {
  /* The symbol that is added in the relocatable expression. */
  struct LLVMOpInfoSymbol1 AddSymbol;
  /* The symbol that is subtracted in the relocatable expression. */
  struct LLVMOpInfoSymbol1 SubtractSymbol;
  /* Passed in: value of the relocatable expression including any PC
     adjustment.  Returned: the Offset of the relocatable expression. */
  uint64_t Value;
  /* One of the LLVMDisassembler_VariantKind_* values. */
  uint64_t VariantKind;
};
| 86 | |
/**
 * The operand VariantKinds for symbolic disassembly.  These are the values
 * stored in the VariantKind field of struct LLVMOpInfo1.
 */
#define LLVMDisassembler_VariantKind_None 0 /* all targets */

/**
 * The ARM target VariantKinds.
 */
#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
| 97 | |
/**
 * The type for the symbol lookup function.  This may be called by the
 * disassembler for things like adding a comment for a PC plus a constant
 * offset load instruction to use a symbol name instead of a load address value.
 *
 * @param DisInfo         The block of information that was saved when the
 *                        disassembler context was created.
 * @param ReferenceValue  The value to look up as a symbol.
 * @param ReferenceType   On input, the reference type of the instruction,
 *                        passed indirectly.  On output, if the output
 *                        reference can be determined, its type.  Both use the
 *                        LLVMDisassembler_ReferenceType_* values.
 * @param ReferencePC     The PC of the instruction.
 * @param ReferenceName   On output, the name of the reference if any,
 *                        otherwise it is set to NULL.
 *
 * @return The symbol found for ReferenceValue, or NULL if no symbol is found.
 */
typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
                                                uint64_t ReferenceValue,
                                                uint64_t *ReferenceType,
                                                uint64_t ReferencePC,
                                                const char **ReferenceName);
/**
 * The reference types on input and output.
 *
 * Note that the input (In_) and output (Out_) constants reuse the same numeric
 * values, so a value must be interpreted according to its direction.
 */
/* No input reference type or no output reference type. */
#define LLVMDisassembler_ReferenceType_InOut_None 0

/* The input reference is from a branch instruction. */
#define LLVMDisassembler_ReferenceType_In_Branch 1
/* The input reference is from a PC relative load instruction. */
#define LLVMDisassembler_ReferenceType_In_PCrel_Load 2

/* The output reference is to a symbol stub. */
#define LLVMDisassembler_ReferenceType_Out_SymbolStub 1
/* The output reference is to a symbol address in a literal pool. */
#define LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr 2
/* The output reference is to a cstring address in a literal pool. */
#define LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr 3
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 132 | |
| 133 | #ifdef __cplusplus |
| 134 | extern "C" { |
#endif /* defined(__cplusplus) */
| 136 | |
| 137 | /** |
| 138 | * Create a disassembler for the TripleName. Symbolic disassembly is supported |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 139 | * by passing a block of information in the DisInfo parameter and specifying the |
| 140 | * TagType and callback functions as described above. These can all be passed |
| 141 | * as NULL. If successful, this returns a disassembler context. If not, it |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 142 | * returns NULL. |
| 143 | */ |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 144 | LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, |
| 145 | int TagType, LLVMOpInfoCallback GetOpInfo, |
| 146 | LLVMSymbolLookupCallback SymbolLookUp); |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 147 | |
| 148 | /** |
| 149 | * Dispose of a disassembler context. |
| 150 | */ |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 151 | void LLVMDisasmDispose(LLVMDisasmContextRef DC); |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 152 | |
| 153 | /** |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 154 | * Disassemble a single instruction using the disassembler context specified in |
| 155 | * the parameter DC. The bytes of the instruction are specified in the |
| 156 | * parameter Bytes, and contains at least BytesSize number of bytes. The |
| 157 | * instruction is at the address specified by the PC parameter. If a valid |
| 158 | * instruction can be disassembled, its string is returned indirectly in |
| 159 | * OutString whose size is specified in the parameter OutStringSize. This |
| 160 | * function returns the number of bytes in the instruction or zero if there was |
| 161 | * no valid instruction. |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 162 | */ |
Chris Lattner | 1a55c41 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 163 | size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes, |
| 164 | uint64_t BytesSize, uint64_t PC, |
| 165 | char *OutString, size_t OutStringSize); |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 166 | |
Gregory Szorc | 6244b51 | 2012-03-21 03:54:29 +0000 | [diff] [blame^] | 167 | /** |
| 168 | * @} |
| 169 | */ |
| 170 | |
Kevin Enderby | 93f7936 | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 171 | #ifdef __cplusplus |
| 172 | } |
#endif /* defined(__cplusplus) */
| 174 | |
| 175 | #endif /* !defined(LLVM_C_DISASSEMBLER_H) */ |