Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 1 | /*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\ |
| 2 | |* *| |
| 3 | |* The LLVM Compiler Infrastructure *| |
| 4 | |* *| |
| 5 | |* This file is distributed under the University of Illinois Open Source *| |
| 6 | |* License. See LICENSE.TXT for details. *| |
| 7 | |* *| |
| 8 | |*===----------------------------------------------------------------------===*| |
| 9 | |* *| |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 10 | |* This header provides a public interface to a disassembler library. *| |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 11 | |* LLVM provides an implementation of this interface. *| |
| 12 | |* *| |
| 13 | \*===----------------------------------------------------------------------===*/ |
| 14 | |
| 15 | #ifndef LLVM_C_DISASSEMBLER_H |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 16 | #define LLVM_C_DISASSEMBLER_H |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 17 | |
Daniel Dunbar | 30d0bdd | 2011-03-29 02:30:34 +0000 | [diff] [blame] | 18 | #include "llvm/Support/DataTypes.h" |
Eugene Zelenko | 35623fb | 2016-03-28 17:40:08 +0000 | [diff] [blame] | 19 | #ifdef __cplusplus |
| 20 | #include <cstddef> |
| 21 | #else |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 22 | #include <stddef.h> |
Eugene Zelenko | 35623fb | 2016-03-28 17:40:08 +0000 | [diff] [blame] | 23 | #endif |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 24 | |
| 25 | /** |
Gregory Szorc | 34c863a | 2012-03-21 03:54:29 +0000 | [diff] [blame] | 26 | * @defgroup LLVMCDisassembler Disassembler |
| 27 | * @ingroup LLVMC |
| 28 | * |
| 29 | * @{ |
| 30 | */ |
| 31 | |
| 32 | /** |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 33 | * An opaque reference to a disassembler context, as returned by the LLVMCreateDisasm family of functions. |
| 34 | */ |
| 35 | typedef void *LLVMDisasmContextRef; |
| 36 | |
| 37 | /** |
| 38 | * The type for the operand information call back function. This is called to |
| 39 | * get the symbolic information for an operand of an instruction. Typically |
| 40 | * this is from the relocation information, symbol table, etc. That block of |
| 41 | * information is saved when the disassembler context is created and passed to |
| 42 | * the call back in the DisInfo parameter. The instruction containing the operand |
| 43 | * is at the PC parameter. For some instruction sets, there can be more than |
| 44 | * one operand with symbolic information. To determine the symbolic operand |
Chris Lattner | 0ab5e2c | 2011-04-15 05:18:47 +0000 | [diff] [blame] | 45 | * information for each operand, the bytes for the specific operand in the |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 46 | * instruction are specified by the Offset parameter and its byte width is the |
| 47 | * Size parameter. For instruction sets with fixed widths and one symbolic |
| 48 | * operand per instruction, the Offset parameter will be zero and Size parameter |
NAKAMURA Takumi | a3a8135 | 2013-10-23 17:56:29 +0000 | [diff] [blame] | 49 | * will be the instruction width. The information is returned in TagBuf and is |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 50 | * Triple specific with its specific information defined by the value of |
| 51 | * TagType for that Triple. If symbolic information is returned the function |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 52 | * returns 1, otherwise it returns 0. |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 53 | */ |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 54 | typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, |
| 55 | uint64_t Offset, uint64_t Size, |
| 56 | int TagType, void *TagBuf); |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 57 | |
| 58 | /** |
Kevin Enderby | 9377a52 | 2011-04-11 18:08:50 +0000 | [diff] [blame] | 59 | * The initial support in LLVM MC for the most general form of a relocatable |
| 60 | * expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets |
| 61 | * this full form is encoded in the relocation information so that AddSymbol and |
| 62 | * SubtractSymbol can be link edited independently of each other. Many other |
| 63 | * platforms only allow a relocatable expression of the form AddSymbol + Offset |
| 64 | * to be encoded. |
NAKAMURA Takumi | a3a8135 | 2013-10-23 17:56:29 +0000 | [diff] [blame] | 65 | * |
Kevin Enderby | 9377a52 | 2011-04-11 18:08:50 +0000 | [diff] [blame] | 66 | * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct |
| 67 | * LLVMOpInfo1. The value of the relocatable expression for the operand, |
| 68 | * including any PC adjustment, is passed in to the call back in the Value |
| 69 | * field. The symbolic information about the operand is returned using all |
| 70 | * the fields of the structure with the Offset of the relocatable expression |
| 71 | * returned in the Value field. It is possible that some symbols in the |
| 72 | * relocatable expression were assembly temporary symbols, for example |
| 73 | * "Ldata - LpicBase + constant", and only the Values of the symbols without |
| 74 | * symbol names are present in the relocation information. The VariantKind |
| 75 | * type is one of the Target specific #defines below and is used to print |
| 76 | * operands like "_foo@GOT", ":lower16:_foo", etc. |
| 77 | */ |
| 78 | struct LLVMOpInfoSymbol1 { |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 79 | uint64_t Present; /* 1 if this symbol is present */ |
Kevin Enderby | 5dcda64 | 2011-10-04 22:44:48 +0000 | [diff] [blame] | 80 | const char *Name; /* symbol name if not NULL */ |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 81 | uint64_t Value; /* symbol value if Name is NULL */ |
Kevin Enderby | 9377a52 | 2011-04-11 18:08:50 +0000 | [diff] [blame] | 82 | }; |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 83 | |
Kevin Enderby | 9377a52 | 2011-04-11 18:08:50 +0000 | [diff] [blame] | 84 | struct LLVMOpInfo1 { /* operand information used by LLVMOpInfoCallback() for TagType 1 */ |
| 85 | struct LLVMOpInfoSymbol1 AddSymbol; /* the AddSymbol term of "AddSymbol - SubtractSymbol + Offset" */ |
| 86 | struct LLVMOpInfoSymbol1 SubtractSymbol; /* the SubtractSymbol term of that expression */ |
| 87 | uint64_t Value; /* in: expression value including any PC adjustment; out: the Offset term */ |
| 88 | uint64_t VariantKind; /* one of the target specific LLVMDisassembler_VariantKind_ #defines */ |
| 89 | }; |
| 90 | |
| 91 | /** |
| 92 | * The operand VariantKinds for symbolic disassembly. |
| 93 | */ |
| 94 | #define LLVMDisassembler_VariantKind_None 0 /* all targets */ |
| 95 | |
| 96 | /** |
| 97 | * The ARM target VariantKinds. |
| 98 | */ |
| 99 | #define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */ |
| 100 | #define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */ |
| 101 | |
| 102 | /** |
Tim Northover | 00ed996 | 2014-03-29 10:18:08 +0000 | [diff] [blame] | 103 | * The ARM64 target VariantKinds. |
| 104 | */ |
| 105 | #define LLVMDisassembler_VariantKind_ARM64_PAGE 1 /* @page */ |
| 106 | #define LLVMDisassembler_VariantKind_ARM64_PAGEOFF 2 /* @pageoff */ |
| 107 | #define LLVMDisassembler_VariantKind_ARM64_GOTPAGE 3 /* @gotpage */ |
| 108 | #define LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF 4 /* @gotpageoff */ |
| 109 | #define LLVMDisassembler_VariantKind_ARM64_TLVP 5 /* @tlvppage */ |
| 110 | #define LLVMDisassembler_VariantKind_ARM64_TLVOFF 6 /* @tlvppageoff */ |
| 111 | |
| 112 | /** |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 113 | * The type for the symbol lookup function. This may be called by the |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 114 | * disassembler for things like adding a comment for a PC plus a constant |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 115 | * offset load instruction to use a symbol name instead of a load address value. |
| 116 | * It is passed the block of information that is saved when the disassembler context is |
Kevin Enderby | 5dcda64 | 2011-10-04 22:44:48 +0000 | [diff] [blame] | 117 | * created and the ReferenceValue to look up as a symbol. If no symbol is found |
| 118 | * for the ReferenceValue, NULL is returned. The ReferenceType of the |
| 119 | * instruction is passed indirectly as is the PC of the instruction in |
| 120 | * ReferencePC. If the output reference can be determined its type is returned |
| 121 | * indirectly in ReferenceType along with ReferenceName if any, or that is set |
| 122 | * to NULL. |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 123 | */ |
| 124 | typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo, |
Kevin Enderby | 5dcda64 | 2011-10-04 22:44:48 +0000 | [diff] [blame] | 125 | uint64_t ReferenceValue, |
Bill Wendling | 0de5913 | 2012-07-19 00:01:33 +0000 | [diff] [blame] | 126 | uint64_t *ReferenceType, |
| 127 | uint64_t ReferencePC, |
| 128 | const char **ReferenceName); |
Kevin Enderby | 5dcda64 | 2011-10-04 22:44:48 +0000 | [diff] [blame] | 129 | /** |
| 130 | * The reference types on input and output. |
| 131 | */ |
| 132 | /* No input reference type or no output reference type. */ |
| 133 | #define LLVMDisassembler_ReferenceType_InOut_None 0 |
| 134 | |
| 135 | /* The input reference is from a branch instruction. */ |
| 136 | #define LLVMDisassembler_ReferenceType_In_Branch 1 |
| 137 | /* The input reference is from a PC relative load instruction. */ |
| 138 | #define LLVMDisassembler_ReferenceType_In_PCrel_Load 2 |
| 139 | |
Tim Northover | 00ed996 | 2014-03-29 10:18:08 +0000 | [diff] [blame] | 140 | /* The input reference is from an ARM64::ADRP instruction. */ |
| 141 | #define LLVMDisassembler_ReferenceType_In_ARM64_ADRP 0x100000001 |
| 142 | /* The input reference is from an ARM64::ADDXri instruction. */ |
| 143 | #define LLVMDisassembler_ReferenceType_In_ARM64_ADDXri 0x100000002 |
| 144 | /* The input reference is from an ARM64::LDRXui instruction. */ |
| 145 | #define LLVMDisassembler_ReferenceType_In_ARM64_LDRXui 0x100000003 |
| 146 | /* The input reference is from an ARM64::LDRXl instruction. */ |
| 147 | #define LLVMDisassembler_ReferenceType_In_ARM64_LDRXl 0x100000004 |
| 148 | /* The input reference is from an ARM64::ADR instruction. */ |
| 149 | #define LLVMDisassembler_ReferenceType_In_ARM64_ADR 0x100000005 |
| 150 | |
Kevin Enderby | 5dcda64 | 2011-10-04 22:44:48 +0000 | [diff] [blame] | 151 | /* The output reference is to a symbol stub. */ |
| 152 | #define LLVMDisassembler_ReferenceType_Out_SymbolStub 1 |
| 153 | /* The output reference is to a symbol address in a literal pool. */ |
| 154 | #define LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr 2 |
| 155 | /* The output reference is to a cstring address in a literal pool. */ |
| 156 | #define LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr 3 |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 157 | |
Kevin Enderby | 3c5ac81 | 2013-11-01 00:00:07 +0000 | [diff] [blame] | 158 | /* The output reference is to an Objective-C CoreFoundation string. */ |
| 159 | #define LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref 4 |
| 160 | /* The output reference is to an Objective-C message. */ |
| 161 | #define LLVMDisassembler_ReferenceType_Out_Objc_Message 5 |
| 162 | /* The output reference is to an Objective-C message ref. */ |
| 163 | #define LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref 6 |
| 164 | /* The output reference is to an Objective-C selector ref. */ |
| 165 | #define LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref 7 |
| 166 | /* The output reference is to an Objective-C class ref. */ |
| 167 | #define LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref 8 |
| 168 | |
Kevin Enderby | f16c8c5 | 2014-01-06 22:08:08 +0000 | [diff] [blame] | 169 | /* The output reference is to a C++ symbol name. */ |
| 170 | #define LLVMDisassembler_ReferenceType_DeMangled_Name 9 |
| 171 | |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 172 | #ifdef __cplusplus |
| 173 | extern "C" { |
| 174 | #endif /* defined(__cplusplus) */ |
| 175 | |
| 176 | /** |
| 177 | * Create a disassembler for the TripleName. Symbolic disassembly is supported |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 178 | * by passing a block of information in the DisInfo parameter and specifying the |
| 179 | * TagType and callback functions as described above. These can all be passed |
| 180 | * as NULL. If successful, this returns a disassembler context (see LLVMDisasmDispose() for disposing of it). If not, it |
Bradley Smith | 7a77075 | 2014-09-30 16:31:40 +0000 | [diff] [blame] | 181 | * returns NULL. This function is equivalent to calling |
| 182 | * LLVMCreateDisasmCPUFeatures() with an empty CPU name and feature set. |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 183 | */ |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 184 | LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, |
| 185 | int TagType, LLVMOpInfoCallback GetOpInfo, |
| 186 | LLVMSymbolLookupCallback SymbolLookUp); |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 187 | |
| 188 | /** |
Jim Grosbach | 0ca9d5b | 2012-12-07 23:53:27 +0000 | [diff] [blame] | 189 | * Create a disassembler for the Triple and a specific CPU. Symbolic |
| 190 | * disassembly is supported by passing a block of information in the DisInfo |
| 191 | * parameter and specifying the TagType and callback functions as described |
| 192 | * above. These can all be passed as NULL. If successful, this returns a |
Bradley Smith | 7a77075 | 2014-09-30 16:31:40 +0000 | [diff] [blame] | 193 | * disassembler context. If not, it returns NULL. This function is equivalent |
| 194 | * to calling LLVMCreateDisasmCPUFeatures() with an empty feature set. |
Jim Grosbach | 0ca9d5b | 2012-12-07 23:53:27 +0000 | [diff] [blame] | 195 | */ |
| 196 | LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, |
| 197 | void *DisInfo, int TagType, |
| 198 | LLVMOpInfoCallback GetOpInfo, |
| 199 | LLVMSymbolLookupCallback SymbolLookUp); |
| 200 | |
| 201 | /** |
Bradley Smith | 7a77075 | 2014-09-30 16:31:40 +0000 | [diff] [blame] | 202 | * Create a disassembler for the Triple, a specific CPU and specific Features |
| 203 | * string. Symbolic disassembly is supported by passing a block of information |
| 204 | * in the DisInfo parameter and specifying the TagType and callback functions as |
| 205 | * described above. These can all be passed as NULL. If successful, this |
| 206 | * returns a disassembler context. If not, it returns NULL. |
| 207 | */ |
| 208 | LLVMDisasmContextRef |
| 209 | LLVMCreateDisasmCPUFeatures(const char *Triple, const char *CPU, |
| 210 | const char *Features, void *DisInfo, int TagType, |
| 211 | LLVMOpInfoCallback GetOpInfo, |
| 212 | LLVMSymbolLookupCallback SymbolLookUp); |
| 213 | |
| 214 | /** |
Kevin Enderby | 62183c4 | 2012-10-22 22:31:46 +0000 | [diff] [blame] | 215 | * Set the options of the disassembler context DC. Returns 1 if it can set the Options and 0 |
| 216 | * otherwise. See the LLVMDisassembler_Option_ #defines below for the option values. |
| 217 | */ |
| 218 | int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options); |
| 219 | |
| 220 | /* The option to produce marked up assembly. */ |
| 221 | #define LLVMDisassembler_Option_UseMarkup 1 |
Kevin Enderby | 168ffb3 | 2012-12-05 18:13:19 +0000 | [diff] [blame] | 222 | /* The option to print immediates as hex. */ |
| 223 | #define LLVMDisassembler_Option_PrintImmHex 2 |
Kevin Enderby | 85cf531 | 2012-12-18 23:47:28 +0000 | [diff] [blame] | 224 | /* The option to use the other assembler printer variant. */ |
| 225 | #define LLVMDisassembler_Option_AsmPrinterVariant 4 |
Quentin Colombet | 93a98aa | 2013-10-01 22:14:56 +0000 | [diff] [blame] | 226 | /* The option to set comments on instructions. */ |
| 227 | #define LLVMDisassembler_Option_SetInstrComments 8 |
Quentin Colombet | 5f09cb0 | 2013-10-02 22:07:57 +0000 | [diff] [blame] | 228 | /* The option to print latency information alongside instructions. */ |
| 229 | #define LLVMDisassembler_Option_PrintLatency 16 |
Kevin Enderby | 62183c4 | 2012-10-22 22:31:46 +0000 | [diff] [blame] | 230 | |
| 231 | /** |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 232 | * Dispose of the disassembler context passed in the parameter DC. |
| 233 | */ |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 234 | void LLVMDisasmDispose(LLVMDisasmContextRef DC); |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 235 | |
| 236 | /** |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 237 | * Disassemble a single instruction using the disassembler context specified in |
| 238 | * the parameter DC. The bytes of the instruction are specified in the |
| 239 | * parameter Bytes, which contains at least BytesSize number of bytes. The |
| 240 | * instruction is at the address specified by the PC parameter. If a valid |
| 241 | * instruction can be disassembled, its string is returned indirectly in |
| 242 | * OutString whose size is specified in the parameter OutStringSize. This |
| 243 | * function returns the number of bytes in the instruction or zero if there was |
| 244 | * no valid instruction. |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 245 | */ |
Chris Lattner | e8bf2d5 | 2011-05-22 04:44:48 +0000 | [diff] [blame] | 246 | size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes, |
| 247 | uint64_t BytesSize, uint64_t PC, |
| 248 | char *OutString, size_t OutStringSize); |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 249 | |
Gregory Szorc | 34c863a | 2012-03-21 03:54:29 +0000 | [diff] [blame] | 250 | /** |
| 251 | * @} |
| 252 | */ |
| 253 | |
Kevin Enderby | f3070dc | 2011-03-28 18:25:07 +0000 | [diff] [blame] | 254 | #ifdef __cplusplus |
| 255 | } |
| 256 | #endif /* defined(__cplusplus) */ |
| 257 | |
Eugene Zelenko | 35623fb | 2016-03-28 17:40:08 +0000 | [diff] [blame] | 258 | #endif /* LLVM_C_DISASSEMBLER_H */ |