blob: 8a5caa8a1ecb7b3faa7b9b176ec787f2e9cffd36 [file] [log] [blame]
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +08001#ifndef __CS_H__
2#define __CS_H__
3
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
6
// Standard headers are pulled in before the extern "C" block is opened, so
// that they are never processed inside a C linkage specification.
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>

// Everything declared below gets C linkage when compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080014
// Capstone API version (major and minor numbers)
#define CS_API_MAJOR 1
#define CS_API_MINOR 1
Nguyen Anh Quynhbb64b0b2013-12-10 07:56:17 +080018
// Macro to create a combined version number which can be compared to the
// result of the cs_version_ex() API.
// The major number occupies the bits above the low 8 bits, the minor number
// is added on top. Arguments are parenthesized so that arbitrary expressions
// can be passed safely.
#define CS_MAKE_VERSION(major, minor) (((major) << 8) + (minor))
22
// Handle used with all APIs
typedef size_t csh;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080025
// Architecture type
typedef enum cs_arch {
	CS_ARCH_ARM = 0,	// ARM architecture (including Thumb, Thumb-2)
	CS_ARCH_ARM64,		// ARM-64, also called AArch64
	CS_ARCH_MIPS,		// Mips architecture
	CS_ARCH_X86,		// X86 architecture (including x86 & x86-64)
	CS_ARCH_MAX,		// one past the last architecture id listed above
	CS_ARCH_ALL = 0xFFFF,	// pass to cs_support() to query for all architectures
} cs_arch;
35
// Mode type
// Values are flags that are combined when passed to cs_open().
// Some constants share a value because they belong to different architectures
// (CS_MODE_THUMB is ARM-only, CS_MODE_MICRO is MIPS-only).
typedef enum cs_mode {
	CS_MODE_LITTLE_ENDIAN = 0,	// little endian mode (default mode)
	CS_MODE_ARM = 0,	// 32-bit ARM
	CS_MODE_16 = 1 << 1,	// 16-bit mode
	CS_MODE_32 = 1 << 2,	// 32-bit mode
	CS_MODE_64 = 1 << 3,	// 64-bit mode
	CS_MODE_THUMB = 1 << 4,	// ARM's Thumb mode, including Thumb-2
	CS_MODE_MICRO = 1 << 4,	// MicroMips mode (MIPS architecture)
	CS_MODE_N64 = 1 << 5,	// Nintendo-64 mode (MIPS architecture)

	// Big endian mode: only bit 31 set (bit pattern 0x80000000).
	// Spelled as a plain constant because "1 << 31" shifts into the sign bit
	// of a signed int, which is undefined behavior in C.
	CS_MODE_BIG_ENDIAN = -2147483647 - 1
} cs_mode;
49
// Runtime option for the disassembler engine
typedef enum cs_opt_type {
	CS_OPT_SYNTAX = 1,	// Assembly output syntax
	CS_OPT_DETAIL,	// Break down instruction structure into details
	CS_OPT_MODE,	// Change engine's mode at run-time
} cs_opt_type;
56
// Runtime option value (associated with option type above)
// The option type each value applies to is given in parentheses.
typedef enum cs_opt_value {
	CS_OPT_OFF = 0,	// Turn OFF an option (CS_OPT_DETAIL)
	CS_OPT_SYNTAX_INTEL = 1,	// X86 Intel asm syntax - default syntax on X86 (CS_OPT_SYNTAX)
	CS_OPT_SYNTAX_ATT,	// X86 ATT asm syntax (CS_OPT_SYNTAX)
	CS_OPT_ON,	// Turn ON an option - default option for CS_OPT_DETAIL
} cs_opt_value;
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +080064
65
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080066#include "arm.h"
67#include "arm64.h"
68#include "mips.h"
69#include "x86.h"
70
Nguyen Anh Quynh4fe224b2013-12-24 16:49:36 +080071// NOTE: All information in cs_detail is only available when CS_OPT_DETAIL = CS_OPT_ON
72typedef struct cs_detail {
73 uint8_t regs_read[12]; // list of implicit registers read by this insn
74 uint8_t regs_read_count; // number of implicit registers read by this insn
75
76 uint8_t regs_write[20]; // list of implicit registers modified by this insn
77 uint8_t regs_write_count; // number of implicit registers modified by this insn
78
79 uint8_t groups[8]; // list of group this instruction belong to
80 uint8_t groups_count; // number of groups this insn belongs to
81
82 // Architecture-specific instruction info
83 union {
84 cs_x86 x86; // X86 architecture, including 16-bit, 32-bit & 64-bit mode
85 cs_arm64 arm64; // ARM64 architecture (aka AArch64)
86 cs_arm arm; // ARM architecture (including Thumb/Thumb2)
87 cs_mips mips; // MIPS architecture
88 };
89} cs_detail;
90
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080091// Detail information of disassembled instruction
92typedef struct cs_insn {
93 // Instruction ID
94 // Find the instruction id from header file of corresponding architecture,
95 // such as arm.h for ARM, x86.h for X86, etc...
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +080096 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080097 unsigned int id;
98
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +080099 // Address (EIP) of this instruction
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +0800100 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Nguyen Anh Quynhf2a649e2013-12-03 12:21:01 +0800101 uint64_t address;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800102
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800103 // Size of this instruction
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +0800104 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800105 uint16_t size;
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800106 // Machine bytes of this instruction, with number of bytes indicated by @size above
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +0800107 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
pancakef0e4eed2013-12-11 22:14:42 +0100108 uint8_t bytes[16];
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800109
110 // Ascii text of instruction mnemonic
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +0800111 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800112 char mnemonic[32];
113
114 // Ascii text of instruction operands
Nguyen Anh Quynh46a5afd2013-12-14 11:52:06 +0800115 // This information is available even when CS_OPT_DETAIL = CS_OPT_OFF
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800116 char op_str[96];
117
Nguyen Anh Quynh4fe224b2013-12-24 16:49:36 +0800118 // Pointer to cs_detail.
119 // NOTE: detail pointer is only valid (not NULL) when CS_OP_DETAIL = CS_OPT_ON
120 // Otherwise, if CS_OPT_DETAIL = CS_OPT_OFF, @detail = NULL
121 cs_detail *detail;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800122} cs_insn;
123
Nguyen Anh Quynhbb546032013-12-05 18:29:51 +0800124
// Calculate the offset of a disassembled instruction in its buffer, given its position
// in its array of disassembled insn.
// The offset is the difference between the address of that instruction and
// the address of the first instruction of the array.
// NOTE: this macro works with position (>=1), not index
// Both arguments are parenthesized so that arbitrary expressions can be passed.
#define CS_INSN_OFFSET(insns, post) ((insns)[(post) - 1].address - (insns)[0].address)
Nguyen Anh Quynhbb546032013-12-05 18:29:51 +0800129
130
// All types of errors encountered by Capstone API.
// These are values returned by cs_errno()
typedef enum cs_err {
	CS_ERR_OK = 0,	// No error: everything was fine
	CS_ERR_MEM,	// Out-Of-Memory error: cs_open(), cs_disasm_dyn()
	CS_ERR_ARCH,	// Unsupported architecture: cs_open()
	CS_ERR_HANDLE,	// Invalid handle: cs_op_count(), cs_op_index()
	CS_ERR_CSH,	// Invalid csh argument: cs_close(), cs_errno(), cs_option()
	CS_ERR_MODE,	// Invalid/unsupported mode: cs_open()
	CS_ERR_OPTION,	// Invalid/unsupported option: cs_option()
	CS_ERR_DETAIL,	// Information is unavailable because detail option is OFF
} cs_err;
143
/*
 Retrieve API version in major and minor numbers.

 @major: major number of API version
 @minor: minor number of API version

 For example, first API version would return 1 in @major, and 0 in @minor
*/
void cs_version(int *major, int *minor);
153
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800154
/*
 Return combined API version & major and minor version numbers.

 @major: major number of API version
 @minor: minor number of API version

 @return hexadecimal number encoding both major & minor versions, which
	can be used in comparisons.

 For example, second API version would return 1 in @major, and 1 in @minor
 The return value would be 0x0101

 NOTE: if you only care about returned value, but not major and minor values,
 set both arguments to NULL.
*/
unsigned int cs_version_ex(int *major, int *minor);
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +0800170
171
172/*
173 Check if a particular arch is supported by this library.
174
175 @arch: the architecture to be checked.
176 To verify if this library supports everything, use CS_ARCH_ALL
177
178 @return True if this library supports the given arch.
179*/
180bool cs_support(cs_arch arch);
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +0800181
182/*
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800183 Initialize CS handle: this must be done before any usage of CS.
184
185 @arch: architecture type (CS_ARCH_*)
186 @mode: hardware mode. This is combined of CS_MODE_*
187 @handle: pointer to handle, which will be updated at return time
188
189 @return CS_ERR_OK on success, or other value on failure (refer to cs_err enum
190 for detailed error).
191*/
192cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle);
193
194/*
195 Close CS handle: MUST do to release the handle when it is not used anymore.
196
197 @handle: handle returned by cs_open()
198
199 @return CS_ERR_OK on success, or other value on failure (refer to cs_err enum
200 for detailed error).
201*/
202cs_err cs_close(csh handle);
203
204/*
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800205 Set option for disassembling engine at runtime
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800206
207 @handle: handle returned by cs_open()
Nguyen Anh Quynhb8ce68e2013-12-03 23:45:08 +0800208 @type: type of option to be set
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800209 @value: option value corresponding with @type
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800210
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800211 @return CS_ERR_OK on success, or other value on failure.
212 Refer to cs_err enum for detailed error.
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800213*/
Nguyen Anh Quynhda8adad2013-12-04 09:44:07 +0800214cs_err cs_option(csh handle, cs_opt_type type, size_t value);
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800215
216/*
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800217 Report the last error number when some API function fail.
218 Like glibc's errno, cs_errno might not retain its old value once accessed.
219
220 @handle: handle returned by cs_open()
221
222 @return: error code of cs_err enum type (CS_ERR_*, see above)
223*/
224cs_err cs_errno(csh handle);
225
226/*
227 Disasm the binary code in @buffer.
228 Disassembled instructions will be put into @insn
229 NOTE: this API requires the pre-allocated buffer in @insn
230
231 @handle: handle returned by cs_open()
232 @code: buffer containing raw binary code to be disassembled
233 @code_size: size of above code
Nguyen Anh Quynh612b5d22013-12-03 12:23:09 +0800234 @address: address of the first insn in given raw code buffer
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800235 @insn: array of insn filled in by this function
236 NOTE: @insn size must be at least @count to avoid buffer overflow
237 @count: number of instrutions to be disassembled, or 0 to get all of them
238 @return: the number of succesfully disassembled instructions,
239 or 0 if this function failed to disassemble the given code
Nguyen Anh Quynh029df202013-12-03 11:36:54 +0800240
Nguyen Anh Quynh4fe224b2013-12-24 16:49:36 +0800241 NOTE: this API does not provide detail information, meaning @detail = NULL
242
Nguyen Anh Quynh029df202013-12-03 11:36:54 +0800243 On failure, call cs_errno() for error code.
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800244*/
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800245size_t cs_disasm(csh handle,
pancakef0e4eed2013-12-11 22:14:42 +0100246 const uint8_t *code, size_t code_size,
Nguyen Anh Quynh612b5d22013-12-03 12:23:09 +0800247 uint64_t address,
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800248 size_t count,
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800249 cs_insn *insn);
250
251/*
252 Dynamicly allocate memory to contain disasm insn
253 Disassembled instructions will be put into @*insn
254
255 NOTE 1: this API will automatically determine memory needed to contain
256 output disassembled instructions in @insn.
257 NOTE 2: caller must free() the allocated memory itself to avoid memory leaking
258
259 @handle: handle returned by cs_open()
260 @code: buffer containing raw binary code to be disassembled
261 @code_size: size of above code
Nguyen Anh Quynh612b5d22013-12-03 12:23:09 +0800262 @address: address of the first insn in given raw code buffer
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800263 @insn: array of insn filled in by this function
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800264 NOTE: @insn will be allocated by this function, and should be freed
265 with cs_free() API.
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800266 @count: number of instrutions to be disassembled, or 0 to get all of them
267 @return: the number of succesfully disassembled instructions,
268 or 0 if this function failed to disassemble the given code
Nguyen Anh Quynh029df202013-12-03 11:36:54 +0800269
270 On failure, call cs_errno() for error code.
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800271*/
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800272size_t cs_disasm_dyn(csh handle,
pancakef0e4eed2013-12-11 22:14:42 +0100273 const uint8_t *code, size_t code_size,
Nguyen Anh Quynh612b5d22013-12-03 12:23:09 +0800274 uint64_t address,
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800275 size_t count,
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800276 cs_insn **insn);
277
278/*
Nguyen Anh Quynh79976c12013-12-04 23:03:13 +0800279 Free memory allocated in @insn by cs_disasm_dyn()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800280
Nguyen Anh Quynh4fe224b2013-12-24 16:49:36 +0800281 @insn: pointer returned by @insn argument in cs_disasm_dyn()
282 @count: number of cs_insn structures returned by cs_disasm_dyn()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800283*/
Nguyen Anh Quynh4fe224b2013-12-24 16:49:36 +0800284void cs_free(cs_insn *insn, size_t count);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800285
286/*
287 Return friendly name of regiser in a string
288 Find the instruction id from header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
289
Nguyen Anh Quynh3640f3c2013-12-01 00:26:27 +0800290 @handle: handle returned by cs_open()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800291 @reg: register id
292 @return: string name of the register, or NULL if @reg_id is invalid.
293*/
pancakef0e4eed2013-12-11 22:14:42 +0100294const char *cs_reg_name(csh handle, unsigned int reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800295
296/*
297 Return friendly name of an instruction in a string
298 Find the instruction id from header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
299
Nguyen Anh Quynh3640f3c2013-12-01 00:26:27 +0800300 @handle: handle returned by cs_open()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800301 @insn: instruction id
302
303 @return: string name of the instruction, or NULL if @insn_id is invalid.
304*/
pancakef0e4eed2013-12-11 22:14:42 +0100305const char *cs_insn_name(csh handle, unsigned int insn_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800306
307/*
308 Check if a disassembled instruction belong to a particular group.
309 Find the group id from header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
310 Internally, this simply verifies if @group_id matches any member of insn->groups array.
311
312 @handle: handle returned by cs_open()
313 @insn: disassembled instruction structure received from cs_disasm() or cs_disasm_dyn()
314 @group_id: group that you want to check if this instruction belong to.
315
316 @return: true if this instruction indeed belongs to aboved group, or false otherwise.
317*/
318bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id);
319
320/*
321 Check if a disassembled instruction IMPLICITLY used a particular register.
322 Find the register id from header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
323 Internally, this simply verifies if @reg_id matches any member of insn->regs_read array.
324
325 @insn: disassembled instruction structure received from cs_disasm() or cs_disasm_dyn()
326 @reg_id: register that you want to check if this instruction used it.
327
328 @return: true if this instruction indeed implicitly used aboved register, or false otherwise.
329*/
330bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id);
331
332/*
333 Check if a disassembled instruction IMPLICITLY modified a particular register.
334 Find the register id from header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
335 Internally, this simply verifies if @reg_id matches any member of insn->regs_write array.
336
337 @insn: disassembled instruction structure received from cs_disasm() or cs_disasm_dyn()
338 @reg_id: register that you want to check if this instruction modified it.
339
340 @return: true if this instruction indeed implicitly modified aboved register, or false otherwise.
341*/
342bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id);
343
344/*
345 Count the number of operands of a given type.
346 Find the operand type in header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
347
Nguyen Anh Quynh3640f3c2013-12-01 00:26:27 +0800348 @handle: handle returned by cs_open()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800349 @insn: disassembled instruction structure received from cs_disasm() or cs_disasm_dyn()
350 @op_type: Operand type to be found.
351
352 @return: number of operands of given type @op_type in instruction @insn,
353 or -1 on failure.
354*/
355int cs_op_count(csh handle, cs_insn *insn, unsigned int op_type);
356
357/*
358 Retrieve the position of operand of given type in arch.op_info[] array.
359 Later, the operand can be accessed using the returned position.
360 Find the operand type in header file of corresponding architecture (arm.h for ARM, x86.h for X86, ...)
361
Nguyen Anh Quynh3640f3c2013-12-01 00:26:27 +0800362 @handle: handle returned by cs_open()
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800363 @insn: disassembled instruction structure received from cs_disasm() or cs_disasm_dyn()
364 @op_type: Operand type to be found.
365 @position: position of the operand to be found. This must be in the range
366 [1, cs_op_count(handle, insn, op_type)]
367
368 @return: index of operand of given type @op_type in arch.op_info[] array
369 in instruction @insn, or -1 on failure.
370*/
371int cs_op_index(csh handle, cs_insn *insn, unsigned int op_type,
372 unsigned int position);
373
// Close the extern "C" block opened at the top of this header (C++ only).
#ifdef __cplusplus
}
#endif
377
378#endif