Nguyen Anh Quynh | 81a97c6 | 2014-09-26 23:38:53 +0800 | [diff] [blame] | 1 | (* Capstone Disassembly Engine |
| 2 | * By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 *) |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 3 | |
| 4 | open Arm |
| 5 | open Arm64 |
| 6 | open Mips |
Guillaume Jeanne | e002ac7 | 2014-06-30 15:46:04 +0200 | [diff] [blame] | 7 | open Ppc |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 8 | open X86 |
Guillaume Jeanne | ae48c97 | 2014-08-19 14:46:06 +0200 | [diff] [blame] | 9 | open Sparc |
| 10 | open Systemz |
| 11 | open Xcore |
Wolfgang Schwotzer | 22b4d0e | 2017-10-21 15:44:36 +0200 | [diff] [blame^] | 12 | open M680x |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 13 | open Printf (* debug *) |
| 14 | |
(** Hardware architectures selectable when opening the engine.

    NOTE(review): values of this type are passed straight to the C stubs
    (see [_cs_open] and [_cs_disasm_internal]); presumably the stubs rely on
    the declaration order, so confirm before reordering or inserting
    constructors. *)
type arch =
  | CS_ARCH_ARM         (** ARM *)
  | CS_ARCH_ARM64       (** ARM64 *)
  | CS_ARCH_MIPS        (** MIPS *)
  | CS_ARCH_X86         (** X86 *)
  | CS_ARCH_PPC         (** PowerPC *)
  | CS_ARCH_SPARC       (** Sparc *)
  | CS_ARCH_SYSZ        (** SystemZ *)
  | CS_ARCH_XCORE       (** XCore *)
  | CS_ARCH_M68K        (** M68K *)
  | CS_ARCH_TMS320C64X  (** TMS320C64x *)
  | CS_ARCH_M680X       (** M680X *)
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 28 | |
(** Hardware modes. Several modes can be requested together: the engine is
    opened with a [mode list] (see [_cs_open]).

    NOTE(review): these values cross the FFI boundary; presumably the C stubs
    depend on the declaration order, so confirm before reordering. *)
type mode =
  | CS_MODE_LITTLE_ENDIAN  (** Little-endian mode (default mode). *)
  | CS_MODE_ARM            (** ARM mode. *)
  | CS_MODE_16             (** 16-bit mode (for X86). *)
  | CS_MODE_32             (** 32-bit mode (for X86). *)
  | CS_MODE_64             (** 64-bit mode (for X86, PPC). *)
  | CS_MODE_THUMB          (** ARM's Thumb mode, including Thumb-2. *)
  | CS_MODE_MCLASS         (** ARM's MClass mode. *)
  | CS_MODE_V8             (** ARMv8 A32 encodings for ARM. *)
  | CS_MODE_MICRO          (** MicroMips mode (MIPS architecture). *)
  | CS_MODE_MIPS3          (** Mips3 mode (MIPS architecture). *)
  | CS_MODE_MIPS32R6       (** Mips32-R6 mode (MIPS architecture). *)
  | CS_MODE_MIPS2          (** Mips2 mode (MIPS architecture). *)
  | CS_MODE_V9             (** SparcV9 mode (Sparc architecture). *)
  | CS_MODE_BIG_ENDIAN     (** Big-endian mode. *)
  | CS_MODE_MIPS32         (** Mips32 mode (for Mips). *)
  | CS_MODE_MIPS64         (** Mips64 mode (for Mips). *)
  | CS_MODE_QPX            (** Quad Processing eXtensions mode (PowerPC). *)
  | CS_MODE_M680X_6301     (** M680X Hitachi 6301, 6303 mode. *)
  | CS_MODE_M680X_6309     (** M680X Hitachi 6309 mode. *)
  | CS_MODE_M680X_6800     (** M680X Motorola 6800, 6802 mode. *)
  | CS_MODE_M680X_6801     (** M680X Motorola 6801, 6803 mode. *)
  | CS_MODE_M680X_6805     (** M680X Motorola 6805 mode. *)
  | CS_MODE_M680X_6808     (** M680X Motorola 6808 mode. *)
  | CS_MODE_M680X_6809     (** M680X Motorola 6809 mode. *)
  | CS_MODE_M680X_6811     (** M680X Motorola/Freescale 68HC11 mode. *)
  | CS_MODE_M680X_CPU12    (** M680X Motorola/Freescale/NXP CPU12 mode. *)
  | CS_MODE_M680X_HCS08    (** M680X Freescale HCS08 mode. *)
Nguyen Anh Quynh | cac770a | 2015-03-12 17:03:33 +0800 | [diff] [blame] | 58 | |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 59 | |
Guillaume Jeanne | ae48c97 | 2014-08-19 14:46:06 +0200 | [diff] [blame] | 60 | |
(** Runtime option kinds for the disassembly engine; passed to [_cs_option]
    together with an [Int64.t] option value.

    NOTE(review): these values cross the FFI boundary; presumably the C stub
    depends on the declaration order, so confirm before reordering. *)
type opt_type =
  | CS_OPT_SYNTAX
      (** Assembly output syntax. *)
  | CS_OPT_DETAIL
      (** Break down instruction structure into details. *)
  | CS_OPT_MODE
      (** Change the engine's mode at run-time. *)
  | CS_OPT_MEM
      (** User-defined dynamic memory related functions. *)
  | CS_OPT_SKIPDATA
      (** Skip data when disassembling; the engine is then in SKIPDATA
          mode. *)
  | CS_OPT_SKIPDATA_SETUP
      (** Set up a user-defined function for the SKIPDATA option. *)
| 69 | |
| 70 | |
(* Common instruction operand access types, shared by all architectures.
   They are bit flags and may be combined, for example
   [_CS_AC_READ lor _CS_AC_WRITE]. *)

(** Uninitialized/invalid access type. *)
let _CS_AC_INVALID = 0

(** Operand is read from memory or register (bit 0). *)
let _CS_AC_READ = 1 lsl 0

(** Operand is written to memory or register (bit 1). *)
let _CS_AC_WRITE = 1 lsl 1
| 76 | |
(* Runtime option values, to be paired with an [opt_type] option kind.
   Note that [_CS_OPT_ON] and [_CS_OPT_SYNTAX_NOREGNAME] share the value 3L;
   they are told apart by the option kind they are used with. *)

(** Turn OFF an option - the default for CS_OPT_DETAIL and CS_OPT_SKIPDATA. *)
let _CS_OPT_OFF = 0L

(** Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA). *)
let _CS_OPT_ON = 3L

(** Default asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_DEFAULT = 0L

(** X86 Intel asm syntax - default on X86 (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_INTEL = 1L

(** X86 ATT asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_ATT = 2L

(** Print register names as numbers only (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_NOREGNAME = 3L
Guillaume Jeanne | ae48c97 | 2014-08-19 14:46:06 +0200 | [diff] [blame] | 84 | |
(* Common instruction operand types, shared by all architectures. *)

(** Uninitialized/invalid operand. *)
let _CS_OP_INVALID = 0

(** Register operand. *)
let _CS_OP_REG = 1

(** Immediate operand. *)
let _CS_OP_IMM = 2

(** Memory operand. *)
let _CS_OP_MEM = 3

(** Floating-point operand. *)
let _CS_OP_FP = 4
Nguyen Anh Quynh | 82354b6 | 2014-09-28 23:56:02 +0800 | [diff] [blame] | 91 | |
(* Common instruction groups, shared by all architectures. *)

(** Uninitialized/invalid group. *)
let _CS_GRP_INVALID = 0

(** All jump instructions (conditional, direct and indirect jumps). *)
let _CS_GRP_JUMP = 1

(** All call instructions. *)
let _CS_GRP_CALL = 2

(** All return instructions. *)
let _CS_GRP_RET = 3

(** All interrupt instructions (int and syscall). *)
let _CS_GRP_INT = 4

(** All interrupt-return instructions. *)
let _CS_GRP_IRET = 5

(** All privileged instructions. *)
let _CS_GRP_PRIVILEGE = 6
Nguyen Anh Quynh | a65d7ef | 2014-10-31 15:47:17 +0800 | [diff] [blame] | 100 | |
(** Architecture-specific payload stored in the [arch] field of [cs_insn0].
    Each constructor wraps the record type exported by the matching
    architecture module opened at the top of this file.

    Note that [arch] also lists [CS_ARCH_M68K] and [CS_ARCH_TMS320C64X], for
    which no constructor exists here.

    NOTE(review): values of this type are built by the C stubs; presumably
    they depend on the declaration order, so confirm before reordering. *)
type cs_arch =
  | CS_INFO_ARM of cs_arm      (** Payload from module [Arm]. *)
  | CS_INFO_ARM64 of cs_arm64  (** Payload from module [Arm64]. *)
  | CS_INFO_MIPS of cs_mips    (** Payload from module [Mips]. *)
  | CS_INFO_X86 of cs_x86      (** Payload from module [X86]. *)
  | CS_INFO_PPC of cs_ppc      (** Payload from module [Ppc]. *)
  | CS_INFO_SPARC of cs_sparc  (** Payload from module [Sparc]. *)
  | CS_INFO_SYSZ of cs_sysz    (** Payload from module [Systemz]. *)
  | CS_INFO_XCORE of cs_xcore  (** Payload from module [Xcore]. *)
  | CS_INFO_M680X of cs_m680x  (** Payload from module [M680x]. *)
Guillaume Jeanne | ae48c97 | 2014-08-19 14:46:06 +0200 | [diff] [blame] | 111 | |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 112 | |
(** Engine handle as used by the OCaml-level wrappers in this file. *)
type csh = {
  h : Int64.t;
      (** Raw handle value obtained from [_cs_open]; [cs_open] stores [0L]
          here when the stub returned [None]. *)
  a : arch;
      (** Architecture the handle was opened for. *)
}
| 117 | |
(** Plain-record view of one disassembled instruction, as produced by the
    disassembly stubs ([cs_disasm_quick], [_cs_disasm_internal]).

    NOTE(review): this record is allocated on the C side; presumably the
    stubs depend on the field order, so confirm before reordering. *)
type cs_insn0 = {
  id : int;                (** Instruction id. *)
  address : int;           (** Address of the instruction. *)
  size : int;              (** Size of the instruction. *)
  bytes : int array;       (** Instruction bytes - presumably one array
                               element per byte; confirm against the stub. *)
  mnemonic : string;       (** Mnemonic text. *)
  op_str : string;         (** Operand text. *)
  regs_read : int array;   (** Ids of registers read (see [cs_reg_read]). *)
  regs_write : int array;  (** Ids of registers written
                               (see [cs_reg_write]). *)
  groups : int array;      (** Ids of the groups the instruction belongs to
                               (see [cs_insn_group]). *)
  arch : cs_arch;          (** Architecture-specific payload. *)
}
| 130 | |
(* Low-level bindings to the C stubs. The string after [=] is the name of the
   C symbol implementing each primitive; signatures must stay in sync with
   those stubs. *)

(** Open an engine for an architecture and a list of modes; yields the raw
    handle, or [None] (treated as a failed open by [cs_open]). *)
external _cs_open :
  arch -> mode list -> Int64.t option
  = "ocaml_open"

(** One-shot disassembly taking architecture and modes directly instead of a
    handle. The string is the code buffer; the two [Int64.t] arguments are,
    in order, the start address and the instruction count (names taken from
    the [cs_disasm] wrapper - confirm against the stub). *)
external cs_disasm_quick :
  arch -> mode list -> string -> Int64.t -> Int64.t -> cs_insn0 list
  = "ocaml_cs_disasm"

(** Disassembly on an already opened raw handle (second argument). *)
external _cs_disasm_internal :
  arch -> Int64.t -> string -> Int64.t -> Int64.t -> cs_insn0 list
  = "ocaml_cs_disasm_internal"

(** Name of a register id, looked up through a raw handle. *)
external _cs_reg_name :
  Int64.t -> int -> string
  = "ocaml_register_name"

(** Name of an instruction id, looked up through a raw handle. *)
external _cs_insn_name :
  Int64.t -> int -> string
  = "ocaml_instruction_name"

(** Name of a group id, looked up through a raw handle. *)
external _cs_group_name :
  Int64.t -> int -> string
  = "ocaml_group_name"

(** Library version as a single integer (encoding defined by the stub). *)
external cs_version :
  unit -> int
  = "ocaml_version"

(** Set an option of the given kind to the given value on a raw handle;
    the [int] result comes from the stub (presumably a status code). *)
external _cs_option :
  Int64.t -> opt_type -> Int64.t -> int
  = "ocaml_option"

(** Close a raw handle; the [int] result comes from the stub (presumably a
    status code). *)
external _cs_close :
  Int64.t -> int
  = "ocaml_close"
| 140 | |
| 141 | |
(** [cs_open arch mode] opens an engine for [arch] with the mode list [mode]
    and wraps the result in a [csh] record.

    When the underlying stub returns [None], the record is still returned
    but its [h] field is [0L]; no exception is raised, so callers that care
    about failure must check for that value. *)
let cs_open arch mode : csh =
  match _cs_open arch mode with
  | Some raw -> { h = raw; a = arch }
  | None -> { h = 0L; a = arch }
| 149 | |
(** [cs_close handle] closes the engine behind [handle] and returns the
    integer produced by the [_cs_close] stub. *)
let cs_close { h; _ } = _cs_close h
| 153 | |
(** [cs_option handle opt value] sets option [opt] to [value] on the engine
    behind [handle] and returns the integer produced by the [_cs_option]
    stub. *)
let cs_option { h; _ } opt value = _cs_option h opt value
| 157 | |
(** [cs_disasm handle code address count] disassembles [code] with the engine
    behind [handle], forwarding [address] and [count] unchanged to
    [_cs_disasm_internal], and returns the resulting instruction records. *)
let cs_disasm handle code address count =
  let { h; a } = handle in
  _cs_disasm_internal a h code address count
| 161 | |
(** [cs_reg_name handle id] is the name of register [id] according to the
    engine behind [handle]. *)
let cs_reg_name { h; _ } id = _cs_reg_name h id
| 165 | |
(** [cs_insn_name handle id] is the name of instruction [id] according to the
    engine behind [handle]. *)
let cs_insn_name { h; _ } id = _cs_insn_name h id
| 169 | |
(** [cs_group_name handle id] is the name of instruction group [id] according
    to the engine behind [handle]. *)
let cs_group_name { h; _ } id = _cs_group_name h id
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 173 | |
(** Object view of one disassembled instruction.

    [c] is the engine handle the instruction was produced with and [a] the
    raw instruction record. The accessor methods return the corresponding
    fields of [a] unchanged; the [*_name] methods look names up through the
    handle [c]. *)
class cs_insn (c : csh) (a : cs_insn0) =
  object
    (* Plain accessors over the wrapped record. *)
    method id = a.id
    method address = a.address
    method size = a.size
    method bytes = a.bytes
    method mnemonic = a.mnemonic
    method op_str = a.op_str
    method regs_read = a.regs_read
    method regs_write = a.regs_write
    method groups = a.groups
    method arch = a.arch

    (* Name lookups: the argument is the id to resolve, not this
       instruction's own id. *)
    method reg_name reg = _cs_reg_name c.h reg
    method insn_name insn = _cs_insn_name c.h insn
    method group_name grp = _cs_group_name c.h grp
  end
| 195 | |
(** [cs_insn_group handle insn group_id] is [true] iff [group_id] occurs in
    [insn.groups].

    [handle] is not used; the parameter is kept so existing callers keep
    working. Membership uses structural equality ([Array.mem]) rather than
    physical equality, and no longer copies the array into a list first. *)
let cs_insn_group _handle insn group_id =
  Array.mem group_id insn.groups
| 198 | |
(** [cs_reg_read handle insn reg_id] is [true] iff [reg_id] occurs in
    [insn.regs_read].

    [handle] is not used; the parameter is kept so existing callers keep
    working. Membership uses structural equality ([Array.mem]) rather than
    physical equality, and no longer copies the array into a list first. *)
let cs_reg_read _handle insn reg_id =
  Array.mem reg_id insn.regs_read
| 201 | |
(** [cs_reg_write handle insn reg_id] is [true] iff [reg_id] occurs in
    [insn.regs_write].

    [handle] is not used; the parameter is kept so existing callers keep
    working. Membership uses structural equality ([Array.mem]) rather than
    physical equality, and no longer copies the array into a list first. *)
let cs_reg_write _handle insn reg_id =
  Array.mem reg_id insn.regs_write
| 204 | |
| 205 | |
(** Object-style front end to the engine.

    [new cs a m] opens a handle for architecture [a] with mode list [m] via
    [cs_open] when the object is created. Nothing in this class closes that
    handle. *)
class cs a m =
  let handle = cs_open a m in
  object
    (** [disasm code offset count] disassembles [code], forwarding [offset]
        and [count] unchanged to [_cs_disasm_internal], and wraps every
        resulting record in a [cs_insn] object bound to this engine's
        handle. *)
    method disasm code offset count =
      _cs_disasm_internal a handle.h code offset count
      |> List.map (fun raw -> new cs_insn handle raw)
  end