blob: 3af3abdec2e10c23a1e980298c9ce4b9fc8b877d [file] [log] [blame]
Nguyen Anh Quynh81a97c62014-09-26 23:38:53 +08001(* Capstone Disassembly Engine
2 * By Nguyen Anh Quynh <aquynh@gmail.com>, 2013-2014 *)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +08003
4open Arm
5open Arm64
6open Mips
Guillaume Jeannee002ac72014-06-30 15:46:04 +02007open Ppc
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +08008open X86
Guillaume Jeanneae48c972014-08-19 14:46:06 +02009open Sparc
10open Systemz
11open Xcore
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080012open Printf (* debug *)
13
(** Hardware architectures selectable when opening an engine.
    NOTE(review): values of this type are passed to the external C stubs
    declared in this file; the stubs presumably depend on the constructor
    order, so confirm before reordering or inserting constructors. *)
type arch =
  | CS_ARCH_ARM
  | CS_ARCH_ARM64
  | CS_ARCH_MIPS
  | CS_ARCH_X86
  | CS_ARCH_PPC
  | CS_ARCH_SPARC
  | CS_ARCH_SYSZ
  | CS_ARCH_XCORE
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080024
(** Hardware modes. An engine is opened with a list of these values.
    NOTE(review): the external C stubs presumably depend on the constructor
    order; confirm before reordering or inserting constructors. *)
type mode =
  | CS_MODE_LITTLE_ENDIAN  (* little-endian mode (default mode) *)
  | CS_MODE_ARM            (* ARM mode *)
  | CS_MODE_16             (* 16-bit mode (for X86) *)
  | CS_MODE_32             (* 32-bit mode (for X86) *)
  | CS_MODE_64             (* 64-bit mode (for X86, PPC) *)
  | CS_MODE_THUMB          (* Thumb mode of ARM, including Thumb-2 *)
  | CS_MODE_MCLASS         (* MClass mode of ARM *)
  | CS_MODE_V8             (* ARMv8 A32 encodings for ARM *)
  | CS_MODE_MICRO          (* MicroMips mode (MIPS architecture) *)
  | CS_MODE_MIPS3          (* Mips3 mode (MIPS architecture) *)
  | CS_MODE_MIPS32R6       (* Mips32-R6 mode (MIPS architecture) *)
  | CS_MODE_MIPSGP64       (* MipsGP64 mode (MIPS architecture) *)
  | CS_MODE_V9             (* SparcV9 mode (Sparc architecture) *)
  | CS_MODE_BIG_ENDIAN     (* big-endian mode *)
  | CS_MODE_MIPS32         (* Mips32 mode (for Mips) *)
  | CS_MODE_MIPS64         (* Mips64 mode (for Mips) *)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080043
Guillaume Jeanneae48c972014-08-19 14:46:06 +020044
(** Runtime option types for the disassembler engine.
    NOTE(review): the external C stubs presumably depend on the constructor
    order; confirm before reordering or inserting constructors. *)
type opt_type =
  | CS_OPT_SYNTAX          (* Assembly output syntax *)
  | CS_OPT_DETAIL          (* Break down instruction structure into details *)
  | CS_OPT_MODE            (* Change the mode of the engine at run-time *)
  | CS_OPT_MEM             (* User-defined dynamic memory related functions *)
  | CS_OPT_SKIPDATA        (* Skip data when disassembling. The engine is then in SKIPDATA mode. *)
  | CS_OPT_SKIPDATA_SETUP  (* Setup user-defined function for SKIPDATA option *)
53
54
(* Runtime option values. Each value is meant for the option type(s) named
   in its own comment. *)

(* Turn OFF an option - default option of CS_OPT_DETAIL, CS_OPT_SKIPDATA. *)
let _CS_OPT_OFF = 0L

(* Turn ON an option (CS_OPT_DETAIL, CS_OPT_SKIPDATA). *)
let _CS_OPT_ON = 3L

(* Default asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_DEFAULT = 0L

(* X86 Intel asm syntax - default on X86 (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_INTEL = 1L

(* X86 ATT asm syntax (CS_OPT_SYNTAX). *)
let _CS_OPT_SYNTAX_ATT = 2L

(* Prints register name with only number (CS_OPT_SYNTAX).
   Shares the numeric value 3L with _CS_OPT_ON. *)
let _CS_OPT_SYNTAX_NOREGNAME = 3L
Guillaume Jeanneae48c972014-08-19 14:46:06 +020062
(* Common instruction operand types - kept consistent across all
   architectures. *)

(* Uninitialized/invalid operand. *)
let _CS_OP_INVALID = 0

(* Register operand. *)
let _CS_OP_REG = 1

(* Immediate operand. *)
let _CS_OP_IMM = 2

(* Memory operand. *)
let _CS_OP_MEM = 3

(* Floating-Point operand. *)
let _CS_OP_FP = 4
Nguyen Anh Quynh82354b62014-09-28 23:56:02 +080069
(* Common instruction groups - kept consistent across all architectures. *)

(* Uninitialized/invalid group. *)
let _CS_GRP_INVALID = 0

(* All jump instructions (conditional+direct+indirect jumps). *)
let _CS_GRP_JUMP = 1

(* All call instructions. *)
let _CS_GRP_CALL = 2

(* All return instructions. *)
let _CS_GRP_RET = 3

(* All interrupt instructions (int+syscall). *)
let _CS_GRP_INT = 4

(* All interrupt return instructions. *)
let _CS_GRP_IRET = 5
77
(** Architecture-specific payload of a disassembled instruction: one
    constructor per architecture, each wrapping the record type exported by
    the matching architecture module opened at the top of this file.
    NOTE(review): the C stubs that build these values presumably depend on
    the constructor order; confirm before reordering. *)
type cs_arch =
  | CS_INFO_ARM of cs_arm
  | CS_INFO_ARM64 of cs_arm64
  | CS_INFO_MIPS of cs_mips
  | CS_INFO_X86 of cs_x86
  | CS_INFO_PPC of cs_ppc
  | CS_INFO_SPARC of cs_sparc
  | CS_INFO_SYSZ of cs_sysz
  | CS_INFO_XCORE of cs_xcore
87
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080088
(** Engine handle used by the wrapper functions in this file.
    - [h]: raw 64-bit value forwarded to the external stubs; [cs_open]
      stores [0L] here when the open stub yields [None].
    - [a]: architecture the handle was opened for. *)
type csh = {
  h : Int64.t;
  a : arch;
}
93
(** Raw instruction record; the disassembly stubs declared in this file
    return lists of it.
    NOTE(review): the per-field notes below are read off the field names and
    types only; confirm against the C stub that fills the record. The field
    order is presumably significant to that stub as well. *)
type cs_insn0 = {
  id : int;                 (* instruction identifier *)
  address : int;            (* instruction address *)
  size : int;               (* instruction size *)
  bytes : int array;        (* instruction bytes, one int per element *)
  mnemonic : string;        (* mnemonic text *)
  op_str : string;          (* operand text *)
  regs_read : int array;    (* register ids, searched by cs_reg_read *)
  regs_write : int array;   (* register ids, searched by cs_reg_write *)
  groups : int array;       (* group ids, searched by cs_insn_group *)
  arch : cs_arch;           (* architecture-specific details *)
}
106
(* Native entry points. Each declaration is bound to the C stub whose symbol
   is given after the equals sign. The underscore-prefixed ones take the raw
   Int64.t handle and are wrapped by the cs_* functions and classes defined
   later in this file. *)

(* Opens an engine; the result is optional. *)
external _cs_open : arch -> mode list -> Int64.t option = "ocaml_open"

(* Disassembles from an architecture and a mode list directly, without a
   handle. NOTE(review): the two Int64.t arguments are presumably address and
   count, as in cs_disasm - confirm against the stub. *)
external cs_disasm_quick :
  arch -> mode list -> string -> Int64.t -> Int64.t -> cs_insn0 list
  = "ocaml_cs_disasm"

(* Disassembles using a raw handle; cs_disasm calls it with
   (architecture, handle, code, address, count). *)
external _cs_disasm_internal :
  arch -> Int64.t -> string -> Int64.t -> Int64.t -> cs_insn0 list
  = "ocaml_cs_disasm_internal"

(* Name lookups taking a raw handle and an integer id. *)
external _cs_reg_name : Int64.t -> int -> string = "ocaml_register_name"
external _cs_insn_name : Int64.t -> int -> string = "ocaml_instruction_name"
external _cs_group_name : Int64.t -> int -> string = "ocaml_group_name"

(* Library version as an int. *)
external cs_version : unit -> int = "ocaml_version"

(* Sets an option (type, then value) on a raw handle; returns an int. *)
external _cs_option : Int64.t -> opt_type -> Int64.t -> int = "ocaml_option"

(* Closes a raw handle; returns an int. *)
external _cs_close : Int64.t -> int = "ocaml_close"
116
117
(** [cs_open _arch _mode] opens an engine through [_cs_open] and wraps the
    outcome in a [csh] record that also remembers [_arch].
    When [_cs_open] yields [None] the record carries [h = 0L]; no error is
    signalled, so the caller receives a record either way. *)
let cs_open _arch _mode : csh =
  let raw =
    match _cs_open _arch _mode with
    | Some v -> v
    | None -> 0L
  in
  { h = raw; a = _arch };;
125
(** [cs_close handle] forwards the raw value [handle.h] to [_cs_close] and
    returns the [int] produced by that stub. *)
let cs_close handle =
  let raw = handle.h in
  _cs_close raw
129
(** [cs_option handle opt value] applies option [opt] with [value] to the
    engine by calling [_cs_option] on [handle.h]; returns the [int] produced
    by that stub. *)
let cs_option handle opt value =
  let raw = handle.h in
  _cs_option raw opt value;;
133
(** [cs_disasm handle code address count] calls [_cs_disasm_internal] with
    the architecture and raw value stored in [handle], followed by [code],
    [address] and [count], and returns the resulting [cs_insn0] list. *)
let cs_disasm handle code address count =
  let target = handle.a and raw = handle.h in
  _cs_disasm_internal target raw code address count;;
137
(** [cs_reg_name handle id] returns the string that [_cs_reg_name] produces
    for [id] on the raw handle [handle.h]. *)
let cs_reg_name handle id =
  let raw = handle.h in
  _cs_reg_name raw id;;
141
(** [cs_insn_name handle id] returns the string that [_cs_insn_name]
    produces for [id] on the raw handle [handle.h]. *)
let cs_insn_name handle id =
  let raw = handle.h in
  _cs_insn_name raw id;;
145
(** [cs_group_name handle id] returns the string that [_cs_group_name]
    produces for [id] on the raw handle [handle.h]. *)
let cs_group_name handle id =
  let raw = handle.h in
  _cs_group_name raw id;;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800149
(** Object view over one raw instruction record.
    [c] is the engine handle used for name lookups and [a] is the
    [cs_insn0] record being wrapped. The accessor methods return the
    matching record field unchanged; the [*_name] methods call the
    corresponding name stub with the raw value [c.h]. *)
class cs_insn c a =
  object
    (* Plain accessors for the fields of the wrapped record. *)
    method id = a.id
    method address = a.address
    method size = a.size
    method bytes = a.bytes
    method mnemonic = a.mnemonic
    method op_str = a.op_str
    method regs_read = a.regs_read
    method regs_write = a.regs_write
    method groups = a.groups
    method arch = a.arch

    (* Name lookups against the engine this instruction came from. *)
    method reg_name id = _cs_reg_name c.h id
    method insn_name id = _cs_insn_name c.h id
    method group_name id = _cs_group_name c.h id
  end;;
171
(** [cs_insn_group handle insn group_id] is [true] when [group_id] occurs in
    [insn.groups], and [false] otherwise (including for an empty array).
    The handle argument is not used; it is kept so existing call sites keep
    working. Membership uses structural equality via [List.mem] instead of
    physical equality [==]. *)
let cs_insn_group _handle insn group_id =
  List.mem group_id (Array.to_list insn.groups);;
174
(** [cs_reg_read handle insn reg_id] is [true] when [reg_id] occurs in
    [insn.regs_read], and [false] otherwise (including for an empty array).
    The handle argument is not used; it is kept so existing call sites keep
    working. Membership uses structural equality via [List.mem] instead of
    physical equality [==]. *)
let cs_reg_read _handle insn reg_id =
  List.mem reg_id (Array.to_list insn.regs_read);;
177
(** [cs_reg_write handle insn reg_id] is [true] when [reg_id] occurs in
    [insn.regs_write], and [false] otherwise (including for an empty array).
    The handle argument is not used; it is kept so existing call sites keep
    working. Membership uses structural equality via [List.mem] instead of
    physical equality [==]. *)
let cs_reg_write _handle insn reg_id =
  List.mem reg_id (Array.to_list insn.regs_write);;
180
181
(** Object-style engine. Creating an instance opens a handle for
    architecture [a] and mode list [m] through [cs_open].
    NOTE(review): the class has no method that closes the handle, and
    [cs_open] stores [0L] when opening yields nothing, so a failed open is
    not reported here. *)
class cs a m =
  let handle = cs_open a m in
  object
    (* Disassembles [code] via the internal stub and wraps every raw record
       in a [cs_insn] object bound to this engine handle. *)
    method disasm code offset count =
      let raw_insns = _cs_disasm_internal a handle.h code offset count in
      let wrap raw = new cs_insn handle raw in
      List.map wrap raw_insns
  end;;