blob: 7eb0f5dc60420af80bfcbc9a16385afb67beccf2 [file] [log] [blame]
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <capstone.h>
8
9#include "cs_priv.h"
10
11#include "MCRegisterInfo.h"
12
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080013#include "utils.h"
14
Nguyen Anh Quynh58747ad2013-12-22 13:37:13 +080015void (*arch_init[MAX_ARCH])(cs_struct *) = { NULL };
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +080016cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value);
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080017
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080018unsigned int all_arch = 0;
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080019
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080020unsigned int cs_version(int *major, int *minor)
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080021{
22 *major = CS_API_MAJOR;
23 *minor = CS_API_MINOR;
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080024
25 return (CS_API_MAJOR << 8) + CS_API_MINOR;
26}
27
28bool cs_support(cs_arch arch)
29{
30 if (arch == CS_ARCH_ALL)
31 return all_arch == ((1 << CS_ARCH_ARM) | (1 << CS_ARCH_ARM64) |
32 (1 << CS_ARCH_MIPS) | (1 << CS_ARCH_X86));
33
34 return all_arch & (1 << arch);
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080035}
36
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080037cs_err cs_errno(csh handle)
38{
39 if (!handle)
40 return CS_ERR_CSH;
41
42 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
43
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +080044 return ud->errnum;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080045}
46
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080047cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle)
48{
danghvu2b192962013-12-19 22:40:28 -060049 cs_struct *ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080050
danghvu2b192962013-12-19 22:40:28 -060051 ud = calloc(1, sizeof(*ud));
52 if (!ud) {
53 // memory insufficient
54 return CS_ERR_MEM;
55 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080056
Nguyen Anh Quynh9a197b32013-12-22 13:41:38 +080057 if (arch < CS_ARCH_MAX && arch_init[ud->arch]) {
58 ud->errnum = CS_ERR_OK;
59 ud->arch = arch;
60 ud->mode = mode;
61 ud->big_endian = mode & CS_MODE_BIG_ENDIAN;
62 ud->reg_name = NULL;
63 ud->detail = CS_OPT_ON; // by default break instruction into details
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080064
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +080065 arch_init[ud->arch](ud);
Nguyen Anh Quynh9a197b32013-12-22 13:41:38 +080066 } else {
67 *handle = 0;
68 return CS_ERR_ARCH;
69 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080070
danghvu2b192962013-12-19 22:40:28 -060071 *handle = (uintptr_t)ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080072
danghvu2b192962013-12-19 22:40:28 -060073 return CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080074}
75
76cs_err cs_close(csh handle)
77{
78 if (!handle)
79 return CS_ERR_CSH;
80
81 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
82
83 switch (ud->arch) {
84 case CS_ARCH_X86:
85 break;
86 case CS_ARCH_ARM:
87 case CS_ARCH_MIPS:
88 case CS_ARCH_ARM64:
89 free(ud->printer_info);
90 break;
91 default: // unsupported architecture
92 return CS_ERR_HANDLE;
93 }
94
95 memset(ud, 0, sizeof(*ud));
96 free(ud);
97
98 return CS_ERR_OK;
99}
100
// Smaller of two values. Each argument is evaluated twice, so do not pass
// expressions with side effects.
#define MIN(x, y) ((x) < (y) ? (x) : (y))
102
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800103// fill insn with mnemonic & operands info
104static void fill_insn(cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
pancakef0e4eed2013-12-11 22:14:42 +0100105 PostPrinter_t printer, const uint8_t *code)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800106{
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800107 if (handle->detail) {
108 memcpy(insn, &mci->pub_insn, sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800109
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800110 // fill the instruction bytes
111 memcpy(insn->bytes, code, MIN(sizeof(insn->bytes), insn->size));
Nguyen Anh Quynhad61c492013-11-30 16:23:31 +0800112
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800113 } else {
114 insn->address = mci->address;
115 insn->size = mci->insn_size;
116 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800117
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800118 // map internal instruction opcode to public insn ID
119 if (handle->insn_id)
120 handle->insn_id(insn, MCInst_getOpcode(mci), handle->detail);
121
122 // alias instruction might have ID saved in OpcodePub
123 if (MCInst_getOpcodePub(mci))
124 insn->id = MCInst_getOpcodePub(mci);
125
126 // post printer handles some corner cases (hacky)
127 if (printer)
128 printer((csh)handle, insn, buffer);
129
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800130 // fill in mnemonic & operands
Nguyen Anh Quynhdefb9bc2013-12-12 14:00:12 +0800131 // find first space or tab
132 char *sp = buffer;
133 for (sp = buffer; *sp; sp++)
134 if (*sp == ' '||*sp == '\t')
135 break;
136 if (*sp) {
137 *sp = '\0';
Nguyen Anh Quynh86dc3932013-12-12 14:43:39 +0800138 // find the next non-space char
139 sp++;
140 for (; ((*sp == ' ') || (*sp == '\t')); sp++);
141 strncpy(insn->op_str, sp, sizeof(insn->op_str) - 1);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800142 insn->op_str[sizeof(insn->op_str) - 1] = '\0';
143 } else
144 insn->op_str[0] = '\0';
145
146 strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
147 insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
148}
149
Nguyen Anh Quynhda8adad2013-12-04 09:44:07 +0800150cs_err cs_option(csh ud, cs_opt_type type, size_t value)
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800151{
danghvu2b192962013-12-19 22:40:28 -0600152 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
153 if (!handle)
154 return CS_ERR_CSH;
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800155
danghvu0b6ea042013-12-19 23:07:26 -0600156 if (type == CS_OPT_DETAIL) {
Nguyen Anh Quynh7d02c922013-12-21 09:59:31 +0800157 handle->detail = value;
158 return CS_ERR_OK;
159 }
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800160
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +0800161 return arch_option[handle->arch](handle, type, value);
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800162}
163
pancakef0e4eed2013-12-11 22:14:42 +0100164size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn *insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800165{
166 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
167 MCInst mci;
168 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800169 size_t c = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800170
171 if (!handle) {
172 // FIXME: handle this case?
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800173 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800174 return 0;
175 }
176
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800177 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800178 memset(insn, 0, count * sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800179
180 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600181 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800182 mci.detail = handle->detail;
183 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800184
185 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
186 if (r) {
187 SStream ss;
188 SStream_Init(&ss);
189
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800190 // relative branches need to know the address & size of current insn
191 mci.insn_size = insn_size;
192 mci.address = offset;
193
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800194 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800195 // save all the information for non-detailed mode
196 mci.pub_insn.address = offset;
197 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800198 }
199
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800200 handle->printer(&mci, &ss, handle->printer_info);
201
Joxean114df0e2013-12-04 07:11:32 +0100202 fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800203
204 c++;
205 insn++;
206 buffer += insn_size;
207 size -= insn_size;
208 offset += insn_size;
209
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800210 if (c == count)
211 return c;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800212 } else
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800213 // face a broken instruction? then we stop here
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800214 return c;
215 }
216
217 return c;
218}
219
220// dynamicly allocate memory to contain disasm insn
221// NOTE: caller must free() the allocated memory itself to avoid memory leaking
pancakef0e4eed2013-12-11 22:14:42 +0100222size_t cs_disasm_dyn(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800223{
224 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
225 MCInst mci;
226 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800227 size_t c = 0, f = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800228 cs_insn insn_cache[64];
229 void *total = NULL;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800230 size_t total_size = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800231
232 if (!handle) {
233 // FIXME: how to handle this case:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800234 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800235 return 0;
236 }
237
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800238 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800239
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800240 memset(insn_cache, 0, sizeof(insn_cache));
241
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800242 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600243 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800244 mci.detail = handle->detail;
245 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800246
247 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
248 if (r) {
249 SStream ss;
250 SStream_Init(&ss);
251
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800252 // relative branches need to know the address & size of current insn
253 mci.insn_size = insn_size;
254 mci.address = offset;
255
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800256 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800257 // save all the information for non-detailed mode
258 mci.pub_insn.address = offset;
259 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800260 }
261
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800262 handle->printer(&mci, &ss, handle->printer_info);
263
Joxean114df0e2013-12-04 07:11:32 +0100264 fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800265
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800266 f++;
267
268 if (f == ARR_SIZE(insn_cache)) {
269 // resize total to contain newly disasm insns
270 total_size += sizeof(insn_cache);
271 void *tmp = realloc(total, total_size);
272 if (tmp == NULL) { // insufficient memory
273 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800274 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800275 return 0;
276 }
277
278 total = tmp;
279 memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache));
280 // reset f back to 0
281 f = 0;
282 }
283
284 c++;
285 buffer += insn_size;
286 size -= insn_size;
287 offset += insn_size;
288
289 if (count > 0 && c == count)
290 break;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800291 } else {
292 // encounter a broken instruction
293 // XXX: TODO: JOXEAN continue here
294 break;
295 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800296 }
297
298 if (f) {
299 // resize total to contain newly disasm insns
300 void *tmp = realloc(total, total_size + f * sizeof(insn_cache[0]));
301 if (tmp == NULL) { // insufficient memory
302 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800303 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800304 return 0;
305 }
306
307 total = tmp;
308 memcpy(total + total_size, insn_cache, f * sizeof(insn_cache[0]));
309 }
310
311 *insn = total;
312
313 return c;
314}
315
// Release memory with free(), e.g. the array handed out by cs_disasm_dyn().
// Passing NULL is harmless because free(NULL) is a no-op.
void cs_free(void *m)
{
	free(m);
}
320
321// return friendly name of regiser in a string
pancakef0e4eed2013-12-11 22:14:42 +0100322const char *cs_reg_name(csh ud, unsigned int reg)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800323{
324 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
325
326 if (!handle || handle->reg_name == NULL) {
327 return NULL;
328 }
329
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800330 return handle->reg_name(ud, reg);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800331}
332
pancakef0e4eed2013-12-11 22:14:42 +0100333const char *cs_insn_name(csh ud, unsigned int insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800334{
335 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
336
337 if (!handle || handle->insn_name == NULL) {
338 return NULL;
339 }
340
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800341 return handle->insn_name(ud, insn);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800342}
343
// Linear search: is @id among the first @max entries of @arr?
// An @id above 255 can never match because the entries are unsigned char.
static bool arr_exist(unsigned char *arr, unsigned char max, unsigned int id)
{
	unsigned int pos = 0;

	while (pos < max) {
		if (arr[pos] == id)
			return true;
		pos++;
	}

	return false;
}
355
356bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id)
357{
358 if (!handle)
359 return false;
360
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800361 return arr_exist(insn->groups, insn->groups_count, group_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800362}
363
364bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id)
365{
366 if (!handle)
367 return false;
368
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800369 return arr_exist(insn->regs_read, insn->regs_read_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800370}
371
372bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id)
373{
374 if (!handle)
375 return false;
376
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800377 return arr_exist(insn->regs_write, insn->regs_write_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800378}
379
380int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type)
381{
382 if (!ud)
383 return -1;
384
385 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
386 unsigned int count = 0, i;
387
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800388 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800389
390 switch (handle->arch) {
391 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800392 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800393 return -1;
394 case CS_ARCH_ARM:
395 for (i = 0; i < insn->arm.op_count; i++)
396 if (insn->arm.operands[i].type == op_type)
397 count++;
398 break;
399 case CS_ARCH_ARM64:
400 for (i = 0; i < insn->arm64.op_count; i++)
401 if (insn->arm64.operands[i].type == op_type)
402 count++;
403 break;
404 case CS_ARCH_X86:
405 for (i = 0; i < insn->x86.op_count; i++)
406 if (insn->x86.operands[i].type == op_type)
407 count++;
408 break;
409 case CS_ARCH_MIPS:
410 for (i = 0; i < insn->mips.op_count; i++)
411 if (insn->mips.operands[i].type == op_type)
412 count++;
413 break;
414 }
415
416 return count;
417}
418
419int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type,
420 unsigned int post)
421{
422 if (!ud)
423 return -1;
424
425 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
426 unsigned int count = 0, i;
427
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800428 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800429
430 switch (handle->arch) {
431 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800432 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800433 return -1;
434 case CS_ARCH_ARM:
435 for (i = 0; i < insn->arm.op_count; i++) {
436 if (insn->arm.operands[i].type == op_type)
437 count++;
438 if (count == post)
439 return i;
440 }
441 break;
442 case CS_ARCH_ARM64:
443 for (i = 0; i < insn->arm64.op_count; i++) {
444 if (insn->arm64.operands[i].type == op_type)
445 count++;
446 if (count == post)
447 return i;
448 }
449 break;
450 case CS_ARCH_X86:
451 for (i = 0; i < insn->x86.op_count; i++) {
452 if (insn->x86.operands[i].type == op_type)
453 count++;
454 if (count == post)
455 return i;
456 }
457 break;
458 case CS_ARCH_MIPS:
459 for (i = 0; i < insn->mips.op_count; i++) {
460 if (insn->mips.operands[i].type == op_type)
461 count++;
462 if (count == post)
463 return i;
464 }
465 break;
466 }
467
468 return -1;
469}