blob: a83f522fe15db4aa4d3929b1f533f68318584b90 [file] [log] [blame]
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <capstone.h>
8
9#include "cs_priv.h"
10
11#include "MCRegisterInfo.h"
12
13#include "arch/X86/X86Disassembler.h"
14#include "arch/X86/X86InstPrinter.h"
15#include "arch/X86/mapping.h"
16
17#include "arch/ARM/ARMDisassembler.h"
18#include "arch/ARM/ARMInstPrinter.h"
19#include "arch/ARM/mapping.h"
20
21#include "arch/Mips/MipsDisassembler.h"
22#include "arch/Mips/MipsInstPrinter.h"
23#include "arch/Mips/mapping.h"
24
25#include "arch/AArch64/AArch64Disassembler.h"
26#include "arch/AArch64/AArch64InstPrinter.h"
27#include "arch/AArch64/mapping.h"
28
29#include "utils.h"
30
Nguyen Anh Quynh5dbe12a2013-12-03 12:27:46 +080031// Package version
32#define PKG_MAJOR 1
33#define PKG_MINOR 0
34
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080035
36void cs_version(int *major, int *minor)
37{
38 *major = CS_API_MAJOR;
39 *minor = CS_API_MINOR;
40}
41
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080042cs_err cs_errno(csh handle)
43{
44 if (!handle)
45 return CS_ERR_CSH;
46
47 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
48
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +080049 return ud->errnum;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080050}
51
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080052cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle)
53{
54 cs_struct *ud;
55
56 ud = calloc(1, sizeof(*ud));
57 if (!ud) {
58 // memory insufficient
59 return CS_ERR_MEM;
60 }
61
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +080062 ud->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080063 ud->arch = arch;
64 ud->mode = mode;
65 ud->big_endian = mode & CS_MODE_BIG_ENDIAN;
66 ud->reg_name = NULL;
67
68 switch (ud->arch) {
69 case CS_ARCH_X86:
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +080070 // by default, we use Intel syntax
71 ud->printer = X86_Intel_printInst;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080072 ud->printer_info = NULL;
73 ud->disasm = X86_getInstruction;
74 ud->reg_name = X86_reg_name;
75 ud->insn_id = X86_get_insn_id;
76 ud->insn_name = X86_insn_name;
77 break;
78 case CS_ARCH_ARM: {
79 MCRegisterInfo *mri = malloc(sizeof(*mri));
80
81 ARM_init(mri);
82
83 ud->printer = ARM_printInst;
84 ud->printer_info = mri;
85 ud->reg_name = ARM_reg_name;
86 ud->insn_id = ARM_get_insn_id;
87 ud->insn_name = ARM_insn_name;
88 ud->post_printer = ARM_post_printer;
89
90 if (ud->mode & CS_MODE_THUMB)
91 ud->disasm = Thumb_getInstruction;
92 else
93 ud->disasm = ARM_getInstruction;
94 break;
95 }
96 case CS_ARCH_MIPS: {
97 MCRegisterInfo *mri = malloc(sizeof(*mri));
98
99 Mips_init(mri);
100 ud->printer = Mips_printInst;
101 ud->printer_info = mri;
102 ud->getinsn_info = mri;
103 ud->reg_name = Mips_reg_name;
104 ud->insn_id = Mips_get_insn_id;
105 ud->insn_name = Mips_insn_name;
106
107 if (ud->mode & CS_MODE_32)
108 ud->disasm = Mips_getInstruction;
109 else
110 ud->disasm = Mips64_getInstruction;
111
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800112 break;
113 }
114 case CS_ARCH_ARM64: {
115 MCRegisterInfo *mri = malloc(sizeof(*mri));
116
117 AArch64_init(mri);
118 ud->printer = AArch64_printInst;
119 ud->printer_info = mri;
120 ud->getinsn_info = mri;
121 ud->disasm = AArch64_getInstruction;
122 ud->reg_name = AArch64_reg_name;
123 ud->insn_id = AArch64_get_insn_id;
124 ud->insn_name = AArch64_insn_name;
125 ud->post_printer = AArch64_post_printer;
126 break;
127 }
128 default: // unsupported architecture
129 free(ud);
130 return CS_ERR_ARCH;
131 }
132
133 *handle = (uintptr_t)ud;
134
135 return CS_ERR_OK;
136}
137
138cs_err cs_close(csh handle)
139{
140 if (!handle)
141 return CS_ERR_CSH;
142
143 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
144
145 switch (ud->arch) {
146 case CS_ARCH_X86:
147 break;
148 case CS_ARCH_ARM:
149 case CS_ARCH_MIPS:
150 case CS_ARCH_ARM64:
151 free(ud->printer_info);
152 break;
153 default: // unsupported architecture
154 return CS_ERR_HANDLE;
155 }
156
157 memset(ud, 0, sizeof(*ud));
158 free(ud);
159
160 return CS_ERR_OK;
161}
162
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800163#define MIN(x, y) ((x) < (y) ? (x) : (y))
164
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800165// fill insn with mnemonic & operands info
166static void fill_insn(cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
pancakef0e4eed2013-12-11 22:14:42 +0100167 PostPrinter_t printer, const uint8_t *code)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800168{
169 memcpy(insn, &mci->pub_insn, sizeof(*insn));
170
171 // map internal instruction opcode to public insn ID
Nguyen Anh Quynhad61c492013-11-30 16:23:31 +0800172 if (handle->insn_id)
173 handle->insn_id(insn, MCInst_getOpcode(mci));
174
175 // alias instruction might have ID saved in OpcodePub
Nguyen Anh Quynh6b7abe32013-11-30 00:54:24 +0800176 if (MCInst_getOpcodePub(mci))
Nguyen Anh Quynhad61c492013-11-30 16:23:31 +0800177 insn->id = MCInst_getOpcodePub(mci);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800178
179 if (printer)
180 printer(insn->id, insn, buffer);
181
182 // fill in mnemonic & operands
Nguyen Anh Quynhdefb9bc2013-12-12 14:00:12 +0800183 // find first space or tab
184 char *sp = buffer;
185 for (sp = buffer; *sp; sp++)
186 if (*sp == ' '||*sp == '\t')
187 break;
188 if (*sp) {
189 *sp = '\0';
Nguyen Anh Quynh86dc3932013-12-12 14:43:39 +0800190 // find the next non-space char
191 sp++;
192 for (; ((*sp == ' ') || (*sp == '\t')); sp++);
193 strncpy(insn->op_str, sp, sizeof(insn->op_str) - 1);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800194 insn->op_str[sizeof(insn->op_str) - 1] = '\0';
Nguyen Anh Quynh86dc3932013-12-12 14:43:39 +0800195 printf(">>>> |%s|\n", insn->op_str);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800196 } else
197 insn->op_str[0] = '\0';
198
199 strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
200 insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800201
202 // fill the instruction bytes
203 memcpy(insn->bytes, code, MIN(sizeof(insn->bytes), insn->size));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800204}
205
Nguyen Anh Quynhda8adad2013-12-04 09:44:07 +0800206cs_err cs_option(csh ud, cs_opt_type type, size_t value)
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800207{
208 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
209 if (!handle)
210 return CS_ERR_CSH;
211
Nguyen Anh Quynh4a60a562013-12-03 21:56:54 +0800212 switch (handle->arch) {
213 default:
Nguyen Anh Quynhfe8030b2013-12-06 10:09:43 +0800214 handle->errnum = CS_ERR_OPTION;
Nguyen Anh Quynh041e25d2013-12-06 00:37:32 +0800215 return CS_ERR_OPTION;
216
Nguyen Anh Quynh4a60a562013-12-03 21:56:54 +0800217 case CS_ARCH_X86:
Nguyen Anh Quynhb8ce68e2013-12-03 23:45:08 +0800218 if (type & CS_OPT_SYNTAX) {
219 switch(value) {
220 default:
Nguyen Anh Quynhfe8030b2013-12-06 10:09:43 +0800221 handle->errnum = CS_ERR_OPTION;
Nguyen Anh Quynh041e25d2013-12-06 00:37:32 +0800222 return CS_ERR_OPTION;
223
Nguyen Anh Quynhc618db42013-12-04 00:05:04 +0800224 case CS_OPT_SYNTAX_INTEL:
Nguyen Anh Quynhb8ce68e2013-12-03 23:45:08 +0800225 handle->printer = X86_Intel_printInst;
226 break;
Nguyen Anh Quynh041e25d2013-12-06 00:37:32 +0800227
Nguyen Anh Quynhc618db42013-12-04 00:05:04 +0800228 case CS_OPT_SYNTAX_ATT:
Nguyen Anh Quynhb8ce68e2013-12-03 23:45:08 +0800229 handle->printer = X86_ATT_printInst;
230 break;
231 }
Nguyen Anh Quynhfe8030b2013-12-06 10:09:43 +0800232 } else {
233 handle->errnum = CS_ERR_OPTION;
Nguyen Anh Quynh041e25d2013-12-06 00:37:32 +0800234 return CS_ERR_OPTION;
Nguyen Anh Quynhfe8030b2013-12-06 10:09:43 +0800235 }
Nguyen Anh Quynh4a60a562013-12-03 21:56:54 +0800236 break;
237 }
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800238
239 return CS_ERR_OK;
240}
241
pancakef0e4eed2013-12-11 22:14:42 +0100242size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn *insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800243{
244 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
245 MCInst mci;
246 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800247 size_t c = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800248
249 if (!handle) {
250 // FIXME: handle this case?
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800251 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800252 return 0;
253 }
254
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800255 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800256
257 while (size > 0) {
258 MCInst_Init(&mci);
259
260 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
261 if (r) {
262 SStream ss;
263 SStream_Init(&ss);
264
265 mci.pub_insn.size = insn_size;
266 mci.pub_insn.address = offset;
267 mci.mode = handle->mode;
268 handle->printer(&mci, &ss, handle->printer_info);
269
Joxean114df0e2013-12-04 07:11:32 +0100270 fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800271
272 c++;
273 insn++;
274 buffer += insn_size;
275 size -= insn_size;
276 offset += insn_size;
277
278 if (count > 0) {
279 if (c == count)
280 return c;
281 }
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800282 } else
283 // face a broken instruction?
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800284 return c;
285 }
286
287 return c;
288}
289
290// dynamicly allocate memory to contain disasm insn
291// NOTE: caller must free() the allocated memory itself to avoid memory leaking
pancakef0e4eed2013-12-11 22:14:42 +0100292size_t cs_disasm_dyn(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800293{
294 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
295 MCInst mci;
296 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800297 size_t c = 0, f = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800298 cs_insn insn_cache[64];
299 void *total = NULL;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800300 size_t total_size = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800301
302 if (!handle) {
303 // FIXME: how to handle this case:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800304 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800305 return 0;
306 }
307
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800308 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800309
310 while (size > 0) {
311 MCInst_Init(&mci);
312
313 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
314 if (r) {
315 SStream ss;
316 SStream_Init(&ss);
317
318 mci.pub_insn.size = insn_size;
319 mci.pub_insn.address = offset;
320 mci.mode = handle->mode;
321 handle->printer(&mci, &ss, handle->printer_info);
322
Joxean114df0e2013-12-04 07:11:32 +0100323 fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800324 f++;
325
326 if (f == ARR_SIZE(insn_cache)) {
327 // resize total to contain newly disasm insns
328 total_size += sizeof(insn_cache);
329 void *tmp = realloc(total, total_size);
330 if (tmp == NULL) { // insufficient memory
331 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800332 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800333 return 0;
334 }
335
336 total = tmp;
337 memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache));
338 // reset f back to 0
339 f = 0;
340 }
341
342 c++;
343 buffer += insn_size;
344 size -= insn_size;
345 offset += insn_size;
346
347 if (count > 0 && c == count)
348 break;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800349 } else {
350 // encounter a broken instruction
351 // XXX: TODO: JOXEAN continue here
352 break;
353 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800354 }
355
356 if (f) {
357 // resize total to contain newly disasm insns
358 void *tmp = realloc(total, total_size + f * sizeof(insn_cache[0]));
359 if (tmp == NULL) { // insufficient memory
360 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800361 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800362 return 0;
363 }
364
365 total = tmp;
366 memcpy(total + total_size, insn_cache, f * sizeof(insn_cache[0]));
367 }
368
369 *insn = total;
370
371 return c;
372}
373
// Release a memory block by passing it straight to free().
// As with free(), a NULL pointer is accepted and ignored.
void cs_free(void *m)
{
	free(m);
}
378
379// return friendly name of regiser in a string
pancakef0e4eed2013-12-11 22:14:42 +0100380const char *cs_reg_name(csh ud, unsigned int reg)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800381{
382 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
383
384 if (!handle || handle->reg_name == NULL) {
385 return NULL;
386 }
387
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800388 return handle->reg_name(ud, reg);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800389}
390
pancakef0e4eed2013-12-11 22:14:42 +0100391const char *cs_insn_name(csh ud, unsigned int insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800392{
393 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
394
395 if (!handle || handle->insn_name == NULL) {
396 return NULL;
397 }
398
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800399 return handle->insn_name(ud, insn);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800400}
401
// Linear search: report whether @id occurs among the first @max entries of
// @arr. A @max of zero or less searches nothing and returns false.
static bool arr_exist(unsigned int *arr, int max, unsigned int id)
{
	bool found = false;
	int pos = 0;

	while (!found && pos < max) {
		found = (arr[pos] == id);
		pos++;
	}

	return found;
}
413
414bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id)
415{
416 if (!handle)
417 return false;
418
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800419 return arr_exist(insn->groups, insn->groups_count, group_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800420}
421
422bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id)
423{
424 if (!handle)
425 return false;
426
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800427 return arr_exist(insn->regs_read, insn->regs_read_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800428}
429
430bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id)
431{
432 if (!handle)
433 return false;
434
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800435 return arr_exist(insn->regs_write, insn->regs_write_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800436}
437
438int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type)
439{
440 if (!ud)
441 return -1;
442
443 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
444 unsigned int count = 0, i;
445
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800446 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800447
448 switch (handle->arch) {
449 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800450 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800451 return -1;
452 case CS_ARCH_ARM:
453 for (i = 0; i < insn->arm.op_count; i++)
454 if (insn->arm.operands[i].type == op_type)
455 count++;
456 break;
457 case CS_ARCH_ARM64:
458 for (i = 0; i < insn->arm64.op_count; i++)
459 if (insn->arm64.operands[i].type == op_type)
460 count++;
461 break;
462 case CS_ARCH_X86:
463 for (i = 0; i < insn->x86.op_count; i++)
464 if (insn->x86.operands[i].type == op_type)
465 count++;
466 break;
467 case CS_ARCH_MIPS:
468 for (i = 0; i < insn->mips.op_count; i++)
469 if (insn->mips.operands[i].type == op_type)
470 count++;
471 break;
472 }
473
474 return count;
475}
476
477int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type,
478 unsigned int post)
479{
480 if (!ud)
481 return -1;
482
483 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
484 unsigned int count = 0, i;
485
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800486 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800487
488 switch (handle->arch) {
489 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800490 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800491 return -1;
492 case CS_ARCH_ARM:
493 for (i = 0; i < insn->arm.op_count; i++) {
494 if (insn->arm.operands[i].type == op_type)
495 count++;
496 if (count == post)
497 return i;
498 }
499 break;
500 case CS_ARCH_ARM64:
501 for (i = 0; i < insn->arm64.op_count; i++) {
502 if (insn->arm64.operands[i].type == op_type)
503 count++;
504 if (count == post)
505 return i;
506 }
507 break;
508 case CS_ARCH_X86:
509 for (i = 0; i < insn->x86.op_count; i++) {
510 if (insn->x86.operands[i].type == op_type)
511 count++;
512 if (count == post)
513 return i;
514 }
515 break;
516 case CS_ARCH_MIPS:
517 for (i = 0; i < insn->mips.op_count; i++) {
518 if (insn->mips.operands[i].type == op_type)
519 count++;
520 if (count == post)
521 return i;
522 }
523 break;
524 }
525
526 return -1;
527}