blob: 7f32e939cd810edcc791c44ee4e1c835cb5ae7f4 [file] [log] [blame]
/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <capstone.h>
8
9#include "cs_priv.h"
10
11#include "MCRegisterInfo.h"
12
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080013#include "utils.h"
14
Nguyen Anh Quynh58747ad2013-12-22 13:37:13 +080015void (*arch_init[MAX_ARCH])(cs_struct *) = { NULL };
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +080016cs_err (*arch_option[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value);
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080017
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080018unsigned int all_arch = 0;
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080019
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080020unsigned int cs_version(int *major, int *minor)
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080021{
Nguyen Anh Quynh08777472013-12-22 14:16:28 +080022 if (major != NULL && minor != NULL) {
23 *major = CS_API_MAJOR;
24 *minor = CS_API_MINOR;
25 }
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080026
27 return (CS_API_MAJOR << 8) + CS_API_MINOR;
28}
29
30bool cs_support(cs_arch arch)
31{
32 if (arch == CS_ARCH_ALL)
33 return all_arch == ((1 << CS_ARCH_ARM) | (1 << CS_ARCH_ARM64) |
34 (1 << CS_ARCH_MIPS) | (1 << CS_ARCH_X86));
35
36 return all_arch & (1 << arch);
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080037}
38
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080039cs_err cs_errno(csh handle)
40{
41 if (!handle)
42 return CS_ERR_CSH;
43
44 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
45
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +080046 return ud->errnum;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080047}
48
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080049cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle)
50{
danghvu2b192962013-12-19 22:40:28 -060051 cs_struct *ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080052
danghvu2b192962013-12-19 22:40:28 -060053 ud = calloc(1, sizeof(*ud));
54 if (!ud) {
55 // memory insufficient
56 return CS_ERR_MEM;
57 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080058
Nguyen Anh Quynh9a197b32013-12-22 13:41:38 +080059 if (arch < CS_ARCH_MAX && arch_init[ud->arch]) {
60 ud->errnum = CS_ERR_OK;
61 ud->arch = arch;
62 ud->mode = mode;
63 ud->big_endian = mode & CS_MODE_BIG_ENDIAN;
64 ud->reg_name = NULL;
65 ud->detail = CS_OPT_ON; // by default break instruction into details
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080066
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +080067 arch_init[ud->arch](ud);
Nguyen Anh Quynh9a197b32013-12-22 13:41:38 +080068 } else {
69 *handle = 0;
70 return CS_ERR_ARCH;
71 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080072
danghvu2b192962013-12-19 22:40:28 -060073 *handle = (uintptr_t)ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080074
danghvu2b192962013-12-19 22:40:28 -060075 return CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080076}
77
78cs_err cs_close(csh handle)
79{
80 if (!handle)
81 return CS_ERR_CSH;
82
83 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
84
85 switch (ud->arch) {
86 case CS_ARCH_X86:
87 break;
88 case CS_ARCH_ARM:
89 case CS_ARCH_MIPS:
90 case CS_ARCH_ARM64:
91 free(ud->printer_info);
92 break;
93 default: // unsupported architecture
94 return CS_ERR_HANDLE;
95 }
96
97 memset(ud, 0, sizeof(*ud));
98 free(ud);
99
100 return CS_ERR_OK;
101}
102
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800103#define MIN(x, y) ((x) < (y) ? (x) : (y))
104
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800105// fill insn with mnemonic & operands info
106static void fill_insn(cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
pancakef0e4eed2013-12-11 22:14:42 +0100107 PostPrinter_t printer, const uint8_t *code)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800108{
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800109 if (handle->detail) {
110 memcpy(insn, &mci->pub_insn, sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800111
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800112 // fill the instruction bytes
113 memcpy(insn->bytes, code, MIN(sizeof(insn->bytes), insn->size));
Nguyen Anh Quynhad61c492013-11-30 16:23:31 +0800114
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800115 } else {
116 insn->address = mci->address;
117 insn->size = mci->insn_size;
118 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800119
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800120 // map internal instruction opcode to public insn ID
121 if (handle->insn_id)
122 handle->insn_id(insn, MCInst_getOpcode(mci), handle->detail);
123
124 // alias instruction might have ID saved in OpcodePub
125 if (MCInst_getOpcodePub(mci))
126 insn->id = MCInst_getOpcodePub(mci);
127
128 // post printer handles some corner cases (hacky)
129 if (printer)
130 printer((csh)handle, insn, buffer);
131
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800132 // fill in mnemonic & operands
Nguyen Anh Quynhdefb9bc2013-12-12 14:00:12 +0800133 // find first space or tab
134 char *sp = buffer;
135 for (sp = buffer; *sp; sp++)
136 if (*sp == ' '||*sp == '\t')
137 break;
138 if (*sp) {
139 *sp = '\0';
Nguyen Anh Quynh86dc3932013-12-12 14:43:39 +0800140 // find the next non-space char
141 sp++;
142 for (; ((*sp == ' ') || (*sp == '\t')); sp++);
143 strncpy(insn->op_str, sp, sizeof(insn->op_str) - 1);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800144 insn->op_str[sizeof(insn->op_str) - 1] = '\0';
145 } else
146 insn->op_str[0] = '\0';
147
148 strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
149 insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
150}
151
Nguyen Anh Quynhda8adad2013-12-04 09:44:07 +0800152cs_err cs_option(csh ud, cs_opt_type type, size_t value)
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800153{
danghvu2b192962013-12-19 22:40:28 -0600154 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
155 if (!handle)
156 return CS_ERR_CSH;
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800157
danghvu0b6ea042013-12-19 23:07:26 -0600158 if (type == CS_OPT_DETAIL) {
Nguyen Anh Quynh7d02c922013-12-21 09:59:31 +0800159 handle->detail = value;
160 return CS_ERR_OK;
161 }
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800162
Nguyen Anh Quynhd3458392013-12-22 11:10:56 +0800163 return arch_option[handle->arch](handle, type, value);
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800164}
165
pancakef0e4eed2013-12-11 22:14:42 +0100166size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn *insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800167{
168 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
169 MCInst mci;
170 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800171 size_t c = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800172
173 if (!handle) {
174 // FIXME: handle this case?
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800175 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800176 return 0;
177 }
178
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800179 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800180 memset(insn, 0, count * sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800181
182 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600183 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800184 mci.detail = handle->detail;
185 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800186
187 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
188 if (r) {
189 SStream ss;
190 SStream_Init(&ss);
191
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800192 // relative branches need to know the address & size of current insn
193 mci.insn_size = insn_size;
194 mci.address = offset;
195
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800196 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800197 // save all the information for non-detailed mode
198 mci.pub_insn.address = offset;
199 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800200 }
201
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800202 handle->printer(&mci, &ss, handle->printer_info);
203
Joxean114df0e2013-12-04 07:11:32 +0100204 fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800205
206 c++;
207 insn++;
208 buffer += insn_size;
209 size -= insn_size;
210 offset += insn_size;
211
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800212 if (c == count)
213 return c;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800214 } else
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800215 // face a broken instruction? then we stop here
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800216 return c;
217 }
218
219 return c;
220}
221
222// dynamicly allocate memory to contain disasm insn
223// NOTE: caller must free() the allocated memory itself to avoid memory leaking
pancakef0e4eed2013-12-11 22:14:42 +0100224size_t cs_disasm_dyn(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800225{
226 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
227 MCInst mci;
228 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800229 size_t c = 0, f = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800230 cs_insn insn_cache[64];
231 void *total = NULL;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800232 size_t total_size = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800233
234 if (!handle) {
235 // FIXME: how to handle this case:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800236 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800237 return 0;
238 }
239
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800240 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800241
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800242 memset(insn_cache, 0, sizeof(insn_cache));
243
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800244 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600245 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800246 mci.detail = handle->detail;
247 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800248
249 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
250 if (r) {
251 SStream ss;
252 SStream_Init(&ss);
253
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800254 // relative branches need to know the address & size of current insn
255 mci.insn_size = insn_size;
256 mci.address = offset;
257
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800258 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800259 // save all the information for non-detailed mode
260 mci.pub_insn.address = offset;
261 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800262 }
263
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800264 handle->printer(&mci, &ss, handle->printer_info);
265
Joxean114df0e2013-12-04 07:11:32 +0100266 fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800267
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800268 f++;
269
270 if (f == ARR_SIZE(insn_cache)) {
271 // resize total to contain newly disasm insns
272 total_size += sizeof(insn_cache);
273 void *tmp = realloc(total, total_size);
274 if (tmp == NULL) { // insufficient memory
275 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800276 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800277 return 0;
278 }
279
280 total = tmp;
281 memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache));
282 // reset f back to 0
283 f = 0;
284 }
285
286 c++;
287 buffer += insn_size;
288 size -= insn_size;
289 offset += insn_size;
290
291 if (count > 0 && c == count)
292 break;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800293 } else {
294 // encounter a broken instruction
295 // XXX: TODO: JOXEAN continue here
296 break;
297 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800298 }
299
300 if (f) {
301 // resize total to contain newly disasm insns
302 void *tmp = realloc(total, total_size + f * sizeof(insn_cache[0]));
303 if (tmp == NULL) { // insufficient memory
304 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800305 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800306 return 0;
307 }
308
309 total = tmp;
310 memcpy(total + total_size, insn_cache, f * sizeof(insn_cache[0]));
311 }
312
313 *insn = total;
314
315 return c;
316}
317
// Release a block of memory by passing it straight to free().
//
// @m: pointer to release; NULL is accepted (free() ignores it).
void cs_free(void *m)
{
	free(m);
}
322
323// return friendly name of regiser in a string
pancakef0e4eed2013-12-11 22:14:42 +0100324const char *cs_reg_name(csh ud, unsigned int reg)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800325{
326 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
327
328 if (!handle || handle->reg_name == NULL) {
329 return NULL;
330 }
331
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800332 return handle->reg_name(ud, reg);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800333}
334
pancakef0e4eed2013-12-11 22:14:42 +0100335const char *cs_insn_name(csh ud, unsigned int insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800336{
337 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
338
339 if (!handle || handle->insn_name == NULL) {
340 return NULL;
341 }
342
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800343 return handle->insn_name(ud, insn);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800344}
345
// Linear search: is @id one of the first @max entries of @arr?
//
// @arr: array of byte-sized IDs; not accessed when @max is 0.
// @max: number of entries to examine.
// @id:  value to look for; since entries are unsigned char, a value above
//       255 can never match.
static bool arr_exist(unsigned char *arr, unsigned char max, unsigned int id)
{
	unsigned int idx = 0;

	while (idx < max) {
		if (arr[idx] == id)
			return true;
		idx++;
	}

	return false;
}
357
358bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id)
359{
360 if (!handle)
361 return false;
362
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800363 return arr_exist(insn->groups, insn->groups_count, group_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800364}
365
366bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id)
367{
368 if (!handle)
369 return false;
370
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800371 return arr_exist(insn->regs_read, insn->regs_read_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800372}
373
374bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id)
375{
376 if (!handle)
377 return false;
378
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800379 return arr_exist(insn->regs_write, insn->regs_write_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800380}
381
382int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type)
383{
384 if (!ud)
385 return -1;
386
387 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
388 unsigned int count = 0, i;
389
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800390 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800391
392 switch (handle->arch) {
393 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800394 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800395 return -1;
396 case CS_ARCH_ARM:
397 for (i = 0; i < insn->arm.op_count; i++)
398 if (insn->arm.operands[i].type == op_type)
399 count++;
400 break;
401 case CS_ARCH_ARM64:
402 for (i = 0; i < insn->arm64.op_count; i++)
403 if (insn->arm64.operands[i].type == op_type)
404 count++;
405 break;
406 case CS_ARCH_X86:
407 for (i = 0; i < insn->x86.op_count; i++)
408 if (insn->x86.operands[i].type == op_type)
409 count++;
410 break;
411 case CS_ARCH_MIPS:
412 for (i = 0; i < insn->mips.op_count; i++)
413 if (insn->mips.operands[i].type == op_type)
414 count++;
415 break;
416 }
417
418 return count;
419}
420
421int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type,
422 unsigned int post)
423{
424 if (!ud)
425 return -1;
426
427 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
428 unsigned int count = 0, i;
429
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800430 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800431
432 switch (handle->arch) {
433 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800434 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800435 return -1;
436 case CS_ARCH_ARM:
437 for (i = 0; i < insn->arm.op_count; i++) {
438 if (insn->arm.operands[i].type == op_type)
439 count++;
440 if (count == post)
441 return i;
442 }
443 break;
444 case CS_ARCH_ARM64:
445 for (i = 0; i < insn->arm64.op_count; i++) {
446 if (insn->arm64.operands[i].type == op_type)
447 count++;
448 if (count == post)
449 return i;
450 }
451 break;
452 case CS_ARCH_X86:
453 for (i = 0; i < insn->x86.op_count; i++) {
454 if (insn->x86.operands[i].type == op_type)
455 count++;
456 if (count == post)
457 return i;
458 }
459 break;
460 case CS_ARCH_MIPS:
461 for (i = 0; i < insn->mips.op_count; i++) {
462 if (insn->mips.operands[i].type == op_type)
463 count++;
464 if (count == post)
465 return i;
466 }
467 break;
468 }
469
470 return -1;
471}