blob: 6999ced564936c8b7ae2b1076511faf314edaf37 [file] [log] [blame]
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +08001/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include <capstone.h>
8
9#include "cs_priv.h"
10
11#include "MCRegisterInfo.h"
12
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080013#include "utils.h"
14
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080015void (*init_arch[MAX_ARCH]) (cs_struct *);
16cs_err (*option_arch[MAX_ARCH]) (cs_struct*, cs_opt_type, size_t value);
17
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080018unsigned int all_arch = 0;
Nguyen Anh Quynhf1851802013-12-21 12:16:47 +080019
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080020unsigned int cs_version(int *major, int *minor)
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080021{
22 *major = CS_API_MAJOR;
23 *minor = CS_API_MINOR;
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +080024
25 return (CS_API_MAJOR << 8) + CS_API_MINOR;
26}
27
28bool cs_support(cs_arch arch)
29{
30 if (arch == CS_ARCH_ALL)
31 return all_arch == ((1 << CS_ARCH_ARM) | (1 << CS_ARCH_ARM64) |
32 (1 << CS_ARCH_MIPS) | (1 << CS_ARCH_X86));
33
34 return all_arch & (1 << arch);
Nguyen Anh Quynh36df4bb2013-12-10 13:31:20 +080035}
36
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080037cs_err cs_errno(csh handle)
38{
39 if (!handle)
40 return CS_ERR_CSH;
41
42 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
43
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +080044 return ud->errnum;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080045}
46
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080047cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle)
48{
danghvu2b192962013-12-19 22:40:28 -060049 cs_struct *ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080050
danghvu2b192962013-12-19 22:40:28 -060051 ud = calloc(1, sizeof(*ud));
52 if (!ud) {
53 // memory insufficient
54 return CS_ERR_MEM;
55 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080056
danghvu2b192962013-12-19 22:40:28 -060057 ud->errnum = CS_ERR_OK;
58 ud->arch = arch;
59 ud->mode = mode;
60 ud->big_endian = mode & CS_MODE_BIG_ENDIAN;
61 ud->reg_name = NULL;
62 ud->detail = CS_OPT_ON; // by default break instruction into details
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080063
Nguyen Anh Quynh7d02c922013-12-21 09:59:31 +080064 if (init_arch[ud->arch])
65 init_arch[ud->arch](ud);
danghvu0b6ea042013-12-19 23:07:26 -060066 else
Nguyen Anh Quynh7d02c922013-12-21 09:59:31 +080067 return CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080068
danghvu2b192962013-12-19 22:40:28 -060069 *handle = (uintptr_t)ud;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080070
danghvu2b192962013-12-19 22:40:28 -060071 return CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +080072}
73
74cs_err cs_close(csh handle)
75{
76 if (!handle)
77 return CS_ERR_CSH;
78
79 cs_struct *ud = (cs_struct *)(uintptr_t)handle;
80
81 switch (ud->arch) {
82 case CS_ARCH_X86:
83 break;
84 case CS_ARCH_ARM:
85 case CS_ARCH_MIPS:
86 case CS_ARCH_ARM64:
87 free(ud->printer_info);
88 break;
89 default: // unsupported architecture
90 return CS_ERR_HANDLE;
91 }
92
93 memset(ud, 0, sizeof(*ud));
94 free(ud);
95
96 return CS_ERR_OK;
97}
98
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +080099#define MIN(x, y) ((x) < (y) ? (x) : (y))
100
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800101// fill insn with mnemonic & operands info
102static void fill_insn(cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci,
pancakef0e4eed2013-12-11 22:14:42 +0100103 PostPrinter_t printer, const uint8_t *code)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800104{
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800105 if (handle->detail) {
106 memcpy(insn, &mci->pub_insn, sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800107
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800108 // fill the instruction bytes
109 memcpy(insn->bytes, code, MIN(sizeof(insn->bytes), insn->size));
Nguyen Anh Quynhad61c492013-11-30 16:23:31 +0800110
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800111 } else {
112 insn->address = mci->address;
113 insn->size = mci->insn_size;
114 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800115
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800116 // map internal instruction opcode to public insn ID
117 if (handle->insn_id)
118 handle->insn_id(insn, MCInst_getOpcode(mci), handle->detail);
119
120 // alias instruction might have ID saved in OpcodePub
121 if (MCInst_getOpcodePub(mci))
122 insn->id = MCInst_getOpcodePub(mci);
123
124 // post printer handles some corner cases (hacky)
125 if (printer)
126 printer((csh)handle, insn, buffer);
127
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800128 // fill in mnemonic & operands
Nguyen Anh Quynhdefb9bc2013-12-12 14:00:12 +0800129 // find first space or tab
130 char *sp = buffer;
131 for (sp = buffer; *sp; sp++)
132 if (*sp == ' '||*sp == '\t')
133 break;
134 if (*sp) {
135 *sp = '\0';
Nguyen Anh Quynh86dc3932013-12-12 14:43:39 +0800136 // find the next non-space char
137 sp++;
138 for (; ((*sp == ' ') || (*sp == '\t')); sp++);
139 strncpy(insn->op_str, sp, sizeof(insn->op_str) - 1);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800140 insn->op_str[sizeof(insn->op_str) - 1] = '\0';
141 } else
142 insn->op_str[0] = '\0';
143
144 strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1);
145 insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0';
146}
147
Nguyen Anh Quynhda8adad2013-12-04 09:44:07 +0800148cs_err cs_option(csh ud, cs_opt_type type, size_t value)
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800149{
danghvu2b192962013-12-19 22:40:28 -0600150 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
151 if (!handle)
152 return CS_ERR_CSH;
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800153
danghvu0b6ea042013-12-19 23:07:26 -0600154 if (type == CS_OPT_DETAIL) {
Nguyen Anh Quynh7d02c922013-12-21 09:59:31 +0800155 handle->detail = value;
156 return CS_ERR_OK;
157 }
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800158
Nguyen Anh Quynh39a42ed2013-12-22 10:40:58 +0800159 return option_arch[handle->arch](handle, type, value);
Nguyen Anh Quynh01aba002013-12-03 21:00:09 +0800160}
161
pancakef0e4eed2013-12-11 22:14:42 +0100162size_t cs_disasm(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn *insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800163{
164 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
165 MCInst mci;
166 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800167 size_t c = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800168
169 if (!handle) {
170 // FIXME: handle this case?
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800171 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800172 return 0;
173 }
174
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800175 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800176 memset(insn, 0, count * sizeof(*insn));
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800177
178 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600179 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800180 mci.detail = handle->detail;
181 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800182
183 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
184 if (r) {
185 SStream ss;
186 SStream_Init(&ss);
187
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800188 // relative branches need to know the address & size of current insn
189 mci.insn_size = insn_size;
190 mci.address = offset;
191
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800192 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800193 // save all the information for non-detailed mode
194 mci.pub_insn.address = offset;
195 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800196 }
197
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800198 handle->printer(&mci, &ss, handle->printer_info);
199
Joxean114df0e2013-12-04 07:11:32 +0100200 fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800201
202 c++;
203 insn++;
204 buffer += insn_size;
205 size -= insn_size;
206 offset += insn_size;
207
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800208 if (c == count)
209 return c;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800210 } else
Nguyen Anh Quynh9a0dbab2013-12-15 22:25:58 +0800211 // face a broken instruction? then we stop here
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800212 return c;
213 }
214
215 return c;
216}
217
218// dynamicly allocate memory to contain disasm insn
219// NOTE: caller must free() the allocated memory itself to avoid memory leaking
pancakef0e4eed2013-12-11 22:14:42 +0100220size_t cs_disasm_dyn(csh ud, const uint8_t *buffer, size_t size, uint64_t offset, size_t count, cs_insn **insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800221{
222 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
223 MCInst mci;
224 uint16_t insn_size;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800225 size_t c = 0, f = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800226 cs_insn insn_cache[64];
227 void *total = NULL;
Nguyen Anh Quynhb42a6572013-11-29 17:40:07 +0800228 size_t total_size = 0;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800229
230 if (!handle) {
231 // FIXME: how to handle this case:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800232 // handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800233 return 0;
234 }
235
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800236 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800237
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800238 memset(insn_cache, 0, sizeof(insn_cache));
239
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800240 while (size > 0) {
danghvu2b192962013-12-19 22:40:28 -0600241 MCInst_Init(&mci);
Nguyen Anh Quynh1f449282013-12-15 14:04:59 +0800242 mci.detail = handle->detail;
243 mci.mode = handle->mode;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800244
245 bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info);
246 if (r) {
247 SStream ss;
248 SStream_Init(&ss);
249
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800250 // relative branches need to know the address & size of current insn
251 mci.insn_size = insn_size;
252 mci.address = offset;
253
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800254 if (handle->detail) {
Nguyen Anh Quynh4d3e8522013-12-14 10:45:09 +0800255 // save all the information for non-detailed mode
256 mci.pub_insn.address = offset;
257 mci.pub_insn.size = insn_size;
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800258 }
259
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800260 handle->printer(&mci, &ss, handle->printer_info);
261
Joxean114df0e2013-12-04 07:11:32 +0100262 fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer, buffer);
Nguyen Anh Quynha209e672013-12-14 00:23:41 +0800263
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800264 f++;
265
266 if (f == ARR_SIZE(insn_cache)) {
267 // resize total to contain newly disasm insns
268 total_size += sizeof(insn_cache);
269 void *tmp = realloc(total, total_size);
270 if (tmp == NULL) { // insufficient memory
271 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800272 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800273 return 0;
274 }
275
276 total = tmp;
277 memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache));
278 // reset f back to 0
279 f = 0;
280 }
281
282 c++;
283 buffer += insn_size;
284 size -= insn_size;
285 offset += insn_size;
286
287 if (count > 0 && c == count)
288 break;
Nguyen Anh Quynh8f13f3c2013-12-04 22:57:04 +0800289 } else {
290 // encounter a broken instruction
291 // XXX: TODO: JOXEAN continue here
292 break;
293 }
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800294 }
295
296 if (f) {
297 // resize total to contain newly disasm insns
298 void *tmp = realloc(total, total_size + f * sizeof(insn_cache[0]));
299 if (tmp == NULL) { // insufficient memory
300 free(total);
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800301 handle->errnum = CS_ERR_MEM;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800302 return 0;
303 }
304
305 total = tmp;
306 memcpy(total + total_size, insn_cache, f * sizeof(insn_cache[0]));
307 }
308
309 *insn = total;
310
311 return c;
312}
313
// Release memory through the library's own free(), e.g. the array produced
// by cs_disasm_dyn(). Passing NULL is harmless.
void cs_free(void *m)
{
	free(m);
}
318
319// return friendly name of regiser in a string
pancakef0e4eed2013-12-11 22:14:42 +0100320const char *cs_reg_name(csh ud, unsigned int reg)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800321{
322 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
323
324 if (!handle || handle->reg_name == NULL) {
325 return NULL;
326 }
327
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800328 return handle->reg_name(ud, reg);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800329}
330
pancakef0e4eed2013-12-11 22:14:42 +0100331const char *cs_insn_name(csh ud, unsigned int insn)
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800332{
333 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
334
335 if (!handle || handle->insn_name == NULL) {
336 return NULL;
337 }
338
Nguyen Anh Quynha253c7a2013-12-09 10:26:18 +0800339 return handle->insn_name(ud, insn);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800340}
341
// Linear search: is @id among the first @max bytes of @arr?
//
// @arr: array of byte-sized IDs
// @max: number of valid entries in @arr
// @id: value to look for
//
// @return: true when some entry equals @id, false otherwise (always false
//	when @max is 0).
static bool arr_exist(unsigned char *arr, unsigned char max, unsigned int id)
{
	unsigned int idx = 0;

	while (idx != max) {
		if (arr[idx] == id)
			return true;
		idx++;
	}

	return false;
}
353
354bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id)
355{
356 if (!handle)
357 return false;
358
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800359 return arr_exist(insn->groups, insn->groups_count, group_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800360}
361
362bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id)
363{
364 if (!handle)
365 return false;
366
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800367 return arr_exist(insn->regs_read, insn->regs_read_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800368}
369
370bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id)
371{
372 if (!handle)
373 return false;
374
Nguyen Anh Quynhf35e2ad2013-12-03 11:10:26 +0800375 return arr_exist(insn->regs_write, insn->regs_write_count, reg_id);
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800376}
377
378int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type)
379{
380 if (!ud)
381 return -1;
382
383 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
384 unsigned int count = 0, i;
385
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800386 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800387
388 switch (handle->arch) {
389 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800390 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800391 return -1;
392 case CS_ARCH_ARM:
393 for (i = 0; i < insn->arm.op_count; i++)
394 if (insn->arm.operands[i].type == op_type)
395 count++;
396 break;
397 case CS_ARCH_ARM64:
398 for (i = 0; i < insn->arm64.op_count; i++)
399 if (insn->arm64.operands[i].type == op_type)
400 count++;
401 break;
402 case CS_ARCH_X86:
403 for (i = 0; i < insn->x86.op_count; i++)
404 if (insn->x86.operands[i].type == op_type)
405 count++;
406 break;
407 case CS_ARCH_MIPS:
408 for (i = 0; i < insn->mips.op_count; i++)
409 if (insn->mips.operands[i].type == op_type)
410 count++;
411 break;
412 }
413
414 return count;
415}
416
417int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type,
418 unsigned int post)
419{
420 if (!ud)
421 return -1;
422
423 cs_struct *handle = (cs_struct *)(uintptr_t)ud;
424 unsigned int count = 0, i;
425
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800426 handle->errnum = CS_ERR_OK;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800427
428 switch (handle->arch) {
429 default:
Nguyen Anh Quynh3eb9ac92013-11-27 15:24:47 +0800430 handle->errnum = CS_ERR_HANDLE;
Nguyen Anh Quynh26ee41a2013-11-27 12:11:31 +0800431 return -1;
432 case CS_ARCH_ARM:
433 for (i = 0; i < insn->arm.op_count; i++) {
434 if (insn->arm.operands[i].type == op_type)
435 count++;
436 if (count == post)
437 return i;
438 }
439 break;
440 case CS_ARCH_ARM64:
441 for (i = 0; i < insn->arm64.op_count; i++) {
442 if (insn->arm64.operands[i].type == op_type)
443 count++;
444 if (count == post)
445 return i;
446 }
447 break;
448 case CS_ARCH_X86:
449 for (i = 0; i < insn->x86.op_count; i++) {
450 if (insn->x86.operands[i].type == op_type)
451 count++;
452 if (count == post)
453 return i;
454 }
455 break;
456 case CS_ARCH_MIPS:
457 for (i = 0; i < insn->mips.op_count; i++) {
458 if (insn->mips.operands[i].type == op_type)
459 count++;
460 if (count == post)
461 return i;
462 }
463 break;
464 }
465
466 return -1;
467}