Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 1 | /* Capstone Disassembler Engine */ |
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
| 3 | |
| 4 | #include <stdio.h> |
| 5 | #include <stdlib.h> |
| 6 | #include <string.h> |
| 7 | #include <capstone.h> |
| 8 | |
| 9 | #include "cs_priv.h" |
| 10 | |
| 11 | #include "MCRegisterInfo.h" |
| 12 | |
| 13 | #include "arch/X86/X86Disassembler.h" |
| 14 | #include "arch/X86/X86InstPrinter.h" |
| 15 | #include "arch/X86/mapping.h" |
| 16 | |
| 17 | #include "arch/ARM/ARMDisassembler.h" |
| 18 | #include "arch/ARM/ARMInstPrinter.h" |
| 19 | #include "arch/ARM/mapping.h" |
| 20 | |
| 21 | #include "arch/Mips/MipsDisassembler.h" |
| 22 | #include "arch/Mips/MipsInstPrinter.h" |
| 23 | #include "arch/Mips/mapping.h" |
| 24 | |
| 25 | #include "arch/AArch64/AArch64Disassembler.h" |
| 26 | #include "arch/AArch64/AArch64InstPrinter.h" |
| 27 | #include "arch/AArch64/mapping.h" |
| 28 | |
| 29 | #include "utils.h" |
| 30 | |
| 31 | #define VERSION_MAJOR 1 |
| 32 | #define VERSION_MINOR 2 |
| 33 | |
| 34 | cs_err cs_errno(csh handle) |
| 35 | { |
| 36 | if (!handle) |
| 37 | return CS_ERR_CSH; |
| 38 | |
| 39 | cs_struct *ud = (cs_struct *)(uintptr_t)handle; |
| 40 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 41 | return ud->errnum; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 42 | } |
| 43 | |
| 44 | void cs_version(int *major, int *minor) |
| 45 | { |
| 46 | *major = VERSION_MAJOR; |
| 47 | *minor = VERSION_MINOR; |
| 48 | } |
| 49 | |
| 50 | cs_err cs_open(cs_arch arch, cs_mode mode, csh *handle) |
| 51 | { |
| 52 | cs_struct *ud; |
| 53 | |
| 54 | ud = calloc(1, sizeof(*ud)); |
| 55 | if (!ud) { |
| 56 | // memory insufficient |
| 57 | return CS_ERR_MEM; |
| 58 | } |
| 59 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 60 | ud->errnum = CS_ERR_OK; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 61 | ud->arch = arch; |
| 62 | ud->mode = mode; |
| 63 | ud->big_endian = mode & CS_MODE_BIG_ENDIAN; |
| 64 | ud->reg_name = NULL; |
| 65 | |
| 66 | switch (ud->arch) { |
| 67 | case CS_ARCH_X86: |
| 68 | if (ud->mode & CS_MODE_SYNTAX_ATT) |
| 69 | ud->printer = X86_ATT_printInst; |
| 70 | else |
| 71 | ud->printer = X86_Intel_printInst; |
| 72 | ud->printer_info = NULL; |
| 73 | ud->disasm = X86_getInstruction; |
| 74 | ud->reg_name = X86_reg_name; |
| 75 | ud->insn_id = X86_get_insn_id; |
| 76 | ud->insn_name = X86_insn_name; |
| 77 | break; |
| 78 | case CS_ARCH_ARM: { |
| 79 | MCRegisterInfo *mri = malloc(sizeof(*mri)); |
| 80 | |
| 81 | ARM_init(mri); |
| 82 | |
| 83 | ud->printer = ARM_printInst; |
| 84 | ud->printer_info = mri; |
| 85 | ud->reg_name = ARM_reg_name; |
| 86 | ud->insn_id = ARM_get_insn_id; |
| 87 | ud->insn_name = ARM_insn_name; |
| 88 | ud->post_printer = ARM_post_printer; |
| 89 | |
| 90 | if (ud->mode & CS_MODE_THUMB) |
| 91 | ud->disasm = Thumb_getInstruction; |
| 92 | else |
| 93 | ud->disasm = ARM_getInstruction; |
| 94 | break; |
| 95 | } |
| 96 | case CS_ARCH_MIPS: { |
| 97 | MCRegisterInfo *mri = malloc(sizeof(*mri)); |
| 98 | |
| 99 | Mips_init(mri); |
| 100 | ud->printer = Mips_printInst; |
| 101 | ud->printer_info = mri; |
| 102 | ud->getinsn_info = mri; |
| 103 | ud->reg_name = Mips_reg_name; |
| 104 | ud->insn_id = Mips_get_insn_id; |
| 105 | ud->insn_name = Mips_insn_name; |
| 106 | |
| 107 | if (ud->mode & CS_MODE_32) |
| 108 | ud->disasm = Mips_getInstruction; |
| 109 | else |
| 110 | ud->disasm = Mips64_getInstruction; |
| 111 | |
| 112 | if (ud->mode & CS_MODE_MICRO) |
| 113 | ud->micro_mips = true; |
| 114 | |
| 115 | break; |
| 116 | } |
| 117 | case CS_ARCH_ARM64: { |
| 118 | MCRegisterInfo *mri = malloc(sizeof(*mri)); |
| 119 | |
| 120 | AArch64_init(mri); |
| 121 | ud->printer = AArch64_printInst; |
| 122 | ud->printer_info = mri; |
| 123 | ud->getinsn_info = mri; |
| 124 | ud->disasm = AArch64_getInstruction; |
| 125 | ud->reg_name = AArch64_reg_name; |
| 126 | ud->insn_id = AArch64_get_insn_id; |
| 127 | ud->insn_name = AArch64_insn_name; |
| 128 | ud->post_printer = AArch64_post_printer; |
| 129 | break; |
| 130 | } |
| 131 | default: // unsupported architecture |
| 132 | free(ud); |
| 133 | return CS_ERR_ARCH; |
| 134 | } |
| 135 | |
| 136 | *handle = (uintptr_t)ud; |
| 137 | |
| 138 | return CS_ERR_OK; |
| 139 | } |
| 140 | |
| 141 | cs_err cs_close(csh handle) |
| 142 | { |
| 143 | if (!handle) |
| 144 | return CS_ERR_CSH; |
| 145 | |
| 146 | cs_struct *ud = (cs_struct *)(uintptr_t)handle; |
| 147 | |
| 148 | switch (ud->arch) { |
| 149 | case CS_ARCH_X86: |
| 150 | break; |
| 151 | case CS_ARCH_ARM: |
| 152 | case CS_ARCH_MIPS: |
| 153 | case CS_ARCH_ARM64: |
| 154 | free(ud->printer_info); |
| 155 | break; |
| 156 | default: // unsupported architecture |
| 157 | return CS_ERR_HANDLE; |
| 158 | } |
| 159 | |
| 160 | memset(ud, 0, sizeof(*ud)); |
| 161 | free(ud); |
| 162 | |
| 163 | return CS_ERR_OK; |
| 164 | } |
| 165 | |
| 166 | // fill insn with mnemonic & operands info |
| 167 | static void fill_insn(cs_struct *handle, cs_insn *insn, char *buffer, MCInst *mci, |
| 168 | PostPrinter_t printer) |
| 169 | { |
| 170 | memcpy(insn, &mci->pub_insn, sizeof(*insn)); |
| 171 | |
| 172 | // map internal instruction opcode to public insn ID |
| 173 | if (handle->insn_id) |
| 174 | handle->insn_id(insn, MCInst_getOpcode(mci)); |
| 175 | |
| 176 | if (printer) |
| 177 | printer(insn->id, insn, buffer); |
| 178 | |
| 179 | // fill in mnemonic & operands |
| 180 | char *tab = strchr(buffer, '\t'); |
| 181 | if (tab) { |
| 182 | *tab = '\0'; |
| 183 | strncpy(insn->op_str, tab + 1, sizeof(insn->op_str) - 1); |
| 184 | insn->op_str[sizeof(insn->op_str) - 1] = '\0'; |
| 185 | } else |
| 186 | insn->op_str[0] = '\0'; |
| 187 | |
| 188 | strncpy(insn->mnemonic, buffer, sizeof(insn->mnemonic) - 1); |
| 189 | insn->mnemonic[sizeof(insn->mnemonic) - 1] = '\0'; |
| 190 | } |
| 191 | |
| 192 | uint64_t cs_disasm(csh ud, char *buffer, uint64_t size, uint64_t offset, uint64_t count, cs_insn *insn) |
| 193 | { |
| 194 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 195 | MCInst mci; |
| 196 | uint16_t insn_size; |
| 197 | uint64_t c = 0; |
| 198 | |
| 199 | if (!handle) { |
| 200 | // FIXME: handle this case? |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 201 | // handle->errnum = CS_ERR_HANDLE; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 202 | return 0; |
| 203 | } |
| 204 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 205 | handle->errnum = CS_ERR_OK; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 206 | |
| 207 | while (size > 0) { |
| 208 | MCInst_Init(&mci); |
| 209 | |
| 210 | bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info); |
| 211 | if (r) { |
| 212 | SStream ss; |
| 213 | SStream_Init(&ss); |
| 214 | |
| 215 | mci.pub_insn.size = insn_size; |
| 216 | mci.pub_insn.address = offset; |
| 217 | mci.mode = handle->mode; |
| 218 | handle->printer(&mci, &ss, handle->printer_info); |
| 219 | |
| 220 | fill_insn(handle, insn, ss.buffer, &mci, handle->post_printer); |
| 221 | |
| 222 | c++; |
| 223 | insn++; |
| 224 | buffer += insn_size; |
| 225 | size -= insn_size; |
| 226 | offset += insn_size; |
| 227 | |
| 228 | if (count > 0) { |
| 229 | if (c == count) |
| 230 | return c; |
| 231 | } |
| 232 | } else // face a broken instruction? |
| 233 | return c; |
| 234 | } |
| 235 | |
| 236 | return c; |
| 237 | } |
| 238 | |
| 239 | // dynamicly allocate memory to contain disasm insn |
| 240 | // NOTE: caller must free() the allocated memory itself to avoid memory leaking |
| 241 | uint64_t cs_disasm_dyn(csh ud, char *buffer, uint64_t size, uint64_t offset, uint64_t count, cs_insn **insn) |
| 242 | { |
| 243 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 244 | MCInst mci; |
| 245 | uint16_t insn_size; |
| 246 | uint64_t c = 0, f = 0; |
| 247 | cs_insn insn_cache[64]; |
| 248 | void *total = NULL; |
| 249 | uint64_t total_size = 0; |
| 250 | |
| 251 | if (!handle) { |
| 252 | // FIXME: how to handle this case: |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 253 | // handle->errnum = CS_ERR_HANDLE; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 254 | return 0; |
| 255 | } |
| 256 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 257 | handle->errnum = CS_ERR_OK; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 258 | |
| 259 | while (size > 0) { |
| 260 | MCInst_Init(&mci); |
| 261 | |
| 262 | bool r = handle->disasm(ud, buffer, size, &mci, &insn_size, offset, handle->getinsn_info); |
| 263 | if (r) { |
| 264 | SStream ss; |
| 265 | SStream_Init(&ss); |
| 266 | |
| 267 | mci.pub_insn.size = insn_size; |
| 268 | mci.pub_insn.address = offset; |
| 269 | mci.mode = handle->mode; |
| 270 | handle->printer(&mci, &ss, handle->printer_info); |
| 271 | |
| 272 | fill_insn(handle, &insn_cache[f], ss.buffer, &mci, handle->post_printer); |
| 273 | f++; |
| 274 | |
| 275 | if (f == ARR_SIZE(insn_cache)) { |
| 276 | // resize total to contain newly disasm insns |
| 277 | total_size += sizeof(insn_cache); |
| 278 | void *tmp = realloc(total, total_size); |
| 279 | if (tmp == NULL) { // insufficient memory |
| 280 | free(total); |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 281 | handle->errnum = CS_ERR_MEM; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 282 | return 0; |
| 283 | } |
| 284 | |
| 285 | total = tmp; |
| 286 | memcpy(total + total_size - sizeof(insn_cache), insn_cache, sizeof(insn_cache)); |
| 287 | // reset f back to 0 |
| 288 | f = 0; |
| 289 | } |
| 290 | |
| 291 | c++; |
| 292 | buffer += insn_size; |
| 293 | size -= insn_size; |
| 294 | offset += insn_size; |
| 295 | |
| 296 | if (count > 0 && c == count) |
| 297 | break; |
| 298 | } else // encounter a broken instruction |
| 299 | break; |
| 300 | } |
| 301 | |
| 302 | if (f) { |
| 303 | // resize total to contain newly disasm insns |
| 304 | void *tmp = realloc(total, total_size + f * sizeof(insn_cache[0])); |
| 305 | if (tmp == NULL) { // insufficient memory |
| 306 | free(total); |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 307 | handle->errnum = CS_ERR_MEM; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 308 | return 0; |
| 309 | } |
| 310 | |
| 311 | total = tmp; |
| 312 | memcpy(total + total_size, insn_cache, f * sizeof(insn_cache[0])); |
| 313 | } |
| 314 | |
| 315 | *insn = total; |
| 316 | |
| 317 | return c; |
| 318 | } |
| 319 | |
// Thin wrapper around free(): releases the heap block m points to.
// Passing NULL is harmless, exactly as with free().
void cs_free(void *m)
{
	free(m);
}
| 324 | |
| 325 | // return friendly name of regiser in a string |
| 326 | char *cs_reg_name(csh ud, unsigned int reg) |
| 327 | { |
| 328 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 329 | |
| 330 | if (!handle || handle->reg_name == NULL) { |
| 331 | return NULL; |
| 332 | } |
| 333 | |
| 334 | // x86 flags register must be specially handled |
| 335 | if ((handle->arch == CS_ARCH_X86) && (reg == X86_REG_FLAGS)) { |
| 336 | if (handle->mode & CS_MODE_64) |
| 337 | return "rflags"; |
| 338 | if (handle->mode & CS_MODE_32) |
| 339 | return "eflags"; |
| 340 | if (handle->mode & CS_MODE_16) |
| 341 | return "flags"; |
| 342 | } |
| 343 | |
| 344 | return handle->reg_name(reg); |
| 345 | } |
| 346 | |
| 347 | char *cs_insn_name(csh ud, unsigned int insn) |
| 348 | { |
| 349 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 350 | |
| 351 | if (!handle || handle->insn_name == NULL) { |
| 352 | return NULL; |
| 353 | } |
| 354 | |
| 355 | return handle->insn_name(insn); |
| 356 | } |
| 357 | |
// Linear search: report whether id occurs among the first max entries of arr.
// A max of zero or less searches nothing and yields false.
static bool arr_exist(unsigned int *arr, int max, unsigned int id)
{
	int pos = 0;

	while (pos < max) {
		if (arr[pos] == id) {
			return true;
		}
		pos++;
	}

	return false;
}
| 369 | |
| 370 | bool cs_insn_group(csh handle, cs_insn *insn, unsigned int group_id) |
| 371 | { |
| 372 | if (!handle) |
| 373 | return false; |
| 374 | |
| 375 | return arr_exist(insn->groups, ARR_SIZE(insn->groups), group_id); |
| 376 | } |
| 377 | |
| 378 | bool cs_reg_read(csh handle, cs_insn *insn, unsigned int reg_id) |
| 379 | { |
| 380 | if (!handle) |
| 381 | return false; |
| 382 | |
| 383 | return arr_exist(insn->regs_read, ARR_SIZE(insn->regs_read), reg_id); |
| 384 | } |
| 385 | |
| 386 | bool cs_reg_write(csh handle, cs_insn *insn, unsigned int reg_id) |
| 387 | { |
| 388 | if (!handle) |
| 389 | return false; |
| 390 | |
| 391 | return arr_exist(insn->regs_write, ARR_SIZE(insn->regs_write), reg_id); |
| 392 | } |
| 393 | |
| 394 | int cs_op_count(csh ud, cs_insn *insn, unsigned int op_type) |
| 395 | { |
| 396 | if (!ud) |
| 397 | return -1; |
| 398 | |
| 399 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 400 | unsigned int count = 0, i; |
| 401 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 402 | handle->errnum = CS_ERR_OK; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 403 | |
| 404 | switch (handle->arch) { |
| 405 | default: |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 406 | handle->errnum = CS_ERR_HANDLE; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 407 | return -1; |
| 408 | case CS_ARCH_ARM: |
| 409 | for (i = 0; i < insn->arm.op_count; i++) |
| 410 | if (insn->arm.operands[i].type == op_type) |
| 411 | count++; |
| 412 | break; |
| 413 | case CS_ARCH_ARM64: |
| 414 | for (i = 0; i < insn->arm64.op_count; i++) |
| 415 | if (insn->arm64.operands[i].type == op_type) |
| 416 | count++; |
| 417 | break; |
| 418 | case CS_ARCH_X86: |
| 419 | for (i = 0; i < insn->x86.op_count; i++) |
| 420 | if (insn->x86.operands[i].type == op_type) |
| 421 | count++; |
| 422 | break; |
| 423 | case CS_ARCH_MIPS: |
| 424 | for (i = 0; i < insn->mips.op_count; i++) |
| 425 | if (insn->mips.operands[i].type == op_type) |
| 426 | count++; |
| 427 | break; |
| 428 | } |
| 429 | |
| 430 | return count; |
| 431 | } |
| 432 | |
| 433 | int cs_op_index(csh ud, cs_insn *insn, unsigned int op_type, |
| 434 | unsigned int post) |
| 435 | { |
| 436 | if (!ud) |
| 437 | return -1; |
| 438 | |
| 439 | cs_struct *handle = (cs_struct *)(uintptr_t)ud; |
| 440 | unsigned int count = 0, i; |
| 441 | |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 442 | handle->errnum = CS_ERR_OK; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 443 | |
| 444 | switch (handle->arch) { |
| 445 | default: |
Nguyen Anh Quynh | 3eb9ac9 | 2013-11-27 15:24:47 +0800 | [diff] [blame^] | 446 | handle->errnum = CS_ERR_HANDLE; |
Nguyen Anh Quynh | 26ee41a | 2013-11-27 12:11:31 +0800 | [diff] [blame] | 447 | return -1; |
| 448 | case CS_ARCH_ARM: |
| 449 | for (i = 0; i < insn->arm.op_count; i++) { |
| 450 | if (insn->arm.operands[i].type == op_type) |
| 451 | count++; |
| 452 | if (count == post) |
| 453 | return i; |
| 454 | } |
| 455 | break; |
| 456 | case CS_ARCH_ARM64: |
| 457 | for (i = 0; i < insn->arm64.op_count; i++) { |
| 458 | if (insn->arm64.operands[i].type == op_type) |
| 459 | count++; |
| 460 | if (count == post) |
| 461 | return i; |
| 462 | } |
| 463 | break; |
| 464 | case CS_ARCH_X86: |
| 465 | for (i = 0; i < insn->x86.op_count; i++) { |
| 466 | if (insn->x86.operands[i].type == op_type) |
| 467 | count++; |
| 468 | if (count == post) |
| 469 | return i; |
| 470 | } |
| 471 | break; |
| 472 | case CS_ARCH_MIPS: |
| 473 | for (i = 0; i < insn->mips.op_count; i++) { |
| 474 | if (insn->mips.operands[i].type == op_type) |
| 475 | count++; |
| 476 | if (count == post) |
| 477 | return i; |
| 478 | } |
| 479 | break; |
| 480 | } |
| 481 | |
| 482 | return -1; |
| 483 | } |