blob: 0b3efaeef537c15a87f39bcd791464235546e1a5 [file] [log] [blame]
/* Tang Yuhang <tyh000011112222@gmail.com> 2016 */
/* pancake <pancake@nopcode.org> 2017 */
3
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "getopt.h"

#include <capstone/capstone.h>
echotyh51c8c502016-10-10 15:16:56 +080010
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +020011static struct {
12 const char *name;
13 cs_arch arch;
14 cs_mode mode;
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +080015} all_archs[] = {
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +020016 { "arm", CS_ARCH_ARM, CS_MODE_ARM },
17 { "armb", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_BIG_ENDIAN },
18 { "armbe", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_BIG_ENDIAN },
19 { "arml", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_LITTLE_ENDIAN },
20 { "armle", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_LITTLE_ENDIAN },
21 { "thumb", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB },
22 { "thumbbe", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB | CS_MODE_BIG_ENDIAN },
23 { "thumble", CS_ARCH_ARM, CS_MODE_ARM | CS_MODE_THUMB | CS_MODE_LITTLE_ENDIAN },
24 { "arm64", CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN },
25 { "arm64be", CS_ARCH_ARM64, CS_MODE_BIG_ENDIAN },
26 { "mips", CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_LITTLE_ENDIAN },
27 { "mipsbe", CS_ARCH_MIPS, CS_MODE_MIPS32 | CS_MODE_BIG_ENDIAN },
28 { "mips64", CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_LITTLE_ENDIAN },
29 { "mips64be", CS_ARCH_MIPS, CS_MODE_MIPS64 | CS_MODE_BIG_ENDIAN },
30 { "x16", CS_ARCH_X86, CS_MODE_16 }, // CS_MODE_16
31 { "x16att", CS_ARCH_X86, CS_MODE_16 }, // CS_MODE_16 , CS_OPT_SYNTAX_ATT
32 { "x32", CS_ARCH_X86, CS_MODE_32 }, // CS_MODE_32
33 { "x32att", CS_ARCH_X86, CS_MODE_32 }, // CS_MODE_32, CS_OPT_SYNTAX_ATT
34 { "x64", CS_ARCH_X86, CS_MODE_64 }, // CS_MODE_64
35 { "x64att", CS_ARCH_X86, CS_MODE_64 }, // CS_MODE_64, CS_OPT_SYNTAX_ATT
36 { "ppc64", CS_ARCH_PPC, CS_MODE_64 | CS_MODE_LITTLE_ENDIAN },
37 { "ppc64be", CS_ARCH_PPC, CS_MODE_64 | CS_MODE_BIG_ENDIAN },
38 { "sparc", CS_ARCH_SPARC, CS_MODE_BIG_ENDIAN },
39 { "systemz", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
40 { "sysz", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
41 { "s390x", CS_ARCH_SYSZ, CS_MODE_BIG_ENDIAN },
42 { "xcore", CS_ARCH_XCORE, CS_MODE_BIG_ENDIAN },
43 { "m68k", CS_ARCH_M68K, CS_MODE_BIG_ENDIAN },
44 { "m68k40", CS_ARCH_M68K, CS_MODE_M68K_040 },
45 { "tms320c64x", CS_ARCH_TMS320C64X, CS_MODE_BIG_ENDIAN },
46 { "tms320c64x", CS_ARCH_TMS320C64X, CS_MODE_BIG_ENDIAN },
47 { NULL }
48};
49
YUHANG TANG9354e5e2016-10-14 17:29:56 +080050void print_insn_detail_x86(csh ud, cs_mode mode, cs_insn *ins);
YUHANG TANG08da0c02016-10-14 20:47:29 +080051void print_insn_detail_arm(csh handle, cs_insn *ins);
52void print_insn_detail_arm64(csh handle, cs_insn *ins);
53void print_insn_detail_mips(csh handle, cs_insn *ins);
54void print_insn_detail_ppc(csh handle, cs_insn *ins);
55void print_insn_detail_sparc(csh handle, cs_insn *ins);
56void print_insn_detail_sysz(csh handle, cs_insn *ins);
57void print_insn_detail_xcore(csh handle, cs_insn *ins);
YUHANG TANG9bc14c12016-10-28 15:32:50 +080058void print_insn_detail_m68k(csh handle, cs_insn *ins);
Fotis Loukos44ca0e32017-04-17 11:58:29 +030059void print_insn_detail_tms320c64x(csh handle, cs_insn *ins);
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +020060static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins);
YUHANG TANG9354e5e2016-10-14 17:29:56 +080061
// Print @comment, then each of the @len bytes starting at @str formatted
// as "0xNN " (two lowercase hex digits), and finish with a newline.
void print_string_hex(char *comment, unsigned char *str, size_t len)
{
	size_t idx;

	printf("%s", comment);

	for (idx = 0; idx < len; idx++) {
		printf("0x%02x ", str[idx] & 0xff);
	}

	printf("\n");
}
73
// Convert a single hexadecimal digit character to its numeric value.
// '0'..'9' map to 0..9 and 'a'..'f' map to 10..15; every other character
// is handled as if it were an uppercase hex digit ('A'..'F' -> 10..15).
// No validation is done here.
static uint8_t char_to_hexnum(char c)
{
	int base;

	if (c >= '0' && c <= '9') {
		base = '0';
	} else if (c >= 'a' && c <= 'f') {
		base = 'a' - 10;
	} else {
		// c >= 'A' && c <= 'F'
		base = 'A' - 10;
	}

	return (uint8_t)(c - base);
}
88
// Convert user input (a NUL-terminated string) into a newly allocated
// uint8_t[] holding the bytes encoded by each pair of adjacent hexadecimal
// digits. Characters that do not start such a pair (spaces, commas, a
// trailing lone digit, ...) are skipped.
//
// @code: input string; must not be NULL.
// @size: receives the number of bytes written to the result; it is set to
//        0 whenever NULL is returned.
//
// Returns the buffer, to be released by the caller with free(), or NULL
// when @code is empty or the allocation fails. A non-empty input without
// any hex pair yields a non-NULL buffer with *size == 0.
static uint8_t *preprocess(char *code, size_t *size)
{
	size_t i = 0, j = 0;
	size_t len;
	uint8_t high, low;
	uint8_t *result;

	// Give the caller a defined length on every return path.
	*size = 0;

	len = strlen(code);
	if (len == 0) {
		return NULL;
	}

	// Each output byte consumes two input characters, so @len bytes is
	// always enough room.
	result = (uint8_t *)malloc(len);
	if (result == NULL) {
		return NULL;
	}

	while (code[i] != '\0') {
		// <ctype.h> classifiers require a value representable as
		// unsigned char; a plain (possibly negative) char is undefined
		// behavior, hence the casts.
		if (isxdigit((unsigned char)code[i]) &&
				isxdigit((unsigned char)code[i + 1])) {
			high = 16 * char_to_hexnum(code[i]);
			low = char_to_hexnum(code[i + 1]);
			result[j] = high + low;
			i++;	// consume the second digit of the pair as well
			j++;
		}
		i++;
	}

	*size = j;

	return result;
}
117
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800118static void usage(char *prog)
echotyh51c8c502016-10-10 15:16:56 +0800119{
Nguyen Anh Quynh7532fc72016-11-04 23:57:52 +0800120 printf("Cstool for Capstone Disassembler Engine v%u.%u.%u\n\n", CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA);
radare9af97422017-06-15 20:13:28 +0200121 printf("Syntax: %s [-u|-d] <arch+mode> <assembly-hexstring> [start-address-in-hex-format]\n", prog);
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800122 printf("\nThe following <arch+mode> options are supported:\n");
123
124 if (cs_support(CS_ARCH_X86)) {
125 printf(" x16: 16-bit mode (X86)\n");
126 printf(" x32: 32-bit mode (X86)\n");
127 printf(" x64: 64-bit mode (X86)\n");
128 printf(" x16att: 16-bit mode (X86) syntax-att\n");
129 printf(" x32att: 32-bit mode (X86) syntax-att\n");
130 printf(" x64att: 64-bit mode (X86) syntax-att\n");
131 }
132
133 if (cs_support(CS_ARCH_ARM)) {
134 printf(" arm: arm\n");
Nguyen Anh Quynh996db1f2017-03-10 20:30:55 +0800135 printf(" armbe: arm + big endian\n");
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800136 printf(" thumb: thumb mode\n");
137 printf(" thumbbe: thumb + big endian\n");
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800138 }
139
140 if (cs_support(CS_ARCH_ARM64)) {
141 printf(" arm64: aarch64 mode\n");
Nguyen Anh Quynh6d609eb2017-04-25 21:33:26 +0800142 printf(" arm64be: aarch64 + big endian\n");
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800143 }
144
145 if (cs_support(CS_ARCH_MIPS)) {
146 printf(" mips: mips32 + little endian\n");
147 printf(" mipsbe: mips32 + big endian\n");
148 printf(" mips64: mips64 + little endian\n");
149 printf(" mips64be: mips64 + big endian\n");
150 }
151
152 if (cs_support(CS_ARCH_PPC)) {
153 printf(" ppc64: ppc64 + little endian\n");
154 printf(" ppc64be: ppc64 + big endian\n");
155 }
156
157 if (cs_support(CS_ARCH_SPARC)) {
158 printf(" sparc: sparc\n");
159 }
160
161 if (cs_support(CS_ARCH_SYSZ)) {
162 printf(" systemz: systemz (s390x)\n");
163 }
164
165 if (cs_support(CS_ARCH_XCORE)) {
166 printf(" xcore: xcore\n");
167 }
YUHANG TANGbe3f8672016-10-27 12:12:59 +0800168
169 if (cs_support(CS_ARCH_M68K)) {
Nguyen Anh Quynhdf6f9cc2016-10-28 16:12:05 +0800170 printf(" m68k: m68k + big endian\n");
YUHANG TANGbe3f8672016-10-27 12:12:59 +0800171 printf(" m68k40: m68k_040\n");
YUHANG TANGbe3f8672016-10-27 12:12:59 +0800172 }
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800173
Fotis Loukos44ca0e32017-04-17 11:58:29 +0300174 if (cs_support(CS_ARCH_TMS320C64X)) {
175 printf(" tms320c64x:TMS320C64x\n");
176 }
177
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800178 printf("\nExtra options:\n");
179 printf(" -d show detailed information of the instructions\n");
180 printf(" -u show immediates as unsigned\n\n");
echotyh51c8c502016-10-10 15:16:56 +0800181}
182
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200183static void print_details(csh handle, cs_arch arch, cs_mode md, cs_insn *ins)
184{
185 switch(arch) {
186 case CS_ARCH_X86:
187 print_insn_detail_x86(handle, md, ins);
188 break;
189 case CS_ARCH_ARM:
190 print_insn_detail_arm(handle, ins);
191 break;
192 case CS_ARCH_ARM64:
193 print_insn_detail_arm64(handle, ins);
194 break;
195 case CS_ARCH_MIPS:
196 print_insn_detail_mips(handle, ins);
197 break;
198 case CS_ARCH_PPC:
199 print_insn_detail_ppc(handle, ins);
200 break;
201 case CS_ARCH_SPARC:
202 print_insn_detail_sparc(handle, ins);
203 break;
204 case CS_ARCH_SYSZ:
205 print_insn_detail_sysz(handle, ins);
206 break;
207 case CS_ARCH_XCORE:
208 print_insn_detail_xcore(handle, ins);
209 break;
210 case CS_ARCH_M68K:
211 print_insn_detail_m68k(handle, ins);
212 break;
213 case CS_ARCH_TMS320C64X:
214 print_insn_detail_tms320c64x(handle, ins);
215 break;
216 default: break;
217 }
218
219 if (ins->detail->groups_count) {
220 int j;
221
222 printf("\tGroups: ");
223 for(j = 0; j < ins->detail->groups_count; j++) {
224 printf("%s ", cs_group_name(handle, ins->detail->groups[j]));
225 }
226 printf("\n");
227 }
228
229 printf("\n");
230}
231
echotyh51c8c502016-10-10 15:16:56 +0800232int main(int argc, char **argv)
233{
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200234 int i, c;
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800235 csh handle;
236 char *mode;
237 uint8_t *assembly;
238 size_t count, size;
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200239 uint64_t address = 0LL;
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800240 cs_insn *insn;
241 cs_err err;
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800242 cs_mode md;
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200243 cs_arch arch = -1;
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800244 bool detail_flag = false;
radare9af97422017-06-15 20:13:28 +0200245 bool unsigned_flag = false;
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800246 int args_left;
echotyh51c8c502016-10-10 15:16:56 +0800247
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200248 while ((c = getopt (argc, argv, "udhv")) != -1) {
249 switch (c) {
250 case 'u':
251 unsigned_flag = true;
252 break;
253 case 'd':
254 detail_flag = true;
255 break;
256 case 'v':
257 printf("%u.%u.%u\n", CS_VERSION_MAJOR, CS_VERSION_MINOR, CS_VERSION_EXTRA);
258 return 0;
259 case 'h':
260 usage(argv[0]);
261 return 0;
262 default:
263 usage(argv[0]);
264 return -1;
265 }
266 }
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800267
268 args_left = argc - optind;
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200269 if (args_left < 2 || args_left > 3) {
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800270 usage(argv[0]);
271 return -1;
272 }
echotyh51c8c502016-10-10 15:16:56 +0800273
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200274 mode = argv[optind];
275 assembly = preprocess(argv[optind + 1], &size);
Nguyen Anh Quynhfef1c292017-07-26 23:22:46 +0800276 if (!assembly) {
277 usage(argv[0]);
278 return -1;
279 }
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800280
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200281 if (args_left == 3) {
282 char *temp, *src = argv[optind + 2];
283 address = strtoull(src, &temp, 16);
284 if (temp == src || *temp != '\0' || errno == ERANGE) {
285 printf("ERROR: invalid address argument, quit!\n");
286 return -2;
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800287 }
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200288 }
289
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800290 for (i = 0; all_archs[i].name; i++) {
291 if (!strcmp(all_archs[i].name, mode)) {
292 arch = all_archs[i].arch;
293 err = cs_open(all_archs[i].arch, all_archs[i].mode, &handle);
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200294 if (!err) {
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800295 md = all_archs[i].mode;
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200296 if (strstr (mode, "att")) {
297 cs_option(handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
298 }
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800299 }
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200300 break;
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800301 }
Fotis Loukos44ca0e32017-04-17 11:58:29 +0300302 }
303
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800304 if (err) {
305 printf("ERROR: Failed on cs_open(), quit!\n");
306 usage(argv[0]);
307 return -1;
308 }
309
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800310 if (detail_flag) {
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800311 cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
312 }
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800313
radare9af97422017-06-15 20:13:28 +0200314 if (unsigned_flag) {
315 cs_option(handle, CS_OPT_UNSIGNED, CS_OPT_ON);
316 }
YUHANG TANG9354e5e2016-10-14 17:29:56 +0800317
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800318 count = cs_disasm(handle, assembly, size, address, 0, &insn);
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800319 if (count > 0) {
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800320 size_t i;
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800321
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800322 for (i = 0; i < count; i++) {
323 int j;
Nguyen Anh Quynh13e06982017-07-04 16:04:53 +0800324
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800325 printf("%"PRIx64" ", insn[i].address);
326 for (j = 0; j < insn[i].size; j++) {
Ruslan Kabatsayev4aec4de2017-09-06 17:35:19 +0400327 if (j > 0)
328 putchar(' ');
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800329 printf("%02x", insn[i].bytes[j]);
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800330 }
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800331 // X86 instruction size is variable.
332 // align assembly instruction after the opcode
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800333 if (arch == CS_ARCH_X86) {
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800334 for (; j < 16; j++) {
335 printf(" ");
336 }
337 }
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800338
Nguyen Anh Quynhbab2a932016-10-11 16:19:27 +0800339 printf(" %s\t%s\n", insn[i].mnemonic, insn[i].op_str);
Nguyen Anh Quynh32238dc2016-10-21 16:42:47 +0800340
341 if (detail_flag) {
Sergi Àlvarez i Capillacff66502017-07-04 09:55:46 +0200342 print_details(handle, arch, md, &insn[i]);
YUHANG TANGbde12ae2016-10-21 16:03:35 +0800343 }
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800344 }
Nguyen Anh Quynh570db5f2016-11-05 00:43:22 +0800345
Nguyen Anh Quynh815b94a2016-10-10 23:20:29 +0800346 cs_free(insn, count);
347 } else {
348 printf("ERROR: invalid assembly code\n");
349 return(-4);
350 }
351
352 cs_close(&handle);
353
354 return 0;
echotyh51c8c502016-10-10 15:16:56 +0800355}