blob: ff5875a9b5205c1df04b841d7d99b6e600e9bead [file] [log] [blame]
#!/usr/bin/python
# Test tool to disassemble MC files. By Nguyen Anh Quynh, 2017
3import array, os.path, sys
4from capstone import *
5
6
# convert all hex numbers to decimal numbers in a text
def normalize_hex(a):
    """Return *a* with every ``0x``-prefixed hex literal rewritten in decimal.

    Each literal is converted exactly where it occurs, in a single
    left-to-right pass.  Replacing by substring instead would let a short
    literal corrupt a longer one that starts with the same digits
    (``0x1`` inside ``0x1f``).

    A ``0x`` that is not followed by at least one hex digit is left
    untouched.
    """
    # Local import: the file's import header does not bring in `re`.
    import re

    return re.sub(
        r'0x[0-9a-fA-F]+',
        lambda match: str(int(match.group(0), 16)),
        a,
    )
22
23
24def test_file(fname):
25 print("Test %s" %fname);
26 f = open(fname)
27 lines = f.readlines()
28 f.close()
29
30 if not lines[0].startswith('# '):
31 print("ERROR: decoding information is missing")
32 return
33
34 # skip '# ' at the front, then split line to get out hexcode
35 # Note: option can be '', or 'None'
36 #print lines[0]
37 #print lines[0][2:].split(', ')
38 (arch, mode, option) = lines[0][2:].split(', ')
39 mode = mode.replace(' ', '')
40 option = option.strip()
41
42 archs = {
43 "CS_ARCH_ARM": CS_ARCH_ARM,
44 "CS_ARCH_ARM64": CS_ARCH_ARM64,
45 "CS_ARCH_MIPS": CS_ARCH_MIPS,
46 "CS_ARCH_PPC": CS_ARCH_PPC,
47 "CS_ARCH_SPARC": CS_ARCH_SPARC,
48 "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
49 "CS_ARCH_X86": CS_ARCH_X86,
50 "CS_ARCH_XCORE": CS_ARCH_XCORE,
51 "CS_ARCH_M68K": CS_ARCH_M68K,
52 }
53
54 modes = {
55 "CS_MODE_16": CS_MODE_16,
56 "CS_MODE_32": CS_MODE_32,
57 "CS_MODE_64": CS_MODE_64,
58 "CS_MODE_MIPS32": CS_MODE_MIPS32,
59 "CS_MODE_MIPS64": CS_MODE_MIPS64,
60 "0": CS_MODE_ARM,
61 "CS_MODE_ARM": CS_MODE_ARM,
62 "CS_MODE_THUMB": CS_MODE_THUMB,
63 "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
64 "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
65 "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
66 "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
67 "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
68 "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
69 "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
70 "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
71 "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
72 "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
73 "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
74 "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
75 "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
76 "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
77 "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
78 }
79
80 options = {
81 "CS_OPT_SYNTAX_ATT": CS_OPT_SYNTAX_ATT,
82 "CS_OPT_SYNTAX_NOREGNAME": CS_OPT_SYNTAX_NOREGNAME,
83 }
84
85 mc_modes = {
86 ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
87 ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
88 ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
89 ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
90 ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
91 ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
92 ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
93 ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
94 ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
95 ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
96 ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
97 ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
98 ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
99 ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
100 ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
101 ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
102 ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
103 ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
104 ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
105 }
106
107 #if not option in ('', 'None'):
108 # print archs[arch], modes[mode], options[option]
109
110 #print(arch, mode, option)
111 md = Cs(archs[arch], modes[mode])
112
113 if arch == 'CS_ARCH_ARM' or arch == 'CS_ARCH_PPC' :
114 md.syntax = CS_OPT_SYNTAX_NOREGNAME
115
116 if fname.endswith('3DNow.s.cs'):
117 md.syntax = CS_OPT_SYNTAX_ATT
118
119 for line in lines[1:]:
120 # ignore all the input lines having # in front.
121 if line.startswith('#'):
122 continue
123 #print("Check %s" %line)
124 code = line.split(' = ')[0]
125 asm = ''.join(line.split(' = ')[1:])
126 hex_code = code.replace('0x', '')
127 hex_code = hex_code.replace(',', '')
128 hex_data = hex_code.decode('hex')
129 #hex_bytes = array.array('B', hex_data)
130
131 x = list(md.disasm(hex_data, 0))
132 if len(x) > 0:
133 if x[0].op_str != '':
134 cs_output = "%s %s" %(x[0].mnemonic, x[0].op_str)
135 else:
136 cs_output = x[0].mnemonic
137 else:
138 cs_output = 'FAILED to disassemble'
139
140 cs_output2 = normalize_hex(cs_output)
141 cs_output2 = cs_output2.replace(' ', '')
142
143 if arch == 'CS_ARCH_MIPS':
144 # normalize register alias names
145 cs_output2 = cs_output2.replace('$at', '$1')
146 cs_output2 = cs_output2.replace('$v0', '$2')
147 cs_output2 = cs_output2.replace('$v1', '$3')
148
149 cs_output2 = cs_output2.replace('$a0', '$4')
150 cs_output2 = cs_output2.replace('$a1', '$5')
151 cs_output2 = cs_output2.replace('$a2', '$6')
152 cs_output2 = cs_output2.replace('$a3', '$7')
153
154 cs_output2 = cs_output2.replace('$t0', '$8')
155 cs_output2 = cs_output2.replace('$t1', '$9')
156 cs_output2 = cs_output2.replace('$t2', '$10')
157 cs_output2 = cs_output2.replace('$t3', '$11')
158 cs_output2 = cs_output2.replace('$t4', '$12')
159 cs_output2 = cs_output2.replace('$t5', '$13')
160 cs_output2 = cs_output2.replace('$t6', '$14')
161 cs_output2 = cs_output2.replace('$t7', '$15')
162 cs_output2 = cs_output2.replace('$t8', '$24')
163 cs_output2 = cs_output2.replace('$t9', '$25')
164
165 cs_output2 = cs_output2.replace('$s0', '$16')
166 cs_output2 = cs_output2.replace('$s1', '$17')
167 cs_output2 = cs_output2.replace('$s2', '$18')
168 cs_output2 = cs_output2.replace('$s3', '$19')
169 cs_output2 = cs_output2.replace('$s4', '$20')
170 cs_output2 = cs_output2.replace('$s5', '$21')
171 cs_output2 = cs_output2.replace('$s6', '$22')
172 cs_output2 = cs_output2.replace('$s7', '$23')
173
174 cs_output2 = cs_output2.replace('$k0', '$26')
175 cs_output2 = cs_output2.replace('$k1', '$27')
176
177 print("\t%s = %s" %(hex_code, cs_output))
178
179
if __name__ == '__main__':
    # Usage: ./test_mc.py <input-file.s.cs>
    # With no argument, input file names are read from stdin, one per line.
    if len(sys.argv) == 1:
        for fname in sys.stdin:
            fname = fname.strip()
            # Skip blank lines so they are not passed on as an empty
            # file name.
            if fname:
                test_file(fname)
    else:
        test_file(sys.argv[1])
188