blob: 3af7bba0afae666b59f7761db37831d18bbda55e [file] [log] [blame]
Nguyen Anh Quynh8ba72502014-11-07 17:24:01 +08001#!/usr/bin/python
# Test tool to compare Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014
Nguyen Anh Quynh8ba72502014-11-07 17:24:01 +08003import array, os.path, sys
4from subprocess import Popen, PIPE, STDOUT
5from capstone import *
6
Nguyen Anh Quynhdf7dde22014-11-10 21:50:54 +08007
# Convert all hex numbers in a text to decimal numbers.
def normalize_hex(a):
    """Return *a* with every ``0x``-prefixed hex literal rewritten in decimal.

    Each literal is converted independently in a single pass, so a short
    literal such as ``0x1`` can no longer corrupt a longer one that merely
    starts with the same characters (``0x10``).  A ``0x`` that is not
    followed by at least one hex digit is left untouched instead of raising
    ``ValueError``.

    Args:
        a: text that may contain lower-case ``0x`` hex literals.

    Returns:
        The text with each hex literal replaced by its decimal value.
    """
    # Function-scope import keeps this helper self-contained.
    import re

    return re.sub(
        r'0x([0-9a-fA-F]+)',
        lambda match: str(int(match.group(1), 16)),
        a,
    )
23
24
def run_mc(arch, hexcode, option, syntax=None):
    """Disassemble *hexcode* with ``llvm-mc`` and return the normalized text.

    Args:
        arch: Capstone architecture constant; it is compared against
            CS_ARCH_MIPS, CS_ARCH_X86 and CS_ARCH_ARM64 to pick
            architecture-specific flags and comment markers.
        hexcode: text written to llvm-mc's standard input.
        option: list of extra llvm-mc arguments, appended last.
        syntax: optional single extra llvm-mc argument placed before
            *option*; skipped when falsy.

    Returns:
        ``'FAILED to disassemble (MC)'`` when the first output line contains
        ``'invalid'`` or when llvm-mc printed fewer than two lines; otherwise
        the second output line, lower-cased, whitespace-collapsed and with
        any trailing comment removed.
    """
    def normalize(text):
        # Lower-case and collapse every whitespace run (tabs included)
        # into a single space.
        text = ' '.join(text.lower().split())

        # Cut a trailing comment: '# ' for X86, '// ' for ARM64.
        comment_marker = None
        if arch == CS_ARCH_X86:
            comment_marker = '# '
        elif arch == CS_ARCH_ARM64:
            comment_marker = '// '
        if comment_marker is not None:
            pos = text.find(comment_marker)
            if pos != -1:
                return text[:pos].strip()

        # Remove the padding spaces just inside braces.
        text = text.replace('{ ', '{').replace(' }', '}')
        return text.strip()

    cmd = ['llvm-mc', '-disassemble', '-print-imm-hex']
    if arch == CS_ARCH_MIPS:
        cmd.append('-mattr=+msa')
    if syntax:
        cmd.append(syntax)
    cmd = cmd + list(option)  # build a new list; never mutate the caller's

    # universal_newlines=True makes the pipes text-mode, so the str input
    # and the '\n' split below also work on Python 3.
    p = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
              universal_newlines=True)
    output = p.communicate(input=hexcode)[0]
    lines = output.split('\n')

    # The disassembly is expected on the second line; without one there is
    # nothing to return, so report a failure instead of raising IndexError.
    if 'invalid' in lines[0] or len(lines) < 2:
        return 'FAILED to disassemble (MC)'
    return normalize(lines[1].strip())
66
def test_file(fname):
    """Compare Capstone and llvm-mc output for every test line in *fname*.

    The first line of the file must look like ``# <arch>, <mode>, <option>``.
    Every following line that is neither blank nor starts with ``#`` is
    split on ``' = '``: the left side is a comma-separated list of ``0x``
    bytes, the right side the expected assembly.  The bytes are disassembled
    with Capstone and with llvm-mc (via ``run_mc``).  A mismatch is printed
    only when Capstone's normalized output differs from both the llvm-mc
    output and the expected assembly.

    Args:
        fname: path of the ``.s.cs`` test file.

    Returns:
        None.  Results are reported with ``print``.
    """
    # Function-scope import keeps this function self-contained.
    import binascii

    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line either, so report it the same way.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip the leading '# ', then split the header into its three fields.
    # The option field can be '' or 'None'; it is parsed so that a malformed
    # header still fails here, but it is not used any further.
    (arch, mode, _option) = lines[0][2:].split(', ')
    mode = mode.replace(' ', '')

    archs = {
        "CS_ARCH_ARM": CS_ARCH_ARM,
        "CS_ARCH_ARM64": CS_ARCH_ARM64,
        "CS_ARCH_MIPS": CS_ARCH_MIPS,
        "CS_ARCH_PPC": CS_ARCH_PPC,
        "CS_ARCH_SPARC": CS_ARCH_SPARC,
        "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
        "CS_ARCH_X86": CS_ARCH_X86,
        "CS_ARCH_XCORE": CS_ARCH_XCORE,
        "CS_ARCH_M68K": CS_ARCH_M68K,
    }

    modes = {
        "CS_MODE_16": CS_MODE_16,
        "CS_MODE_32": CS_MODE_32,
        "CS_MODE_64": CS_MODE_64,
        "CS_MODE_MIPS32": CS_MODE_MIPS32,
        "CS_MODE_MIPS64": CS_MODE_MIPS64,
        # A header mode of "0" is mapped to CS_MODE_ARM.
        "0": CS_MODE_ARM,
        "CS_MODE_ARM": CS_MODE_ARM,
        "CS_MODE_THUMB": CS_MODE_THUMB,
        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
        "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
        "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
        "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
    }

    # llvm-mc command-line arguments for each (arch, mode) header pair.
    mc_modes = {
        ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
        ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
        ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
        ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
        ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
    }

    # MIPS register alias -> numbered register, applied in this order to
    # Capstone's output before it is compared with llvm-mc's.
    mips_aliases = (
        ('$at', '$1'), ('$v0', '$2'), ('$v1', '$3'),
        ('$a0', '$4'), ('$a1', '$5'), ('$a2', '$6'), ('$a3', '$7'),
        ('$t0', '$8'), ('$t1', '$9'), ('$t2', '$10'), ('$t3', '$11'),
        ('$t4', '$12'), ('$t5', '$13'), ('$t6', '$14'), ('$t7', '$15'),
        ('$t8', '$24'), ('$t9', '$25'),
        ('$s0', '$16'), ('$s1', '$17'), ('$s2', '$18'), ('$s3', '$19'),
        ('$s4', '$20'), ('$s5', '$21'), ('$s6', '$22'), ('$s7', '$23'),
        ('$k0', '$26'), ('$k1', '$27'),
    )

    md = Cs(archs[arch], modes[mode])

    mc_option = None
    if arch == 'CS_ARCH_X86':
        # Tell llvm-mc to use Intel syntax.
        mc_option = '-output-asm-variant=1'

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    # Two test files need llvm-mc arguments other than what their header
    # would select; every other file uses the mc_modes table.
    mc_args_override = None
    if fname.endswith('thumb-fp-armv8.s.cs'):
        mc_args_override = ['-triple=thumbv8']
    elif fname.endswith('mips64-alu-instructions.s.cs'):
        mc_args_override = ['-triple=mips64el', '-mcpu=mips64r2']

    for line in lines[1:]:
        # Ignore comment lines, and blank lines, which carry no bytes to
        # decode.
        if line.startswith('#') or not line.strip():
            continue

        fields = line.split(' = ')
        code = fields[0]
        asm = ''.join(fields[1:])

        # "0x0f,0x0b" -> "0f0b" -> raw bytes.  binascii.unhexlify works on
        # both Python 2 and 3, unlike str.decode('hex').
        hex_code = code.replace('0x', '').replace(',', '')
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is compared.
        insns = list(md.disasm(hex_data, 0))
        if insns:
            first = insns[0]
            if first.op_str != '':
                cs_output = "%s %s" % (first.mnemonic, first.op_str)
            else:
                cs_output = first.mnemonic
        else:
            cs_output = 'FAILED to disassemble'

        cs_output2 = normalize_hex(cs_output).replace(' ', '')
        if arch == 'CS_ARCH_MIPS':
            for alias, numbered in mips_aliases:
                cs_output2 = cs_output2.replace(alias, numbered)

        if mc_args_override is not None:
            mc_args = mc_args_override
        else:
            mc_args = mc_modes[(arch, mode)]
        mc_output = run_mc(archs[arch], code, mc_args, mc_option)
        mc_output2 = normalize_hex(mc_output)

        if arch == 'CS_ARCH_MIPS':
            mc_output2 = mc_output2.replace(' 0(', '(')

        if arch == 'CS_ARCH_PPC':
            mc_output2 = mc_output2.replace('.+', '')
            mc_output2 = mc_output2.replace('.', '')
            mc_output2 = mc_output2.replace(' 0(', '(')

        mc_output2 = mc_output2.replace(' ', '')
        mc_output2 = mc_output2.replace('opaque', '')

        if cs_output2 != mc_output2:
            # Fall back to the expected assembly recorded in the test file
            # before reporting a mismatch.
            asm = asm.replace(' ', '').strip().lower()
            if asm != cs_output2:
                print("Mismatch: %s" % line.strip())
                print("\tMC = %s" % mc_output)
                print("\tCS = %s" % cs_output)
Nguyen Anh Quynh8ba72502014-11-07 17:24:01 +0800252
253
if __name__ == '__main__':
    # Usage: ./test_mc.py <input-file.s.cs> [...]
    # With no arguments, input file names are read from stdin, one per line.
    if len(sys.argv) == 1:
        for fname in sys.stdin:
            fname = fname.strip()
            # Skip blank lines instead of trying to open an empty path.
            if fname:
                test_file(fname)
    else:
        # Every argument is a test file; a single argument behaves exactly
        # as before.
        for fname in sys.argv[1:]:
            test_file(fname)
Nguyen Anh Quynh8ba72502014-11-07 17:24:01 +0800262