blob: 43bebdaf08276dbd09d32d986c66bee893695c4e [file] [log] [blame]
Catena cyberbcb12472018-06-19 03:31:50 +02001#!/usr/bin/python
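# Tool to generate fuzzing corpus files (under fuzz/corpus/) from Capstone MC test inputs.
# Apparently derived from the Capstone vs. llvm-mc comparison test tool by Nguyen Anh Quynh, 2014.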
3import sys
4import os
5from capstone import *
6
def test_file(fname):
    """Generate fuzzing corpus files from one MC test input file.

    The first line of the input must look like ``# <arch>, <mode>, <option>``.
    Every following line of the form ``<hex bytes> = <assembly>`` produces one
    file ``fuzz/corpus/<basename of fname>_<hex digits>`` that holds a single
    selector byte (looked up from the arch/mode pair) followed by the raw
    instruction bytes.  Lines starting with ``#`` and lines whose hex part is
    shorter than two characters are skipped.

    A missing or malformed header, or an arch/mode pair that has no selector,
    is reported on stdout and the whole input file is skipped without writing
    anything.

    Args:
        fname: path of the input file.  The directory ``fuzz/corpus`` must
            already exist relative to the current working directory.

    Raises:
        IOError/OSError: if the input or an output file cannot be opened.
        TypeError or ValueError (binascii.Error): if the hex part of a line
            is not valid hexadecimal.
    """
    # Local import: the module header only brings in sys, os and capstone.
    import binascii

    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    # An empty file has no header line at all; treat it like a missing header.
    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip '# ' at the front, then split the header into its three fields.
    # Note: option can be '', or 'None'; it is required but not used here.
    fields = lines[0][2:].split(', ')
    if len(fields) != 3:
        print("ERROR: decoding information is malformed")
        return
    arch = fields[0]
    mode = fields[1].replace(' ', '')

    # Selector byte written at the start of every corpus file, keyed by the
    # (arch, mode) strings of the header.
    # NOTE(review): presumably these numbers must match the platform table of
    # the fuzz target that consumes the corpus - confirm before renumbering.
    mc_modes = {
        ("CS_ARCH_X86", "CS_MODE_32"): 0,
        ("CS_ARCH_X86", "CS_MODE_64"): 1,
        ("CS_ARCH_ARM", "CS_MODE_ARM"): 2,
        ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3,
        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4,
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5,
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6,
        ("CS_ARCH_ARM64", "0"): 7,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 8,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 9,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 10,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 11,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 12,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 13,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 13,
        ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 14,
        ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 15,
        ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 16,
        ("CS_ARCH_SYSZ", "0"): 17,
        ("CS_ARCH_XCORE", "0"): 18,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN"): 19,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 20,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6"): 21,
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO"): 22,
        ("CS_ARCH_M68K", "0"): 23,
        ("CS_ARCH_M680X", "CS_MODE_M680X_6809"): 24,
        ("CS_ARCH_EVM", "0"): 25,
    }

    # The arch/mode pair is fixed for the whole file, so check it once up
    # front instead of opening an output file and then failing on the lookup.
    key = (arch, mode)
    if key not in mc_modes:
        print("fail %s %s" % key)
        return
    selector = bytearray([mc_modes[key]])

    base = os.path.basename(fname)
    for line in lines[1:]:
        # Ignore all the input lines having # in front.
        if line.startswith('#'):
            continue
        code = line.split(' = ')[0]
        if len(code) < 2:
            continue
        # "0x0f,0x05" -> "0f05"; also used as the output file name suffix.
        hex_code = code.replace('0x', '').replace(',', '')
        # Decode before opening the output so an invalid hex string does not
        # leave an empty corpus file behind.
        payload = selector + binascii.unhexlify(hex_code)
        # Binary mode: the payload is raw bytes and must not go through
        # newline translation or text encoding.
        with open("fuzz/corpus/%s_%s" % (base, hex_code), 'wb') as fout:
            fout.write(payload)
113
114
if __name__ == '__main__':
    # Given at least one command-line argument, only the first one is
    # processed; with none, every line read from stdin is taken as the path
    # of an input file.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for raw_name in sys.stdin.readlines():
            test_file(raw_name.strip())
123