Catena cyber | bcb1247 | 2018-06-19 03:31:50 +0200 | [diff] [blame] | 1 | #!/usr/bin/python |
# Tool that writes fuzz corpus files (fuzz/corpus/) from Capstone MC test inputs; derived from the tool that compares Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014
| 3 | import sys |
| 4 | import os |
| 5 | from capstone import * |
| 6 | |
| 7 | def test_file(fname): |
| 8 | print("Test %s" %fname); |
| 9 | f = open(fname) |
| 10 | lines = f.readlines() |
| 11 | f.close() |
| 12 | |
| 13 | if not lines[0].startswith('# '): |
| 14 | print("ERROR: decoding information is missing") |
| 15 | return |
| 16 | |
| 17 | # skip '# ' at the front, then split line to get out hexcode |
| 18 | # Note: option can be '', or 'None' |
| 19 | #print lines[0] |
| 20 | #print lines[0][2:].split(', ') |
| 21 | (arch, mode, option) = lines[0][2:].split(', ') |
| 22 | mode = mode.replace(' ', '') |
| 23 | option = option.strip() |
| 24 | |
| 25 | archs = { |
| 26 | "CS_ARCH_ARM": CS_ARCH_ARM, |
| 27 | "CS_ARCH_ARM64": CS_ARCH_ARM64, |
| 28 | "CS_ARCH_MIPS": CS_ARCH_MIPS, |
| 29 | "CS_ARCH_PPC": CS_ARCH_PPC, |
| 30 | "CS_ARCH_SPARC": CS_ARCH_SPARC, |
| 31 | "CS_ARCH_SYSZ": CS_ARCH_SYSZ, |
| 32 | "CS_ARCH_X86": CS_ARCH_X86, |
| 33 | "CS_ARCH_XCORE": CS_ARCH_XCORE, |
| 34 | } |
| 35 | |
| 36 | modes = { |
| 37 | "CS_MODE_16": CS_MODE_16, |
| 38 | "CS_MODE_32": CS_MODE_32, |
| 39 | "CS_MODE_64": CS_MODE_64, |
| 40 | "CS_MODE_MIPS32": CS_MODE_MIPS32, |
| 41 | "CS_MODE_MIPS64": CS_MODE_MIPS64, |
| 42 | "0": CS_MODE_ARM, |
| 43 | "CS_MODE_ARM": CS_MODE_ARM, |
| 44 | "CS_MODE_THUMB": CS_MODE_THUMB, |
| 45 | "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8, |
| 46 | "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8, |
| 47 | "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS, |
| 48 | "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN, |
| 49 | "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN, |
| 50 | "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN, |
| 51 | "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN, |
| 52 | "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO, |
| 53 | "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN, |
| 54 | "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN, |
| 55 | "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9, |
| 56 | "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN, |
| 57 | "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN, |
| 58 | "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN, |
| 59 | "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN, |
| 60 | } |
| 61 | |
| 62 | mc_modes = { |
| 63 | ("CS_ARCH_X86", "CS_MODE_32"): 0, |
| 64 | ("CS_ARCH_X86", "CS_MODE_64"): 1, |
| 65 | ("CS_ARCH_ARM", "CS_MODE_ARM"): 2, |
| 66 | ("CS_ARCH_ARM", "CS_MODE_THUMB"): 3, |
| 67 | ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): 4, |
| 68 | ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): 5, |
| 69 | ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): 6, |
| 70 | ("CS_ARCH_ARM64", "0"): 7, |
| 71 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): 8, |
| 72 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): 9, |
| 73 | ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): 10, |
| 74 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): 11, |
| 75 | ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): 12, |
| 76 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 13, |
| 77 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): 13, |
| 78 | ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): 14, |
| 79 | ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN"): 15, |
| 80 | ("CS_ARCH_SPARC", "CS_MODE_BIG_ENDIAN+CS_MODE_V9"): 16, |
| 81 | ("CS_ARCH_SYSZ", "0"): 17, |
| 82 | ("CS_ARCH_XCORE", "0"): 18, |
| 83 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_BIG_ENDIAN"): 19, |
| 84 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): 20, |
| 85 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6"): 21, |
| 86 | ("CS_ARCH_MIPS", "CS_MODE_MIPS32R6+CS_MODE_MICRO"): 22, |
| 87 | ("CS_ARCH_M68K", "0"): 23, |
| 88 | ("CS_ARCH_M680X", "CS_MODE_M680X_6809"): 24, |
| 89 | ("CS_ARCH_EVM", "0"): 25, |
| 90 | } |
| 91 | |
| 92 | #if not option in ('', 'None'): |
| 93 | # print archs[arch], modes[mode], options[option] |
| 94 | |
| 95 | for line in lines[1:]: |
| 96 | # ignore all the input lines having # in front. |
| 97 | if line.startswith('#'): |
| 98 | continue |
| 99 | #print("Check %s" %line) |
| 100 | code = line.split(' = ')[0] |
| 101 | if len(code) < 2: |
| 102 | continue |
| 103 | asm = ''.join(line.split(' = ')[1:]) |
| 104 | hex_code = code.replace('0x', '') |
| 105 | hex_code = hex_code.replace(',', '') |
| 106 | hex_data = hex_code.decode('hex') |
| 107 | fout = open("fuzz/corpus/%s_%s" % (os.path.basename(fname), hex_code), 'w') |
| 108 | if (arch, mode) not in mc_modes: |
| 109 | print "fail", arch, mode |
| 110 | fout.write(unichr(mc_modes[(arch, mode)])) |
| 111 | fout.write(hex_data) |
| 112 | fout.close() |
| 113 | |
| 114 | |
if __name__ == '__main__':
    # A command-line argument names the single input file to process;
    # with no arguments the input file names are read from standard
    # input, one per line.
    if len(sys.argv) != 1:
        test_file(sys.argv[1])
    else:
        for entry in sys.stdin.readlines():
            test_file(entry.strip())
| 123 | |