Nguyen Anh Quynh | 8ba7250 | 2014-11-07 17:24:01 +0800 | [diff] [blame] | 1 | #!/usr/bin/python |
Nguyen Anh Quynh | 9025e92 | 2014-11-07 17:28:39 +0800 | [diff] [blame] | 2 | # Test tool to compare Capstone output with llvm-mc. By Nguyen Anh Quynh, 2014 |
Nguyen Anh Quynh | 8ba7250 | 2014-11-07 17:24:01 +0800 | [diff] [blame] | 3 | import array, os.path, sys |
| 4 | from subprocess import Popen, PIPE, STDOUT |
| 5 | from capstone import * |
| 6 | |
Nguyen Anh Quynh | df7dde2 | 2014-11-10 21:50:54 +0800 | [diff] [blame] | 7 | |
def normalize_hex(a):
    """Return *a* with every ``0x``-prefixed hex number rewritten in decimal.

    Each literal is converted independently in a single left-to-right pass,
    so a short literal such as ``0x1`` never clobbers the leading part of a
    longer one such as ``0x1f``.  A bare ``0x`` that is not followed by any
    hex digit is left untouched.

    Args:
        a: the text to normalize.

    Returns:
        A new string with hex literals replaced by their decimal value.
    """
    # Local import keeps this function self-contained.
    import re

    return re.sub(r'0x[0-9a-fA-F]+',
                  lambda m: str(int(m.group(0), 16)),
                  a)
| 23 | |
| 24 | |
def run_mc(arch, hexcode, option, syntax=None):
    """Disassemble *hexcode* with ``llvm-mc`` and return the normalized text.

    Args:
        arch: Capstone architecture constant; compared against CS_ARCH_X86,
            CS_ARCH_ARM64 and CS_ARCH_MIPS to pick arch-specific handling.
        hexcode: text written to llvm-mc's standard input.
        option: list of extra llvm-mc command-line arguments, appended last.
        syntax: optional single extra argument placed before *option*;
            skipped when falsy.

    Returns:
        The second line of llvm-mc's combined stdout/stderr, lower-cased
        and whitespace-normalized, or ``'FAILED to disassemble (MC)'`` when
        the first line contains ``'invalid'`` or there is no second line.
    """
    failure = 'FAILED to disassemble (MC)'

    def normalize(text):
        # Lower-case and collapse every whitespace run (tabs included).
        text = ' '.join(text.lower().split())
        if arch == CS_ARCH_X86:
            # remove trailing comment introduced by '# '
            i = text.find('# ')
            if i != -1:
                return text[:i].strip()
        if arch == CS_ARCH_ARM64:
            # remove trailing comment introduced by '// '
            i = text.find('// ')
            if i != -1:
                return text[:i].strip()
        # remove some redundant spaces inside braces
        text = text.replace('{ ', '{').replace(' }', '}')
        return text.strip()

    # Build the command once; argument order matches the historical one:
    # base flags, MIPS attribute, syntax, then caller-supplied options.
    cmd = ['llvm-mc', '-disassemble', '-print-imm-hex']
    if arch == CS_ARCH_MIPS:
        cmd.append('-mattr=+msa')
    if syntax:
        cmd.append(syntax)
    cmd.extend(option)

    # universal_newlines=True makes the pipes text-mode, so the same str
    # input/output handling works on both Python 2 and Python 3.
    p = Popen(cmd, stdout=PIPE, stdin=PIPE, stderr=STDOUT,
              universal_newlines=True)
    output = p.communicate(input=hexcode)[0]
    lines = output.split('\n')

    # A missing second line (e.g. empty output) is treated as a failure
    # instead of raising IndexError.
    if 'invalid' in lines[0] or len(lines) < 2:
        return failure
    return normalize(lines[1].strip())
| 66 | |
# MIPS register alias -> numeric register name, applied in this order as
# plain substring replacements on Capstone's output before it is compared
# with llvm-mc's output.
_MIPS_REG_ALIASES = (
    ('$at', '$1'),
    ('$v0', '$2'), ('$v1', '$3'),
    ('$a0', '$4'), ('$a1', '$5'), ('$a2', '$6'), ('$a3', '$7'),
    ('$t0', '$8'), ('$t1', '$9'), ('$t2', '$10'), ('$t3', '$11'),
    ('$t4', '$12'), ('$t5', '$13'), ('$t6', '$14'), ('$t7', '$15'),
    ('$t8', '$24'), ('$t9', '$25'),
    ('$s0', '$16'), ('$s1', '$17'), ('$s2', '$18'), ('$s3', '$19'),
    ('$s4', '$20'), ('$s5', '$21'), ('$s6', '$22'), ('$s7', '$23'),
    ('$k0', '$26'), ('$k1', '$27'),
)


def test_file(fname):
    """Compare Capstone and llvm-mc disassembly for every case in *fname*.

    The first line of the file must look like ``# <arch>, <mode>, <option>``.
    Every following line that does not start with ``#`` and is not blank is
    expected to look like ``<hex bytes> = <expected asm>``, where the hex
    bytes are written as ``0x..`` values separated by commas.

    For each case the first instruction decoded by Capstone is compared
    (after normalization) with llvm-mc's output.  If they differ and the
    Capstone output also differs from the expected asm stored in the file,
    a "Mismatch" report is printed.

    Args:
        fname: path of the test file to process.

    Raises:
        ValueError: if the header does not split into exactly three fields.
        KeyError: if the header names an arch/mode not listed below.
    """
    # Local import keeps this block self-contained.
    import binascii

    print("Test %s" % fname)
    with open(fname) as f:
        lines = f.readlines()

    if not lines or not lines[0].startswith('# '):
        print("ERROR: decoding information is missing")
        return

    # Skip '# ' at the front, then split the header into its three fields.
    # The third field (option; may be '' or 'None') is parsed but unused.
    (arch, mode, option) = lines[0][2:].split(', ')
    mode = mode.replace(' ', '')
    option = option.strip()

    archs = {
        "CS_ARCH_ARM": CS_ARCH_ARM,
        "CS_ARCH_ARM64": CS_ARCH_ARM64,
        "CS_ARCH_MIPS": CS_ARCH_MIPS,
        "CS_ARCH_PPC": CS_ARCH_PPC,
        "CS_ARCH_SPARC": CS_ARCH_SPARC,
        "CS_ARCH_SYSZ": CS_ARCH_SYSZ,
        "CS_ARCH_X86": CS_ARCH_X86,
        "CS_ARCH_XCORE": CS_ARCH_XCORE,
    }

    modes = {
        "CS_MODE_16": CS_MODE_16,
        "CS_MODE_32": CS_MODE_32,
        "CS_MODE_64": CS_MODE_64,
        "CS_MODE_MIPS32": CS_MODE_MIPS32,
        "CS_MODE_MIPS64": CS_MODE_MIPS64,
        "0": CS_MODE_ARM,
        "CS_MODE_ARM": CS_MODE_ARM,
        "CS_MODE_THUMB": CS_MODE_THUMB,
        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
        "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
        "CS_MODE_64+CS_MODE_LITTLE_ENDIAN": CS_MODE_64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_64+CS_MODE_BIG_ENDIAN": CS_MODE_64+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO,
        "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO": CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN,
        "CS_MODE_BIG_ENDIAN+CS_MODE_V9": CS_MODE_BIG_ENDIAN + CS_MODE_V9,
        "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN,
        "CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS32+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN": CS_MODE_MIPS64+CS_MODE_LITTLE_ENDIAN,
        "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN": CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN,
    }

    # llvm-mc command-line arguments for each (arch, mode) header pair.
    mc_modes = {
        ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
        ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
        ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
        ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64"): ['-triple=mips64el'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32"): ['-triple=mipsel'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS64+CS_MODE_BIG_ENDIAN"): ['-triple=mips64'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_MICRO+CS_MODE_BIG_ENDIAN"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_MIPS", "CS_MODE_MIPS32+CS_MODE_BIG_ENDIAN+CS_MODE_MICRO"): ['-triple=mips', '-mattr=+micromips'],
        ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
        ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
        ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
    }

    md = Cs(archs[arch], modes[mode])

    mc_option = None
    if arch == 'CS_ARCH_X86':
        # tell llvm-mc to use Intel syntax
        mc_option = '-output-asm-variant=1'

    if arch in ('CS_ARCH_ARM', 'CS_ARCH_PPC'):
        md.syntax = CS_OPT_SYNTAX_NOREGNAME

    # This one file is checked with AT&T syntax on the Capstone side.
    if fname.endswith('3DNow.s.cs'):
        md.syntax = CS_OPT_SYNTAX_ATT

    for line in lines[1:]:
        # Ignore input lines having # in front, and blank lines (which
        # would otherwise fail hex decoding).
        if line.startswith('#') or not line.strip():
            continue

        # partition() keeps any later ' = ' inside the expected asm intact.
        code, _, asm = line.partition(' = ')
        hex_code = code.replace('0x', '').replace(',', '').strip()
        hex_data = binascii.unhexlify(hex_code)

        # Only the first decoded instruction is compared.
        insns = list(md.disasm(hex_data, 0))
        if insns:
            first = insns[0]
            if first.op_str != '':
                cs_output = "%s %s" % (first.mnemonic, first.op_str)
            else:
                cs_output = first.mnemonic
        else:
            cs_output = 'FAILED to disassemble'

        cs_output2 = normalize_hex(cs_output).replace(' ', '')

        if arch == 'CS_ARCH_MIPS':
            # normalize register alias names
            for alias, regnum in _MIPS_REG_ALIASES:
                cs_output2 = cs_output2.replace(alias, regnum)

        # Two test files need llvm-mc arguments that differ from the
        # (arch, mode) table.
        if fname.endswith('thumb-fp-armv8.s.cs'):
            mc_args = ['-triple=thumbv8']
        elif fname.endswith('mips64-alu-instructions.s.cs'):
            mc_args = ['-triple=mips64el', '-mcpu=mips64r2']
        else:
            mc_args = mc_modes[(arch, mode)]
        mc_output = run_mc(archs[arch], code, mc_args, mc_option)
        mc_output2 = normalize_hex(mc_output)

        if arch == 'CS_ARCH_MIPS':
            # drop an explicit zero displacement: ' 0(' -> '('
            mc_output2 = mc_output2.replace(' 0(', '(')

        if arch == 'CS_ARCH_PPC':
            mc_output2 = mc_output2.replace('.+', '')
            mc_output2 = mc_output2.replace('.', '')
            mc_output2 = mc_output2.replace(' 0(', '(')

        mc_output2 = mc_output2.replace(' ', '')
        mc_output2 = mc_output2.replace('opaque', '')

        if cs_output2 != mc_output2:
            # Fall back to the expected asm stored in the test file before
            # reporting a mismatch.
            asm = asm.replace(' ', '').strip().lower()
            if asm != cs_output2:
                print("Mismatch: %s" % line.strip())
                print("\tMC = %s" % mc_output)
                print("\tCS = %s" % cs_output)
Nguyen Anh Quynh | 8ba7250 | 2014-11-07 17:24:01 +0800 | [diff] [blame] | 251 | |
| 252 | |
if __name__ == '__main__':
    # Usage: ./test_mc.py <input-file.s.cs> [<input-file.s.cs> ...]
    # With no arguments, file names are read from stdin, one per line.
    if len(sys.argv) == 1:
        for fname in sys.stdin:
            fname = fname.strip()
            # skip blank lines instead of trying to open ''
            if fname:
                test_file(fname)
    else:
        # test every file given on the command line, not only the first
        for fname in sys.argv[1:]:
            test_file(fname)
Nguyen Anh Quynh | 8ba7250 | 2014-11-07 17:24:01 +0800 | [diff] [blame] | 261 | |