suite: update test_mc.py to better handle output of different formats of MC & CS
diff --git a/suite/test_mc.py b/suite/test_mc.py
index 6e08822..6e7b64d 100755
--- a/suite/test_mc.py
+++ b/suite/test_mc.py
@@ -4,17 +4,44 @@
 from subprocess import Popen, PIPE, STDOUT
 from capstone import *
 
+
+# convert all hex numbers to decimal numbers in a text
+def normalize_hex(a):
+    while(True):
+        i = a.find('0x')
+        if i == -1: # no more hex number
+            break
+        hexnum = '0x'
+        for c in a[i + 2:]:
+            if c in '0123456789abcdefABCDEF':
+                hexnum += c
+            else:
+                break
+        num = int(hexnum, 16)
+        a = a.replace(hexnum, str(num))
+    return a
+
+
 def run_mc(arch, hexcode, option, syntax=None):
     def normalize(text):
         # remove tabs
+        text = text.lower()
         items = text.split()
         text = ' '.join(items)
         if arch == CS_ARCH_X86:
             # remove comment after #
             i = text.find('# ')
             if i != -1:
-                return text[:i].lower()
-        return text.lower()
+                return text[:i].strip()
+        if arch == CS_ARCH_ARM64:
+            # remove comment after #
+            i = text.find('// ')
+            if i != -1:
+                return text[:i].strip()
+        # remove some redundant spaces
+        text = text.replace('{ ', '{')
+        text = text.replace(' }', '}')
+        return text.strip()
 
     #print("Trying to decode: %s" %hexcode)
     if syntax:
@@ -32,7 +59,7 @@
     #print lines
     if 'invalid' in lines[0]:
         #print 'invalid ----'
-        return 'FAILED to disassemble'
+        return 'FAILED to disassemble (MC)'
     else:
         #print 'OK:', lines[1]
         return normalize(lines[1].strip())
@@ -73,6 +100,9 @@
         "0": CS_MODE_ARM,
         "CS_MODE_ARM": CS_MODE_ARM,
         "CS_MODE_THUMB": CS_MODE_THUMB,
+        "CS_MODE_ARM+CS_MODE_V8": CS_MODE_ARM+CS_MODE_V8,
+        "CS_MODE_THUMB+CS_MODE_V8": CS_MODE_THUMB+CS_MODE_V8,
+        "CS_MODE_THUMB+CS_MODE_MCLASS": CS_MODE_THUMB+CS_MODE_MCLASS,
         "CS_MODE_LITTLE_ENDIAN": CS_MODE_LITTLE_ENDIAN,
         "CS_MODE_BIG_ENDIAN": CS_MODE_BIG_ENDIAN,
         "CS_MODE_32+CS_MODE_BIG_ENDIAN": CS_MODE_32+CS_MODE_BIG_ENDIAN,
@@ -94,7 +124,10 @@
         ("CS_ARCH_X86", "CS_MODE_32"): ['-triple=i386'],
         ("CS_ARCH_X86", "CS_MODE_64"): ['-triple=x86_64'],
         ("CS_ARCH_ARM", "CS_MODE_ARM"): ['-triple=armv7'],
-        ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=armv7'],
+        ("CS_ARCH_ARM", "CS_MODE_THUMB"): ['-triple=thumbv7'],
+        ("CS_ARCH_ARM", "CS_MODE_ARM+CS_MODE_V8"): ['-triple=armv8'],
+        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_V8"): ['-triple=thumbv8'],
+        ("CS_ARCH_ARM", "CS_MODE_THUMB+CS_MODE_MCLASS"): ['-triple=thumbv7m'],
         ("CS_ARCH_ARM64", "0"): ['-triple=aarch64'],
         ("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_BIG_ENDIAN"): ['-triple=mips'],
         ("CS_ARCH_MIPS", "CS_MODE_32+CS_MODE_MICRO"): ['-triple=mipsel', '-mattr=+micromips'],
@@ -106,7 +139,7 @@
         ("CS_ARCH_PPC", "CS_MODE_BIG_ENDIAN"): ['-triple=powerpc64'],
         ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN'): ['-triple=sparc'],
         ('CS_ARCH_SPARC', 'CS_MODE_BIG_ENDIAN+CS_MODE_V9'): ['-triple=sparcv9'],
-        ('CS_ARCH_SYSZ', '0'): ['-triple=s390x'],
+        ('CS_ARCH_SYSZ', '0'): ['-triple=s390x', '-mcpu=z196'],
     }
 
     #if not option in ('', 'None'):
@@ -120,15 +153,19 @@
         # tell llvm-mc to use Intel syntax
         mc_option = '-output-asm-variant=1'
 
-    if arch == 'CS_ARCH_ARM':
+    if arch == 'CS_ARCH_ARM' or arch == 'CS_ARCH_PPC' :
         md.syntax = CS_OPT_SYNTAX_NOREGNAME
 
+    if fname.endswith('3DNow.s.cs'):
+        md.syntax = CS_OPT_SYNTAX_ATT
+
     for line in lines[1:]:
         # ignore all the input lines having # in front.
         if line.startswith('#'):
             continue
         #print("Check %s" %line)
         code = line.split(' = ')[0]
+        asm  = ''.join(line.split(' = ')[1:])
         hex_code = code.replace('0x', '')
         hex_code = hex_code.replace(',', '')
         hex_data = hex_code.decode('hex')
@@ -143,11 +180,63 @@
         else:
             cs_output = 'FAILED to disassemble'
 
-        mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
-        if (cs_output != mc_output):
-            print("Mismatch: %s" %code)
-            print("\tMC = %s" %mc_output)
-            print("\tCS = %s" %cs_output)
+        cs_output2 = normalize_hex(cs_output)
+        cs_output2 = cs_output2.replace(' ', '')
+
+        if arch == 'CS_ARCH_MIPS':
+            # normalize register alias names
+            cs_output2 = cs_output2.replace('$at', '$1')
+            cs_output2 = cs_output2.replace('$v0', '$2')
+            cs_output2 = cs_output2.replace('$v1', '$3')
+
+            cs_output2 = cs_output2.replace('$a0', '$4')
+            cs_output2 = cs_output2.replace('$a1', '$5')
+            cs_output2 = cs_output2.replace('$a2', '$6')
+            cs_output2 = cs_output2.replace('$a3', '$7')
+
+            cs_output2 = cs_output2.replace('$t0', '$8')
+            cs_output2 = cs_output2.replace('$t1', '$9')
+            cs_output2 = cs_output2.replace('$t2', '$10')
+            cs_output2 = cs_output2.replace('$t3', '$11')
+            cs_output2 = cs_output2.replace('$t4', '$12')
+            cs_output2 = cs_output2.replace('$t5', '$13')
+            cs_output2 = cs_output2.replace('$t6', '$14')
+            cs_output2 = cs_output2.replace('$t7', '$15')
+            cs_output2 = cs_output2.replace('$t8', '$24')
+            cs_output2 = cs_output2.replace('$t9', '$25')
+
+            cs_output2 = cs_output2.replace('$s0', '$16')
+            cs_output2 = cs_output2.replace('$s1', '$17')
+            cs_output2 = cs_output2.replace('$s2', '$18')
+            cs_output2 = cs_output2.replace('$s3', '$19')
+            cs_output2 = cs_output2.replace('$s4', '$20')
+            cs_output2 = cs_output2.replace('$s5', '$21')
+            cs_output2 = cs_output2.replace('$s6', '$22')
+            cs_output2 = cs_output2.replace('$s7', '$23')
+
+            cs_output2 = cs_output2.replace('$k0', '$26')
+            cs_output2 = cs_output2.replace('$k1', '$27')
+
+        #print("Running MC ...")
+        if fname.endswith('thumb-fp-armv8.s.cs'):
+            mc_output = run_mc(archs[arch], code, ['-triple=thumbv8'], mc_option)
+        elif fname.endswith('mips64-alu-instructions.s.cs'):
+            mc_output = run_mc(archs[arch], code, ['-triple=mips64el', '-mcpu=mips64r2'], mc_option)
+        else:
+            mc_output = run_mc(archs[arch], code, mc_modes[(arch, mode)], mc_option)
+        mc_output2 = normalize_hex(mc_output)
+        if arch == 'CS_ARCH_MIPS':
+            mc_output2 = mc_output2.replace(' 0(', '(')
+        mc_output2 = mc_output2.replace(' ', '')
+        mc_output2 = mc_output2.replace('opaque', '')
+
+
+        if (cs_output2 != mc_output2):
+            asm = asm.replace(' ', '').strip().lower()
+            if asm != cs_output2:
+                print("Mismatch: %s" %line.strip())
+                print("\tMC = %s" %mc_output)
+                print("\tCS = %s" %cs_output)
 
 
 if __name__ == '__main__':