scripts/convert-assembly-to-jit.py - platform/external/XNNPACK - Gitiles

 #!/usr/bin/env python3
 # Copyright 2021 Google LLC
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 """Converts hand written assembly (.S files) to C++ files using the JIT.

 Takes a single argument, an assembly file, and prints converted output to stdout.
 """

 import argparse
 import datetime
 import re
 import sys

 SPACES = r'\s*'
 COMMA = r',' + SPACES
 COMMENTS = SPACES + '((//\s+.+)|)$'
 WB = r'!'

 REG_NO_GROUP = r'r\d+|s\d+|d\d+|q\d+|sp|lr|pc|x\d+|(?:v\d+\.(?:\d+)?(?:d|s|h|b))'
 REG = r'(' + REG_NO_GROUP + ')'
 IMM_NO_GROUP = r'\d+'
 IMM = r'(' + IMM_NO_GROUP + ')'
 REG_LANE_NO_GROUP = r'(?:' + REG_NO_GROUP + r')\[' + IMM_NO_GROUP + r'\]'
 REG_OR_IMM = r'(' + REG_LANE_NO_GROUP + '|' + REG_NO_GROUP + '|' + IMM_NO_GROUP + ')'

 REGLIST_CONSEC = r'\{(\w+)-(\w+)\}' + SPACES
 REGLIST_INDIV = r'\{([\w.]+(?:,\s+[\w.]+)*)\}' + SPACES
 REGLIST_INDIV_REPLICATE = r'\{(\w+(?:\[\])(,\s*\w+(?:\[\]))*)\}' + SPACES
 REGLIST_INDEX = r'\{(' + REG_LANE_NO_GROUP + ')\}' + SPACES

 APSR = 'APSR_nzcv'
 FPSCR = '(FPSCR)'

 MEMOP = r'\[' + SPACES + REG + '\]' + SPACES
 MEMOP_MAYBE_WB = r'\[' + SPACES + REG + '\]' + f'({WB})?'
 MEMOP_OFFSET = r'\[' + REG + COMMA + '(-?\d+)\]' + SPACES
 MEMOP_OFFSET_MAYBE_WB = r'\[' + REG + COMMA + '(-?\d+)\]' + f'({WB})?' + SPACES

 B_IMM = r'(\d+)(f|b)'

 INSTR = SPACES + r'([A-Z0-9.]+)' + SPACES

 # e.g. #ifndef __APPLE__
 IFDEF_RE = re.compile(r'\s*#(ifndef|endif|ifdef).*')
 # e.g. # Push 96 bytes
 COMMENT_RE = re.compile(SPACES + r'((//|#)\s*.+)')
 # e.g. 0:
 LABEL = re.compile(r'(\w+):')
 # e.g. NOP
 INSTR_RE = re.compile(INSTR + COMMENTS)
 # e.g. VPUSH {d8-d15}
 INSTR_REGLIST_CONSEC_RE = re.compile(INSTR + REGLIST_CONSEC + COMMENTS)
 # e.g. PUSH {r4, r5}
 INSTR_REGLIST_LIST_RE = re.compile(INSTR + REGLIST_INDIV + COMMENTS)
 # e.g. BX lr
 INSTR_OP_RE = re.compile(INSTR + REG + COMMENTS)
 # e.g. BLO 2f
 INSTR_B_IMM = re.compile(INSTR + B_IMM + COMMENTS)
 # e.g. TBNZ x0, 4, 5f
 INSTR_B_REG_IMM_IMM = re.compile(INSTR + REG + COMMA + IMM + COMMA + B_IMM + COMMENTS)
 # e.g. .p2align 3
 P2ALIGN_RE = re.compile(SPACES + r'\.p2align\s+(\d+)')
 # e.g. CMP r0, 2
 INSTR_REG_IMM_RE = re.compile(INSTR + REG + COMMA + IMM + COMMENTS)
 # e.g. LDR r0, [r12]
 INSTR_REG_MEMOP_RE = re.compile(INSTR + REG + COMMA + MEMOP + COMMENTS)
 # e.g. LDR q0, [x4], 16
 INSTR_REG_MEMOP_IMM_RE = re.compile(INSTR + REG + COMMA + MEMOP + COMMA + IMM + COMMENTS)
 # e.g. LDR r0, [sp, 112]
 INSTR_REG_MEMOP_OFFSET_RE = re.compile(INSTR + REG + COMMA + MEMOP_OFFSET +
                                        COMMENTS)
 # e.g. LDRD r6, r7, [sp]
 INSTR_REG_REG_MEMOP_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
                                            MEMOP + COMMENTS)
 # e.g. LDRD r6, r7, [sp, 104], STP d8, d9, [sp, -64]!
 INSTR_REG_REG_MEMOP_OFFSET_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
                                            MEMOP_OFFSET_MAYBE_WB + COMMENTS)
 # e.g. LDP q20, q21, [x5], 32
 INSTR_REG_REG_MEMOP_IMM_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
                                            MEMOP + COMMA + IMM + COMMENTS)
 # e.g. PLD [r4, 64]
 INSTR_MEMOP_OFFSET_RE = re.compile(INSTR + MEMOP_OFFSET + COMMENTS)
 # e.g. movlo r12, r3, vdup.32 q0, d14[0]
 INSTR_REG_REG_RE = re.compile(INSTR + REG + COMMA + REG_OR_IMM + COMMENTS)
 # e.g. SUBS r5, r2, 16 or SUBS r5, r2, r10 or VMLFA.F32 q8, q4, d0[0]
 INSTR_REG_REG_REG_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
                                   REG_OR_IMM + COMMENTS)
 # e.g. VEXT.8  q0, q0, q0, 4
 INSTR_REG_REG_REG_IMM_RE = re.compile(INSTR + REG + COMMA + REG + COMMA + REG +
                                       COMMA + IMM + COMMENTS)
 # e.g. VST1.32 {d16}, [r11], r0
 INSTR_REGLIST_INDIV_MEMOP_REG = re.compile(INSTR + REGLIST_INDIV + COMMA +
                                            MEMOP + COMMA + REG + COMMENTS)
 # e.g. VST1.32 {d16-d19}, [r11], r0
 INSTR_REGLIST_CONSEC_MEMOP_REG = re.compile(INSTR + REGLIST_CONSEC + COMMA +
                                             MEMOP + COMMA + REG + COMMENTS)
 # e.g. VLDM r9, {d16-d19}
 INSTR_REG_REGLIST_CONSECT = re.compile(INSTR + REG + COMMA + REGLIST_CONSEC +
                                        COMMENTS)
 # e.g. VLDM r9!, {d16-d19}
 INSTR_REG_REGLIST_CONSECT_WB = re.compile(INSTR + REG + WB + COMMA +
                                           REGLIST_CONSEC + COMMENTS)
 # e.g. VLDM r9!, {d16}
 INSTR_REG_REGLIST_INDIV_WB = re.compile(INSTR + REG + WB + COMMA +
                                         REGLIST_INDIV + COMMENTS)
 # e.g. VLD1.32 {d0}, [r3]{!}
 INSTR_REGLIST_INDIV_MEMOP = re.compile(INSTR + REGLIST_INDIV + COMMA +
                                        MEMOP_MAYBE_WB + COMMENTS)
 # e.g. LD1 {v16.16b, v17.16b, v18.16b}, [x5], 48
 INSTR_REGLIST_INDIV_MEMOP_IMM = re.compile(INSTR + REGLIST_INDIV + COMMA +
                                        MEMOP + COMMA + IMM + COMMENTS)
 # e.g. VST1.32 {d24-d25}, [r11]{!}
 INSTR_REGLIST_CONSEC_MEMOP = re.compile(INSTR + REGLIST_CONSEC + COMMA +
                                         MEMOP_MAYBE_WB + COMMENTS)
 # e.g. VLD1.32 {d0[]}, [r3]!
 INSTR_REGLIST_REPLICATE_MEMOP = re.compile(INSTR + REGLIST_INDIV_REPLICATE +
                                            COMMA + MEMOP + r'(!)?' + COMMENTS)
 # e.g. VST1.32 {d16[0]}, [r11]{!}
 INSTR_REGLIST_INDEX_MEMOP = re.compile(INSTR + REGLIST_INDEX + COMMA +
                                        MEMOP_MAYBE_WB + COMMENTS)
 # e.g. VMRS APSR_nzcv, FPSCR
 INSTR_REG_FPSCR = re.compile(INSTR + f'({APSR}|{REG_NO_GROUP})' + COMMA +
                              FPSCR + COMMENTS)

 # e.g. PRFM PLDL1KEEP, [x5]
 INSTR_PLD_MEMOP = re.compile(INSTR + f'(PLDL1KEEP)' + COMMA + MEMOP + COMMENTS)
 # e.g. PRFM PLDL1KEEP, [x5, 64]
 INSTR_PLD_MEMOP_OFFSET = re.compile(INSTR + f'(PLDL1KEEP)' + COMMA + MEMOP_OFFSET + COMMENTS)

 COND = r'([A-Z]+)'
 # e.g. CSEL x9, x3, x9, LO
 INSTR_REG_REG_REG_COND_RE = re.compile(INSTR + REG + COMMA + REG + COMMA + REG + COMMA + COND + COMMENTS)


 def remove_brackets(s):
   return s.replace('[', '').replace(']', '')


 def fix_replicate_instruction(s):
   return re.sub(r'_(\d+)', r'r_\1', s, 1)


 def fix_instr_name(s):
   return s.lower().replace('.', '_', 2).replace('and', 'and_', 1)


 def fix_comments(s):
   return s.replace('#', '//', 1)


 def maybe_wb(wb):
   return '++' if wb else ''


 def fix_fn_name(name):
   if name.startswith('xnn_'):
     name = name[len('xnn_'):]
   # remove any type of activations from name
   if 'minmax' in name:
     name = name.replace('minmax_', '')
   return f'xnn_generate_{name}'


 def fix_regs(regs):
   # Vector registers with datatype need to be method calls.
   # e.g. v2.4s -> v2.v4s(), v2.s -> v2.s()
   def repl(m):
     if m.group(2):
       return f'{m[1]}v{m[2]}{m[3]}()'
     else:
       return f'{m[1]}{m[3]}()'
   return re.sub(r'(\w+\.)(\d+)?(\w+)', repl, regs)


 IGNORE_LINES = [r'\s*\.\w+']

 AARCH32 = 'aarch32'
 AARCH64 = 'aarch64'
 GEMM = 'GEMM'
 IGEMM = 'IGEMM'

 def main(input_file):
   arch = None
   kernel_type = GEMM

   if 'aarch32' in input_file:
     arch = AARCH32
   elif 'aarch64' in input_file:
     arch = AARCH64
   else:
     print('ERROR: unknown architecture')
     sys.exit(1)

   if 'igemm' in input_file:
     kernel_type = IGEMM

   # Whether we are in the copyright section.
   in_copyright = False
   # Whether we are in the microkernel function.
   in_function = False
   # Instructions that make up the microkernel.
   instructions = []
   # Lines of code or comments before the actual function body.
   prologue = []
   # All labels need to be declared first, collect them and output them after
   # function signature.
   labels = []
   # Name of the microkernel function.
   fn_name = ''
   sc = ';'
   # Whether we are in the auto-generated comment.
   in_autogen = False

   with open(input_file, 'r', encoding='utf-8') as f:
     for line in f:
       line = line.rstrip()

       # Handle all lines before the microkernel instructions begin.
       if not in_function:
         if 'Auto-generated file' in line:
           in_autogen = True
           continue
         elif 'BEGIN_FUNCTION' in line:
           in_function = True
           fn_name = line.split()[1]
           prologue.append(f'// Converted from: {input_file[20:]}')
           if kernel_type == GEMM:
             prologue.append('void Generator::generate(size_t nc, size_t kc, void* params) {')
           else:
             prologue.append('void Generator::generate(size_t nc, size_t kc, size_t ks, void* params) {')
           continue
         elif 'Copyright ' in line:
           in_autogen = False
           # replace year
           prologue.append(
               re.sub('\d{4}', str(datetime.date.today().year), line,
                      1).rstrip())
           continue
         elif '#include <xnnpack/assembly.h>' in line:
           prologue.append(f'#include <cstddef>')
           prologue.append('')
           prologue.append(f'#include <xnnpack/{arch}-assembler.h>')
           prologue.append('#include <xnnpack/allocator.h>')
           if kernel_type == GEMM:
             prologue.append('#include <xnnpack/gemm.h>')
           else:
             prologue.append('#include <xnnpack/igemm.h>')
           prologue.append('')
           prologue.append('namespace xnnpack {')
           prologue.append(f'namespace {arch} {{')
           prologue.append('namespace {')
           prologue.append('class Generator : public Assembler {')
           prologue.append('  using Assembler::Assembler;')
           prologue.append(' public:')
           if kernel_type == GEMM:
             prologue.append('  void generate(size_t nc, size_t kc, void* params);')
           else:
             prologue.append('  void generate(size_t nc, size_t kc, size_t ks, void* params);')
           prologue.append('};')
           continue
         elif any(re.fullmatch(p, line) for p in IGNORE_LINES):
           continue
         elif in_autogen:
           continue
         else:
           prologue.append(fix_comments(line.rstrip()))
           continue

       # We are now in the microkernel function body.
       # Don't keep the ifdefs.
       m = re.fullmatch(IFDEF_RE, line)
       if m:
         continue
       # But keep other comments.
       m = re.fullmatch(COMMENT_RE, line)
       if m:
         instructions.append(m[1])
         continue

       m = re.fullmatch(LABEL, line)
       if m:
         labels.append(m[1])
         instructions.append(f'bind(l{m[1]}){sc}')
         continue
       m = re.fullmatch(INSTR_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}(){sc} {m[2]}')
         continue
       m = re.fullmatch(INSTR_OP_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}){sc} {m[3]}')
         continue
       m = re.fullmatch(INSTR_REGLIST_CONSEC_MEMOP_REG, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}], {m[5]}){sc} {m[6]}'
         )
         continue
       m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP_REG, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REGLIST_CONSEC_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_REGLIST_LIST_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({{{m[2]}}}){sc} {m[3]}')
         continue
       m = re.fullmatch(INSTR_MEMOP_OFFSET_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}(mem[{m[2]}, {m[3]}]){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_REG_MEMOP_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}]){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_REG_MEMOP_IMM_RE , line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}], {m[4]}){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_MEMOP_OFFSET_RE, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_MEMOP_RE, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}]){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_MEMOP_OFFSET_RE, line)
       if m:
         if m[6]: # wb
           instructions.append(
               f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}, {m[5]}]++){sc} {m[7]}')
         else: #no wb
           instructions.append(
               f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}, {m[5]}]){sc} {m[7]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_MEMOP_IMM_RE , line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}], {m[5]}){sc} {m[6]}')
         continue
       m = re.fullmatch(INSTR_REG_IMM_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {m[3]}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_REG_RE, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}, {fix_regs(m[4])}){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_REG_IMM_RE, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, {m[5]}){sc} {m[6]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_REG_REGLIST_CONSECT, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}(mem[{m[2]}], {{{m[3]}-{m[4]}}}){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REGLIST_CONSECT_WB, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}-{m[4]}}}){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REGLIST_INDIV_WB, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}}}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_B_IMM, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}(l{m[2]}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_B_REG_IMM_IMM , line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, l{m[4]}){sc} {m[6]}')
         continue
       m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
         )
         continue
       m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP_IMM, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}'
         )
         continue
       m = re.fullmatch(INSTR_REGLIST_CONSEC_MEMOP, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}]{maybe_wb(m[5])}){sc} {m[6]}'
         )
         continue
       m = re.fullmatch(INSTR_REGLIST_REPLICATE_MEMOP, line)
       if m:
         if m[5]:
           instructions.append(
               f'{fix_replicate_instruction(fix_instr_name(m[1]))}({{{remove_brackets(m[2])}}}, mem[{m[4]}]++){sc} {m[6]}'
           )
         else:
           instructions.append(
               f'{fix_replicate_instruction(fix_instr_name(m[1]))}({{{remove_brackets(m[2])}}}, mem[{m[4]}]){sc} {m[6]}'
           )
         continue
       m = re.fullmatch(INSTR_REGLIST_INDEX_MEMOP, line)
       if m:
         instructions.append(
             f'{fix_instr_name(m[1])}({{{m[2]}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
         )
         continue
       m = re.fullmatch(P2ALIGN_RE, line)
       if m:
         instructions.append(f'align({1 << int(m[1])}){sc}')
         continue
       m = re.fullmatch(INSTR_REG_FPSCR, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_PLD_MEMOP, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}]){sc} {m[4]}')
         continue
       m = re.fullmatch(INSTR_PLD_MEMOP_OFFSET, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}')
         continue
       m = re.fullmatch(INSTR_REG_REG_REG_COND_RE, line)
       if m:
         instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, k{m[5]}){sc} {m[6]}')
         continue

       # Keep empty lines for formatting
       if line.strip() == '':
         instructions.append('')
         continue

       # Assembly directives that we don't are about.
       if line.strip().startswith('.'):
         continue

       if line.startswith('END_FUNCTION'):
         continue

       # All other lines are error.
       print(f'ERROR: {line}', file=sys.stderr)
       sys.exit(1)

   # Actually emit the JIT codegen (to stdout).
   for p in prologue:
     print(p)

   labels_str = ', '.join(f'l{l}' for l in labels)
   print(f'  Label {labels_str};')
   print()

   indent = '  '
   for i in instructions:
     if i.strip().startswith('#'):
       print(indent + fix_comments(i))
     elif i.strip().startswith('//'):
       print(indent + i)
     elif i.strip() == '':
       print()
     else:
       print(indent + (i).rstrip())

   print('}')
   print('}  // namespace')
   print(f'}}  // {arch}')
   print('}  // xnnpack')
   print('')
   if kernel_type == GEMM:
     print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, const void* params) {{')
   else:
     print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, size_t ks, const void* params) {{')
   print(f'  using namespace xnnpack::{arch};')
   print('  Generator g(code);')
   if kernel_type == GEMM:
     print('  g.generate(nc, kc, nullptr);')
   else:
     print('  g.generate(nc, kc, ks, nullptr);')
   print('  g.finalize();')
   print('  if (g.error() != xnnpack::Error::kNoError) {')
   print('    return xnn_status_invalid_state;')
   print('  }')
   print('  return xnn_status_success;')
   print('}')


 if __name__ == '__main__':
   parser = argparse.ArgumentParser(description='Convert assembly to to JIT C++, writes to stdout.')
   parser.add_argument('input_file', help='Input assembly filename')
   args = parser.parse_args()
   main(args.input_file)
	#!/usr/bin/env python3
	# Copyright 2021 Google LLC
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.
	"""Converts hand written assembly (.S files) to C++ files using the JIT.

	Takes a single argument, an assembly file, and prints converted output to stdout.
	"""

	import argparse
	import datetime
	import re
	import sys

	SPACES = r'\s*'
	COMMA = r',' + SPACES
	COMMENTS = SPACES + '((//\s+.+)\|)$'
	WB = r'!'

	REG_NO_GROUP = r'r\d+\|s\d+\|d\d+\|q\d+\|sp\|lr\|pc\|x\d+\|(?:v\d+\.(?:\d+)?(?:d\|s\|h\|b))'
	REG = r'(' + REG_NO_GROUP + ')'
	IMM_NO_GROUP = r'\d+'
	IMM = r'(' + IMM_NO_GROUP + ')'
	REG_LANE_NO_GROUP = r'(?:' + REG_NO_GROUP + r')\[' + IMM_NO_GROUP + r'\]'
	REG_OR_IMM = r'(' + REG_LANE_NO_GROUP + '\|' + REG_NO_GROUP + '\|' + IMM_NO_GROUP + ')'

	REGLIST_CONSEC = r'\{(\w+)-(\w+)\}' + SPACES
	REGLIST_INDIV = r'\{([\w.]+(?:,\s+[\w.]+)*)\}' + SPACES
	REGLIST_INDIV_REPLICATE = r'\{(\w+(?:\[\])(,\s\w+(?:\[\])))\}' + SPACES
	REGLIST_INDEX = r'\{(' + REG_LANE_NO_GROUP + ')\}' + SPACES

	APSR = 'APSR_nzcv'
	FPSCR = '(FPSCR)'

	MEMOP = r'\[' + SPACES + REG + '\]' + SPACES
	MEMOP_MAYBE_WB = r'\[' + SPACES + REG + '\]' + f'({WB})?'
	MEMOP_OFFSET = r'\[' + REG + COMMA + '(-?\d+)\]' + SPACES
	MEMOP_OFFSET_MAYBE_WB = r'\[' + REG + COMMA + '(-?\d+)\]' + f'({WB})?' + SPACES

	B_IMM = r'(\d+)(f\|b)'

	INSTR = SPACES + r'([A-Z0-9.]+)' + SPACES

	# e.g. #ifndef __APPLE__
	IFDEF_RE = re.compile(r'\s#(ifndef\|endif\|ifdef).')
	# e.g. # Push 96 bytes
	COMMENT_RE = re.compile(SPACES + r'((//\|#)\s*.+)')
	# e.g. 0:
	LABEL = re.compile(r'(\w+):')
	# e.g. NOP
	INSTR_RE = re.compile(INSTR + COMMENTS)
	# e.g. VPUSH {d8-d15}
	INSTR_REGLIST_CONSEC_RE = re.compile(INSTR + REGLIST_CONSEC + COMMENTS)
	# e.g. PUSH {r4, r5}
	INSTR_REGLIST_LIST_RE = re.compile(INSTR + REGLIST_INDIV + COMMENTS)
	# e.g. BX lr
	INSTR_OP_RE = re.compile(INSTR + REG + COMMENTS)
	# e.g. BLO 2f
	INSTR_B_IMM = re.compile(INSTR + B_IMM + COMMENTS)
	# e.g. TBNZ x0, 4, 5f
	INSTR_B_REG_IMM_IMM = re.compile(INSTR + REG + COMMA + IMM + COMMA + B_IMM + COMMENTS)
	# e.g. .p2align 3
	P2ALIGN_RE = re.compile(SPACES + r'\.p2align\s+(\d+)')
	# e.g. CMP r0, 2
	INSTR_REG_IMM_RE = re.compile(INSTR + REG + COMMA + IMM + COMMENTS)
	# e.g. LDR r0, [r12]
	INSTR_REG_MEMOP_RE = re.compile(INSTR + REG + COMMA + MEMOP + COMMENTS)
	# e.g. LDR q0, [x4], 16
	INSTR_REG_MEMOP_IMM_RE = re.compile(INSTR + REG + COMMA + MEMOP + COMMA + IMM + COMMENTS)
	# e.g. LDR r0, [sp, 112]
	INSTR_REG_MEMOP_OFFSET_RE = re.compile(INSTR + REG + COMMA + MEMOP_OFFSET +
	COMMENTS)
	# e.g. LDRD r6, r7, [sp]
	INSTR_REG_REG_MEMOP_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
	MEMOP + COMMENTS)
	# e.g. LDRD r6, r7, [sp, 104], STP d8, d9, [sp, -64]!
	INSTR_REG_REG_MEMOP_OFFSET_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
	MEMOP_OFFSET_MAYBE_WB + COMMENTS)
	# e.g. LDP q20, q21, [x5], 32
	INSTR_REG_REG_MEMOP_IMM_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
	MEMOP + COMMA + IMM + COMMENTS)
	# e.g. PLD [r4, 64]
	INSTR_MEMOP_OFFSET_RE = re.compile(INSTR + MEMOP_OFFSET + COMMENTS)
	# e.g. movlo r12, r3, vdup.32 q0, d14[0]
	INSTR_REG_REG_RE = re.compile(INSTR + REG + COMMA + REG_OR_IMM + COMMENTS)
	# e.g. SUBS r5, r2, 16 or SUBS r5, r2, r10 or VMLFA.F32 q8, q4, d0[0]
	INSTR_REG_REG_REG_RE = re.compile(INSTR + REG + COMMA + REG + COMMA +
	REG_OR_IMM + COMMENTS)
	# e.g. VEXT.8 q0, q0, q0, 4
	INSTR_REG_REG_REG_IMM_RE = re.compile(INSTR + REG + COMMA + REG + COMMA + REG +
	COMMA + IMM + COMMENTS)
	# e.g. VST1.32 {d16}, [r11], r0
	INSTR_REGLIST_INDIV_MEMOP_REG = re.compile(INSTR + REGLIST_INDIV + COMMA +
	MEMOP + COMMA + REG + COMMENTS)
	# e.g. VST1.32 {d16-d19}, [r11], r0
	INSTR_REGLIST_CONSEC_MEMOP_REG = re.compile(INSTR + REGLIST_CONSEC + COMMA +
	MEMOP + COMMA + REG + COMMENTS)
	# e.g. VLDM r9, {d16-d19}
	INSTR_REG_REGLIST_CONSECT = re.compile(INSTR + REG + COMMA + REGLIST_CONSEC +
	COMMENTS)
	# e.g. VLDM r9!, {d16-d19}
	INSTR_REG_REGLIST_CONSECT_WB = re.compile(INSTR + REG + WB + COMMA +
	REGLIST_CONSEC + COMMENTS)
	# e.g. VLDM r9!, {d16}
	INSTR_REG_REGLIST_INDIV_WB = re.compile(INSTR + REG + WB + COMMA +
	REGLIST_INDIV + COMMENTS)
	# e.g. VLD1.32 {d0}, [r3]{!}
	INSTR_REGLIST_INDIV_MEMOP = re.compile(INSTR + REGLIST_INDIV + COMMA +
	MEMOP_MAYBE_WB + COMMENTS)
	# e.g. LD1 {v16.16b, v17.16b, v18.16b}, [x5], 48
	INSTR_REGLIST_INDIV_MEMOP_IMM = re.compile(INSTR + REGLIST_INDIV + COMMA +
	MEMOP + COMMA + IMM + COMMENTS)
	# e.g. VST1.32 {d24-d25}, [r11]{!}
	INSTR_REGLIST_CONSEC_MEMOP = re.compile(INSTR + REGLIST_CONSEC + COMMA +
	MEMOP_MAYBE_WB + COMMENTS)
	# e.g. VLD1.32 {d0[]}, [r3]!
	INSTR_REGLIST_REPLICATE_MEMOP = re.compile(INSTR + REGLIST_INDIV_REPLICATE +
	COMMA + MEMOP + r'(!)?' + COMMENTS)
	# e.g. VST1.32 {d16[0]}, [r11]{!}
	INSTR_REGLIST_INDEX_MEMOP = re.compile(INSTR + REGLIST_INDEX + COMMA +
	MEMOP_MAYBE_WB + COMMENTS)
	# e.g. VMRS APSR_nzcv, FPSCR
	INSTR_REG_FPSCR = re.compile(INSTR + f'({APSR}\|{REG_NO_GROUP})' + COMMA +
	FPSCR + COMMENTS)

	# e.g. PRFM PLDL1KEEP, [x5]
	INSTR_PLD_MEMOP = re.compile(INSTR + f'(PLDL1KEEP)' + COMMA + MEMOP + COMMENTS)
	# e.g. PRFM PLDL1KEEP, [x5, 64]
	INSTR_PLD_MEMOP_OFFSET = re.compile(INSTR + f'(PLDL1KEEP)' + COMMA + MEMOP_OFFSET + COMMENTS)

	COND = r'([A-Z]+)'
	# e.g. CSEL x9, x3, x9, LO
	INSTR_REG_REG_REG_COND_RE = re.compile(INSTR + REG + COMMA + REG + COMMA + REG + COMMA + COND + COMMENTS)


	def remove_brackets(s):
	return s.replace('[', '').replace(']', '')


	def fix_replicate_instruction(s):
	return re.sub(r'_(\d+)', r'r_\1', s, 1)


	def fix_instr_name(s):
	return s.lower().replace('.', '_', 2).replace('and', 'and_', 1)


	def fix_comments(s):
	return s.replace('#', '//', 1)


	def maybe_wb(wb):
	return '++' if wb else ''


	def fix_fn_name(name):
	if name.startswith('xnn_'):
	name = name[len('xnn_'):]
	# remove any type of activations from name
	if 'minmax' in name:
	name = name.replace('minmax_', '')
	return f'xnn_generate_{name}'


	def fix_regs(regs):
	# Vector registers with datatype need to be method calls.
	# e.g. v2.4s -> v2.v4s(), v2.s -> v2.s()
	def repl(m):
	if m.group(2):
	return f'{m[1]}v{m[2]}{m[3]}()'
	else:
	return f'{m[1]}{m[3]}()'
	return re.sub(r'(\w+\.)(\d+)?(\w+)', repl, regs)


	IGNORE_LINES = [r'\s*\.\w+']

	AARCH32 = 'aarch32'
	AARCH64 = 'aarch64'
	GEMM = 'GEMM'
	IGEMM = 'IGEMM'

	def main(input_file):
	arch = None
	kernel_type = GEMM

	if 'aarch32' in input_file:
	arch = AARCH32
	elif 'aarch64' in input_file:
	arch = AARCH64
	else:
	print('ERROR: unknown architecture')
	sys.exit(1)

	if 'igemm' in input_file:
	kernel_type = IGEMM

	# Whether we are in the copyright section.
	in_copyright = False
	# Whether we are in the microkernel function.
	in_function = False
	# Instructions that make up the microkernel.
	instructions = []
	# Lines of code or comments before the actual function body.
	prologue = []
	# All labels need to be declared first, collect them and output them after
	# function signature.
	labels = []
	# Name of the microkernel function.
	fn_name = ''
	sc = ';'
	# Whether we are in the auto-generated comment.
	in_autogen = False

	with open(input_file, 'r', encoding='utf-8') as f:
	for line in f:
	line = line.rstrip()

	# Handle all lines before the microkernel instructions begin.
	if not in_function:
	if 'Auto-generated file' in line:
	in_autogen = True
	continue
	elif 'BEGIN_FUNCTION' in line:
	in_function = True
	fn_name = line.split()[1]
	prologue.append(f'// Converted from: {input_file[20:]}')
	if kernel_type == GEMM:
	prologue.append('void Generator::generate(size_t nc, size_t kc, void* params) {')
	else:
	prologue.append('void Generator::generate(size_t nc, size_t kc, size_t ks, void* params) {')
	continue
	elif 'Copyright ' in line:
	in_autogen = False
	# replace year
	prologue.append(
	re.sub('\d{4}', str(datetime.date.today().year), line,
	1).rstrip())
	continue
	elif '#include <xnnpack/assembly.h>' in line:
	prologue.append(f'#include <cstddef>')
	prologue.append('')
	prologue.append(f'#include <xnnpack/{arch}-assembler.h>')
	prologue.append('#include <xnnpack/allocator.h>')
	if kernel_type == GEMM:
	prologue.append('#include <xnnpack/gemm.h>')
	else:
	prologue.append('#include <xnnpack/igemm.h>')
	prologue.append('')
	prologue.append('namespace xnnpack {')
	prologue.append(f'namespace {arch} {{')
	prologue.append('namespace {')
	prologue.append('class Generator : public Assembler {')
	prologue.append(' using Assembler::Assembler;')
	prologue.append(' public:')
	if kernel_type == GEMM:
	prologue.append(' void generate(size_t nc, size_t kc, void* params);')
	else:
	prologue.append(' void generate(size_t nc, size_t kc, size_t ks, void* params);')
	prologue.append('};')
	continue
	elif any(re.fullmatch(p, line) for p in IGNORE_LINES):
	continue
	elif in_autogen:
	continue
	else:
	prologue.append(fix_comments(line.rstrip()))
	continue

	# We are now in the microkernel function body.
	# Don't keep the ifdefs.
	m = re.fullmatch(IFDEF_RE, line)
	if m:
	continue
	# But keep other comments.
	m = re.fullmatch(COMMENT_RE, line)
	if m:
	instructions.append(m[1])
	continue

	m = re.fullmatch(LABEL, line)
	if m:
	labels.append(m[1])
	instructions.append(f'bind(l{m[1]}){sc}')
	continue
	m = re.fullmatch(INSTR_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}(){sc} {m[2]}')
	continue
	m = re.fullmatch(INSTR_OP_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}){sc} {m[3]}')
	continue
	m = re.fullmatch(INSTR_REGLIST_CONSEC_MEMOP_REG, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}], {m[5]}){sc} {m[6]}'
	)
	continue
	m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP_REG, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REGLIST_CONSEC_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_REGLIST_LIST_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({{{m[2]}}}){sc} {m[3]}')
	continue
	m = re.fullmatch(INSTR_MEMOP_OFFSET_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}(mem[{m[2]}, {m[3]}]){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_REG_MEMOP_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}]){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_REG_MEMOP_IMM_RE , line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}], {m[4]}){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_MEMOP_OFFSET_RE, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_MEMOP_RE, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}]){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_MEMOP_OFFSET_RE, line)
	if m:
	if m[6]: # wb
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}, {m[5]}]++){sc} {m[7]}')
	else: #no wb
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}, {m[5]}]){sc} {m[7]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_MEMOP_IMM_RE , line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}], {m[5]}){sc} {m[6]}')
	continue
	m = re.fullmatch(INSTR_REG_IMM_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {m[3]}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_REG_RE, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}, {fix_regs(m[4])}){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_REG_IMM_RE, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, {m[5]}){sc} {m[6]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_REG_REGLIST_CONSECT, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}(mem[{m[2]}], {{{m[3]}-{m[4]}}}){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REGLIST_CONSECT_WB, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}-{m[4]}}}){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REGLIST_INDIV_WB, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}}}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_B_IMM, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}(l{m[2]}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_B_REG_IMM_IMM , line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, l{m[4]}){sc} {m[6]}')
	continue
	m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
	)
	continue
	m = re.fullmatch(INSTR_REGLIST_INDIV_MEMOP_IMM, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}'
	)
	continue
	m = re.fullmatch(INSTR_REGLIST_CONSEC_MEMOP, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}]{maybe_wb(m[5])}){sc} {m[6]}'
	)
	continue
	m = re.fullmatch(INSTR_REGLIST_REPLICATE_MEMOP, line)
	if m:
	if m[5]:
	instructions.append(
	f'{fix_replicate_instruction(fix_instr_name(m[1]))}({{{remove_brackets(m[2])}}}, mem[{m[4]}]++){sc} {m[6]}'
	)
	else:
	instructions.append(
	f'{fix_replicate_instruction(fix_instr_name(m[1]))}({{{remove_brackets(m[2])}}}, mem[{m[4]}]){sc} {m[6]}'
	)
	continue
	m = re.fullmatch(INSTR_REGLIST_INDEX_MEMOP, line)
	if m:
	instructions.append(
	f'{fix_instr_name(m[1])}({{{m[2]}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
	)
	continue
	m = re.fullmatch(P2ALIGN_RE, line)
	if m:
	instructions.append(f'align({1 << int(m[1])}){sc}')
	continue
	m = re.fullmatch(INSTR_REG_FPSCR, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_PLD_MEMOP, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}]){sc} {m[4]}')
	continue
	m = re.fullmatch(INSTR_PLD_MEMOP_OFFSET, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}')
	continue
	m = re.fullmatch(INSTR_REG_REG_REG_COND_RE, line)
	if m:
	instructions.append(f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, k{m[5]}){sc} {m[6]}')
	continue

	# Keep empty lines for formatting
	if line.strip() == '':
	instructions.append('')
	continue

	# Assembly directives that we don't are about.
	if line.strip().startswith('.'):
	continue

	if line.startswith('END_FUNCTION'):
	continue

	# All other lines are error.
	print(f'ERROR: {line}', file=sys.stderr)
	sys.exit(1)

	# Actually emit the JIT codegen (to stdout).
	for p in prologue:
	print(p)

	labels_str = ', '.join(f'l{l}' for l in labels)
	print(f' Label {labels_str};')
	print()

	indent = ' '
	for i in instructions:
	if i.strip().startswith('#'):
	print(indent + fix_comments(i))
	elif i.strip().startswith('//'):
	print(indent + i)
	elif i.strip() == '':
	print()
	else:
	print(indent + (i).rstrip())

	print('}')
	print('} // namespace')
	print(f'}} // {arch}')
	print('} // xnnpack')
	print('')
	if kernel_type == GEMM:
	print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, const void* params) {{')
	else:
	print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, size_t ks, const void* params) {{')
	print(f' using namespace xnnpack::{arch};')
	print(' Generator g(code);')
	if kernel_type == GEMM:
	print(' g.generate(nc, kc, nullptr);')
	else:
	print(' g.generate(nc, kc, ks, nullptr);')
	print(' g.finalize();')
	print(' if (g.error() != xnnpack::Error::kNoError) {')
	print(' return xnn_status_invalid_state;')
	print(' }')
	print(' return xnn_status_success;')
	print('}')


	if __name__ == '__main__':
	parser = argparse.ArgumentParser(description='Convert assembly to to JIT C++, writes to stdout.')
	parser.add_argument('input_file', help='Input assembly filename')
	args = parser.parse_args()
	main(args.input_file)