#!/usr/bin/env python3
# Copyright 2021 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Converts hand written assembly (.S files) to C++ files using the JIT.
Takes a single argument, an assembly file, and prints converted output to stdout.
"""
import argparse
import datetime
import re
import sys
# Building blocks for the instruction-matching regular expressions below.
# Every fragment is a raw string so that backslashes reach the regex engine
# untouched (non-raw '\s', '\d', '\]' are invalid string escapes).
SPACES = r'\s*'
COMMA = r',' + SPACES
# Optional trailing `// ...` comment. Always contributes two capture groups;
# the outer one is the empty string when there is no comment.
COMMENTS = SPACES + r'((//\s+.+)|)$'
# Write-back marker.
WB = r'!'
REG_NO_GROUP = r'r\d+|s\d+|d\d+|q\d+|sp|lr|pc|x\d+|(?:v\d+\.(?:\d+)?(?:d|s|h|b))'
REG = r'(' + REG_NO_GROUP + r')'
IMM_NO_GROUP = r'\d+'
IMM = r'(' + IMM_NO_GROUP + r')'
# A register followed by a lane index, e.g. d14[0].
REG_LANE_NO_GROUP = r'(?:' + REG_NO_GROUP + r')\[' + IMM_NO_GROUP + r'\]'
REG_OR_IMM = r'(' + REG_LANE_NO_GROUP + r'|' + REG_NO_GROUP + r'|' + IMM_NO_GROUP + r')'
# e.g. {d8-d15}
REGLIST_CONSEC = r'\{(\w+)-(\w+)\}' + SPACES
# e.g. {r4, r5}
REGLIST_INDIV = r'\{([\w.]+(?:,\s+[\w.]+)*)\}' + SPACES
# e.g. {d0[]}; note the inner repetition adds a second capture group.
REGLIST_INDIV_REPLICATE = r'\{(\w+(?:\[\])(,\s*\w+(?:\[\]))*)\}' + SPACES
# e.g. {d16[0]}
REGLIST_INDEX = r'\{(' + REG_LANE_NO_GROUP + r')\}' + SPACES
APSR = 'APSR_nzcv'
FPSCR = '(FPSCR)'
# e.g. [r12]
MEMOP = r'\[' + SPACES + REG + r'\]' + SPACES
# e.g. [r3] or [r3]!
MEMOP_MAYBE_WB = r'\[' + SPACES + REG + r'\]' + f'({WB})?'
# e.g. [sp, 112]
MEMOP_OFFSET = r'\[' + REG + COMMA + r'(-?\d+)\]' + SPACES
# e.g. [sp, -64] or [sp, -64]!
MEMOP_OFFSET_MAYBE_WB = r'\[' + REG + COMMA + r'(-?\d+)\]' + f'({WB})?' + SPACES
# Branch target: a numeric label plus direction, e.g. 2f
B_IMM = r'(\d+)(f|b)'
# Instruction mnemonic: upper-case letters, digits and dots only.
INSTR = SPACES + r'([A-Z0-9.]+)' + SPACES

# e.g. #ifndef __APPLE__
IFDEF_RE = re.compile(r'\s*#(ifndef|endif|ifdef).*')
# e.g. # Push 96 bytes
COMMENT_RE = re.compile(SPACES + r'((//|#)\s*.+)')
# e.g. 0:
LABEL = re.compile(r'(\w+):')
# e.g. NOP
INSTR_RE = re.compile(INSTR + COMMENTS)
# e.g. VPUSH {d8-d15}
INSTR_REGLIST_CONSEC_RE = re.compile(INSTR + REGLIST_CONSEC + COMMENTS)
# e.g. PUSH {r4, r5}
INSTR_REGLIST_LIST_RE = re.compile(INSTR + REGLIST_INDIV + COMMENTS)
# e.g. BX lr
INSTR_OP_RE = re.compile(INSTR + REG + COMMENTS)
# e.g. BLO 2f
INSTR_B_IMM = re.compile(INSTR + B_IMM + COMMENTS)
# e.g. TBNZ x0, 4, 5f
INSTR_B_REG_IMM_IMM = re.compile(
    INSTR + REG + COMMA + IMM + COMMA + B_IMM + COMMENTS)
# e.g. .p2align 3
P2ALIGN_RE = re.compile(SPACES + r'\.p2align\s+(\d+)')
# e.g. CMP r0, 2
INSTR_REG_IMM_RE = re.compile(INSTR + REG + COMMA + IMM + COMMENTS)
# e.g. LDR r0, [r12]
INSTR_REG_MEMOP_RE = re.compile(INSTR + REG + COMMA + MEMOP + COMMENTS)
# e.g. LDR q0, [x4], 16
INSTR_REG_MEMOP_IMM_RE = re.compile(
    INSTR + REG + COMMA + MEMOP + COMMA + IMM + COMMENTS)
# e.g. LDR r0, [sp, 112]
INSTR_REG_MEMOP_OFFSET_RE = re.compile(
    INSTR + REG + COMMA + MEMOP_OFFSET + COMMENTS)
# e.g. LDRD r6, r7, [sp]
INSTR_REG_REG_MEMOP_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + MEMOP + COMMENTS)
# e.g. LDRD r6, r7, [sp, 104], STP d8, d9, [sp, -64]!
INSTR_REG_REG_MEMOP_OFFSET_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + MEMOP_OFFSET_MAYBE_WB + COMMENTS)
# e.g. LDP q20, q21, [x5], 32
INSTR_REG_REG_MEMOP_IMM_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + MEMOP + COMMA + IMM + COMMENTS)
# e.g. PLD [r4, 64]
INSTR_MEMOP_OFFSET_RE = re.compile(INSTR + MEMOP_OFFSET + COMMENTS)
# e.g. movlo r12, r3, vdup.32 q0, d14[0]
INSTR_REG_REG_RE = re.compile(INSTR + REG + COMMA + REG_OR_IMM + COMMENTS)
# e.g. SUBS r5, r2, 16 or SUBS r5, r2, r10 or VMLA.F32 q8, q4, d0[0]
INSTR_REG_REG_REG_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + REG_OR_IMM + COMMENTS)
# e.g. VEXT.8 q0, q0, q0, 4
INSTR_REG_REG_REG_IMM_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + REG + COMMA + IMM + COMMENTS)
# e.g. VST1.32 {d16}, [r11], r0
INSTR_REGLIST_INDIV_MEMOP_REG = re.compile(
    INSTR + REGLIST_INDIV + COMMA + MEMOP + COMMA + REG + COMMENTS)
# e.g. VST1.32 {d16-d19}, [r11], r0
INSTR_REGLIST_CONSEC_MEMOP_REG = re.compile(
    INSTR + REGLIST_CONSEC + COMMA + MEMOP + COMMA + REG + COMMENTS)
# e.g. VLDM r9, {d16-d19}
INSTR_REG_REGLIST_CONSECT = re.compile(
    INSTR + REG + COMMA + REGLIST_CONSEC + COMMENTS)
# e.g. VLDM r9!, {d16-d19}
INSTR_REG_REGLIST_CONSECT_WB = re.compile(
    INSTR + REG + WB + COMMA + REGLIST_CONSEC + COMMENTS)
# e.g. VLDM r9!, {d16}
INSTR_REG_REGLIST_INDIV_WB = re.compile(
    INSTR + REG + WB + COMMA + REGLIST_INDIV + COMMENTS)
# e.g. VLD1.32 {d0}, [r3]{!}
INSTR_REGLIST_INDIV_MEMOP = re.compile(
    INSTR + REGLIST_INDIV + COMMA + MEMOP_MAYBE_WB + COMMENTS)
# e.g. LD1 {v16.16b, v17.16b, v18.16b}, [x5], 48
INSTR_REGLIST_INDIV_MEMOP_IMM = re.compile(
    INSTR + REGLIST_INDIV + COMMA + MEMOP + COMMA + IMM + COMMENTS)
# e.g. VST1.32 {d24-d25}, [r11]{!}
INSTR_REGLIST_CONSEC_MEMOP = re.compile(
    INSTR + REGLIST_CONSEC + COMMA + MEMOP_MAYBE_WB + COMMENTS)
# e.g. VLD1.32 {d0[]}, [r3]!
INSTR_REGLIST_REPLICATE_MEMOP = re.compile(
    INSTR + REGLIST_INDIV_REPLICATE + COMMA + MEMOP + r'(!)?' + COMMENTS)
# e.g. VST1.32 {d16[0]}, [r11]{!}
INSTR_REGLIST_INDEX_MEMOP = re.compile(
    INSTR + REGLIST_INDEX + COMMA + MEMOP_MAYBE_WB + COMMENTS)
# e.g. VMRS APSR_nzcv, FPSCR
INSTR_REG_FPSCR = re.compile(
    INSTR + f'({APSR}|{REG_NO_GROUP})' + COMMA + FPSCR + COMMENTS)
# e.g. PRFM PLDL1KEEP, [x5]
INSTR_PLD_MEMOP = re.compile(
    INSTR + r'(PLDL1KEEP)' + COMMA + MEMOP + COMMENTS)
# e.g. PRFM PLDL1KEEP, [x5, 64]
INSTR_PLD_MEMOP_OFFSET = re.compile(
    INSTR + r'(PLDL1KEEP)' + COMMA + MEMOP_OFFSET + COMMENTS)
# Condition code operand, e.g. LO
COND = r'([A-Z]+)'
# e.g. CSEL x9, x3, x9, LO
INSTR_REG_REG_REG_COND_RE = re.compile(
    INSTR + REG + COMMA + REG + COMMA + REG + COMMA + COND + COMMENTS)
def remove_brackets(s):
  """Returns `s` with every '[' and ']' character deleted."""
  return s.translate(str.maketrans('', '', '[]'))
def fix_replicate_instruction(s):
  """Inserts an 'r' before the first '_<digits>' in an instruction name.

  e.g. vld1_32 -> vld1r_32. Only the first occurrence is rewritten; a name
  without '_<digits>' is returned unchanged.
  """
  # `count` is passed by keyword: positional use is deprecated in Python 3.13.
  return re.sub(r'_(\d+)', r'r_\1', s, count=1)
def fix_instr_name(s):
  """Turns an assembly mnemonic into the name used in the generated code.

  The mnemonic is lower-cased, its first two '.' become '_', and the first
  occurrence of 'and' gets a trailing underscore (e.g. AND -> and_).
  """
  lowered = s.lower()
  underscored = lowered.replace('.', '_', 2)
  return underscored.replace('and', 'and_', 1)
def fix_comments(s):
  """Replaces the first '#' in `s` with '//'; other text is left untouched."""
  before, marker, after = s.partition('#')
  if not marker:
    # No '#' present: nothing to rewrite.
    return s
  return before + '//' + after
def maybe_wb(wb):
  """Returns '++' when `wb` is truthy (a write-back marker matched), else ''."""
  if wb:
    return '++'
  return ''
def fix_fn_name(name):
  """Derives the generator function name from a microkernel name.

  A leading 'xnn_' is dropped, every 'minmax_' (the activation part of the
  name) is removed, and the result is prefixed with 'xnn_generate_'.
  """
  prefix = 'xnn_'
  stem = name[len(prefix):] if name.startswith(prefix) else name
  # Remove any type of activations from the name.
  stem = stem.replace('minmax_', '')
  return 'xnn_generate_' + stem
def fix_regs(regs):
  """Rewrites vector registers that carry a datatype suffix as method calls.

  e.g. v2.4s -> v2.v4s(), v2.s -> v2.s(). Text without a '<name>.<suffix>'
  form (plain registers, immediates) is returned unchanged.
  """
  def as_method_call(match):
    base, lanes, dtype = match.groups()
    # With a lane count the accessor is spelled v<lanes><dtype>.
    accessor = f'v{lanes}{dtype}' if lanes else dtype
    return f'{base}{accessor}()'

  return re.sub(r'(\w+\.)(\d+)?(\w+)', as_method_call, regs)
# Target architectures, as they appear in the input file name.
AARCH32, AARCH64 = 'aarch32', 'aarch64'
# Kinds of microkernel this script can convert.
GEMM, IGEMM = 'GEMM', 'IGEMM'
# Patterns for lines that are dropped when they fully match (e.g. .syntax).
IGNORE_LINES = [
    r'\s*\.\w+',
]
def _convert_body_line(line, labels):
  """Translates one line of the microkernel body into JIT assembler C++.

  The patterns are tried in a fixed order and the first full match wins.
  Several patterns overlap (e.g. `CMP r0, 2` fits both the reg/imm and the
  reg/reg-or-imm forms), so the order must not be changed casually.

  Args:
    line: A line from inside the function, with trailing whitespace removed.
    labels: List collecting label names; appended to when `line` is a label.

  Returns:
    The text to emit for this line ('' for a blank line), or None when the
    line produces no output (preprocessor conditionals, assembler directives
    and END_FUNCTION).

  Raises:
    ValueError: If the line matches none of the known forms.
  """
  sc = ';'
  # Don't keep the ifdefs.
  if IFDEF_RE.fullmatch(line):
    return None
  # But keep other comments.
  m = COMMENT_RE.fullmatch(line)
  if m:
    return m[1]
  m = LABEL.fullmatch(line)
  if m:
    labels.append(m[1])
    return f'bind(l{m[1]}){sc}'
  m = INSTR_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(){sc} {m[2]}'
  m = INSTR_OP_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}){sc} {m[3]}'
  m = INSTR_REGLIST_CONSEC_MEMOP_REG.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}], {m[5]}){sc} {m[6]}'
  m = INSTR_REGLIST_INDIV_MEMOP_REG.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}'
  m = INSTR_REGLIST_CONSEC_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}){sc} {m[4]}'
  m = INSTR_REGLIST_LIST_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{m[2]}}}){sc} {m[3]}'
  m = INSTR_MEMOP_OFFSET_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(mem[{m[2]}, {m[3]}]){sc} {m[4]}'
  m = INSTR_REG_MEMOP_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}]){sc} {m[4]}'
  m = INSTR_REG_MEMOP_IMM_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}], {m[4]}){sc} {m[5]}'
  m = INSTR_REG_MEMOP_OFFSET_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}'
  m = INSTR_REG_REG_MEMOP_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}]){sc} {m[5]}'
  m = INSTR_REG_REG_MEMOP_OFFSET_RE.fullmatch(line)
  if m:
    # m[6] is the optional write-back marker.
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}, {m[5]}]{maybe_wb(m[6])}){sc} {m[7]}'
  m = INSTR_REG_REG_MEMOP_IMM_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, mem[{m[4]}], {m[5]}){sc} {m[6]}'
  m = INSTR_REG_IMM_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {m[3]}){sc} {m[4]}'
  m = INSTR_REG_REG_REG_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}, {fix_regs(m[4])}){sc} {m[5]}'
  m = INSTR_REG_REG_REG_IMM_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, {m[5]}){sc} {m[6]}'
  m = INSTR_REG_REG_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({fix_regs(m[2])}, {fix_regs(m[3])}){sc} {m[4]}'
  m = INSTR_REG_REGLIST_CONSECT.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(mem[{m[2]}], {{{m[3]}-{m[4]}}}){sc} {m[5]}'
  m = INSTR_REG_REGLIST_CONSECT_WB.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}-{m[4]}}}){sc} {m[5]}'
  m = INSTR_REG_REGLIST_INDIV_WB.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(mem[{m[2]}]++, {{{m[3]}}}){sc} {m[4]}'
  m = INSTR_B_IMM.fullmatch(line)
  if m:
    # m[3] (the f/b direction) is dropped; only the label number is used.
    return f'{fix_instr_name(m[1])}(l{m[2]}){sc} {m[4]}'
  m = INSTR_B_REG_IMM_IMM.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, l{m[4]}){sc} {m[6]}'
  m = INSTR_REGLIST_INDIV_MEMOP.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
  m = INSTR_REGLIST_INDIV_MEMOP_IMM.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{fix_regs(m[2])}}}, mem[{m[3]}], {m[4]}){sc} {m[5]}'
  m = INSTR_REGLIST_CONSEC_MEMOP.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{m[2]}-{m[3]}}}, mem[{m[4]}]{maybe_wb(m[5])}){sc} {m[6]}'
  m = INSTR_REGLIST_REPLICATE_MEMOP.fullmatch(line)
  if m:
    # m[3] is the inner repetition group of the register list; m[5] is the
    # optional write-back marker.
    return f'{fix_replicate_instruction(fix_instr_name(m[1]))}({{{remove_brackets(m[2])}}}, mem[{m[4]}]{maybe_wb(m[5])}){sc} {m[6]}'
  m = INSTR_REGLIST_INDEX_MEMOP.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({{{m[2]}}}, mem[{m[3]}]{maybe_wb(m[4])}){sc} {m[5]}'
  m = P2ALIGN_RE.fullmatch(line)
  if m:
    # .p2align takes a power of two; align() is given the byte count.
    return f'align({1 << int(m[1])}){sc}'
  m = INSTR_REG_FPSCR.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}){sc} {m[4]}'
  m = INSTR_PLD_MEMOP.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}]){sc} {m[4]}'
  m = INSTR_PLD_MEMOP_OFFSET.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}(k{m[2]}, mem[{m[3]}, {m[4]}]){sc} {m[5]}'
  m = INSTR_REG_REG_REG_COND_RE.fullmatch(line)
  if m:
    return f'{fix_instr_name(m[1])}({m[2]}, {m[3]}, {m[4]}, k{m[5]}){sc} {m[6]}'
  stripped = line.strip()
  # Keep empty lines for formatting.
  if stripped == '':
    return ''
  # Assembly directives that we don't care about.
  if stripped.startswith('.'):
    return None
  if line.startswith('END_FUNCTION'):
    return None
  # All other lines are errors.
  raise ValueError(line)


def main(input_file):
  """Converts one assembly microkernel file to JIT C++ and prints it to stdout.

  The architecture ('aarch32' or 'aarch64') and the kernel type (IGEMM when
  'igemm' occurs, GEMM otherwise) are inferred from `input_file` itself.

  Args:
    input_file: Path of the assembly (.S) file to convert.

  Exits with status 1, after printing an error to stderr, when the
  architecture cannot be determined or a line in the function body is not
  recognized. Nothing is written to stdout in that case.
  """
  if 'aarch32' in input_file:
    arch = AARCH32
  elif 'aarch64' in input_file:
    arch = AARCH64
  else:
    # stdout is reserved for the generated C++, so report errors on stderr.
    print('ERROR: unknown architecture', file=sys.stderr)
    sys.exit(1)
  kernel_type = IGEMM if 'igemm' in input_file else GEMM

  # Whether we are in the microkernel function.
  in_function = False
  # Whether we are in the auto-generated comment.
  in_autogen = False
  # Converted lines that make up the microkernel body.
  instructions = []
  # Lines of code or comments before the actual function body.
  prologue = []
  # All labels need to be declared first, collect them and output them after
  # function signature.
  labels = []
  # Name of the microkernel function.
  fn_name = ''

  with open(input_file, 'r', encoding='utf-8') as f:
    for line in f:
      line = line.rstrip()

      if in_function:
        # We are in the microkernel function body.
        try:
          converted = _convert_body_line(line, labels)
        except ValueError:
          print(f'ERROR: {line}', file=sys.stderr)
          sys.exit(1)
        if converted is not None:
          instructions.append(converted)
        continue

      # Handle all lines before the microkernel instructions begin.
      if 'Auto-generated file' in line:
        in_autogen = True
      elif 'BEGIN_FUNCTION' in line:
        in_function = True
        fn_name = line.split()[1]
        # NOTE(review): drops the first 20 characters of the path as given,
        # presumably a fixed directory prefix -- confirm against callers.
        prologue.append(f'// Converted from: {input_file[20:]}')
        if kernel_type == GEMM:
          prologue.append('void Generator::generate(size_t nc, size_t kc, void* params) {')
        else:
          prologue.append('void Generator::generate(size_t nc, size_t kc, size_t ks, void* params) {')
      elif 'Copyright ' in line:
        in_autogen = False
        # Replace the year with the current one.
        prologue.append(
            re.sub(r'\d{4}', str(datetime.date.today().year), line, count=1))
      elif '#include <xnnpack/assembly.h>' in line:
        # The assembly include is replaced by the JIT headers and the
        # declaration of the Generator class.
        prologue.extend([
            '#include <cstddef>',
            '',
            f'#include <xnnpack/{arch}-assembler.h>',
            '#include <xnnpack/allocator.h>',
        ])
        if kernel_type == GEMM:
          prologue.append('#include <xnnpack/gemm.h>')
        else:
          prologue.append('#include <xnnpack/igemm.h>')
        prologue.extend([
            '',
            'namespace xnnpack {',
            f'namespace {arch} {{',
            'namespace {',
            'class Generator : public Assembler {',
            ' using Assembler::Assembler;',
            ' public:',
        ])
        if kernel_type == GEMM:
          prologue.append(' void generate(size_t nc, size_t kc, void* params);')
        else:
          prologue.append(' void generate(size_t nc, size_t kc, size_t ks, void* params);')
        prologue.append('};')
      elif any(re.fullmatch(p, line) for p in IGNORE_LINES):
        pass
      elif in_autogen:
        # Still inside the auto-generated notice: drop the line.
        pass
      else:
        prologue.append(fix_comments(line))

  # Actually emit the JIT codegen (to stdout).
  for p in prologue:
    print(p)
  # Declaring zero labels would emit an empty declaration, so skip it then.
  if labels:
    labels_str = ', '.join(f'l{label}' for label in labels)
    print(f' Label {labels_str};')
  print()
  indent = ' '
  for instr in instructions:
    stripped = instr.strip()
    if stripped.startswith('#'):
      print(indent + fix_comments(instr))
    elif stripped.startswith('//'):
      print(indent + instr)
    elif stripped == '':
      print()
    else:
      # Instructions without a trailing comment end in a space; drop it.
      print(indent + instr.rstrip())
  print('}')
  print('} // namespace')
  print(f'}} // {arch}')
  print('} // xnnpack')
  print('')
  if kernel_type == GEMM:
    print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, const void* params) {{')
  else:
    print(f'xnn_status {fix_fn_name(fn_name)}(xnn_code_buffer* code, size_t nc, size_t kc, size_t ks, const void* params) {{')
  print(f' using namespace xnnpack::{arch};')
  print(' Generator g(code);')
  if kernel_type == GEMM:
    print(' g.generate(nc, kc, nullptr);')
  else:
    print(' g.generate(nc, kc, ks, nullptr);')
  print(' g.finalize();')
  print(' if (g.error() != xnnpack::Error::kNoError) {')
  print(' return xnn_status_invalid_state;')
  print(' }')
  print(' return xnn_status_success;')
  print('}')
# Script entry point: parse the single positional argument and convert it.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(
      description='Convert assembly to JIT C++, writes to stdout.')
  parser.add_argument('input_file', help='Input assembly filename')
  args = parser.parse_args()
  main(args.input_file)