tools/generate-dwconv-test.py - platform/external/XNNPACK - Gitiles

 #!/usr/bin/env python
 # Copyright 2019 Google LLC
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

 import argparse
 import bisect
 import codecs
 import math
 import os
 import sys
 import yaml

 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 from primes import next_prime
 import xngen


 # Command-line interface of the generator: the spec (input) and the output
 # paths are both mandatory options.
 parser = argparse.ArgumentParser(description="XNNPACK generator")
 parser.add_argument(
   "-s", "--spec", required=True, metavar="FILE", help="Spec (YAML) file")
 parser.add_argument(
   "-o", "--output", required=True, metavar="FILE",
   help="Output (C++ source) file")
 # Parsed options always expose a `defines` attribute, empty by default.
 parser.set_defaults(defines=[])


 def indent(text):
   """Prefixes every non-empty line of `text` with two spaces.

   Empty lines are left empty. The lines are re-joined with single newlines,
   so a trailing newline of the input is not preserved.
   """
   return "\n".join(
     ["  " + row if row else row for row in text.splitlines()])


 def remove_duplicate_newlines(text):
   """Collapses each run of consecutive blank lines into its first line.

   A line counts as blank when it is empty or contains only whitespace. The
   surviving lines are re-joined with single newlines, so a trailing newline
   of the input is not preserved.
   """
   kept = []
   previous_blank = False
   for row in text.splitlines():
     blank = not row.strip()
     if blank and previous_blank:
       # Second or later blank line in a row: drop it.
       continue
     kept.append(row)
     previous_blank = blank
   return "\n".join(kept)


 # Architecture name -> C preprocessor macro used in the generated `#if` guard.
 ARCH_TO_MACRO_MAP = dict([
   ("aarch32", "XNN_ARCH_ARM"),
   ("aarch64", "XNN_ARCH_ARM64"),
   ("x86", "XNN_ARCH_X86"),
   ("x86-64", "XNN_ARCH_X86_64"),
 ])

 # ISA name -> architectures used for a micro-kernel whose name carries the ISA
 # but no explicit architecture. Every entry gets its own list object; psimd
 # maps to no architecture at all.
 ISA_TO_ARCH_MAP = {
   isa: ["aarch32", "aarch64"] for isa in ("neon", "neonfma", "neonfp16arith")
 }
 ISA_TO_ARCH_MAP.update({
   isa: ["x86", "x86-64"] for isa in ("sse", "sse2", "avx", "avx512f")
 })
 ISA_TO_ARCH_MAP["psimd"] = []

 # ISA name -> check macro emitted as the first statement of generated tests.
 ISA_TO_CHECK_MAP = dict([
   ("neon", "TEST_REQUIRES_ARM_NEON"),
   ("neonfma", "TEST_REQUIRES_ARM_NEON_FMA"),
   ("neonfp16arith", "TEST_REQUIRES_ARM_NEON_FP16_ARITH"),
   ("sse", "TEST_REQUIRES_X86_SSE"),
   ("sse2", "TEST_REQUIRES_X86_SSE2"),
   ("avx", "TEST_REQUIRES_X86_AVX"),
   ("avx512f", "TEST_REQUIRES_X86_AVX512F"),
   ("psimd", "TEST_REQUIRES_PSIMD"),
 ])


 def split_ukernel_name(name):
   """Extracts the DWCONV parameters and the target from a micro-kernel name.

   The part of `name` before the first "__" must end in an "up<CR>x<KR>"
   component; the part after it is scanned for known architecture and ISA
   names (keys of ARCH_TO_MACRO_MAP and ISA_TO_ARCH_MAP).

   Args:
     name: C name of the micro-kernel function.

   Returns:
     Tuple (cr, kr, arch, isa). `cr` and `kr` are ints. `arch` is a list of
     architecture names: the architecture named explicitly (the last one if
     several are present), otherwise the architectures associated with the
     ISA, otherwise an empty list. `isa` is the last ISA name found, or None.

   Raises:
     ValueError: if `name` has no "__" separator or its parameter component
       is not of the form "up<CR>x<KR>".
   """
   common_name, target_name = name.split("__", 1)
   param_spec = common_name.split("_")[-1]
   # Explicit check instead of `assert`, which is skipped under `python -O`.
   if not param_spec.startswith("up"):
     raise ValueError(
       "expected an up<CR>x<KR> component in micro-kernel name %r" % (name,))
   cr, kr = map(int, param_spec[2:].split("x"))
   arch = list()
   isa = None
   for target_part in target_name.split("_"):
     if target_part in ARCH_TO_MACRO_MAP:
       arch = [target_part]
     elif target_part in ISA_TO_ARCH_MAP:
       isa = target_part
   # Without an explicit architecture, fall back to those implied by the ISA.
   if isa and not arch:
     arch = ISA_TO_ARCH_MAP[isa]
   return cr, kr, arch, isa


 # Template rendered by xngen.preprocess() in generate_test_cases(); the
 # `${...}` expressions and `$if` conditions refer to the variables passed
 # there. The multipixel_with_output_stride test uses the loop variable for
 # .channels() like the other multipixel tests do; the output stride
 # next_prime(CR * 5 + 1) is presumably larger than any channel count the loop
 # produces when CBLOCK == CR (which is how main() calls the generator).
 DWCONV_TEST_CODE = """\
 TEST(${TEST_NAME}, c_eq_${CBLOCK}) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   DWConvMicrokernelTester()
     .cr(${CR})
     .kr(${KR})
     .channels(${CBLOCK})
     .Test(${", ".join(TEST_ARGS)});
 }

 $if IS_PIPELINED:
   TEST(${TEST_NAME}, c_eq_${CBLOCK * 2}) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(${CBLOCK * 2})
       .Test(${", ".join(TEST_ARGS)});
   }

 $if CBLOCK > 1:
   TEST(${TEST_NAME}, c_div_${CBLOCK}) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .Test(${", ".join(TEST_ARGS)});
     }
   }

   TEST(${TEST_NAME}, c_div_${CBLOCK}_with_qmin) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .qmin(128)
         .Test(${", ".join(TEST_ARGS)});
     }
   }

   TEST(${TEST_NAME}, c_div_${CBLOCK}_with_qmax) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .qmax(128)
         .Test(${", ".join(TEST_ARGS)});
     }
   }

   TEST(${TEST_NAME}, c_lt_${ADJCBLOCK}) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (uint32_t channels = 1; channels < ${ADJCBLOCK}; channels++) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .Test(${", ".join(TEST_ARGS)});
     }
   }

 TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}_with_qmin) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .qmin(128)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}_with_qmax) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .qmax(128)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, multipixel) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .width(3)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, multipixel_with_step) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
     for (size_t step = 2; step <= ${KR}; step++) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .width(3)
         .step(step)
         .Test(${", ".join(TEST_ARGS)});
     }
   }
 }

 TEST(${TEST_NAME}, multipixel_with_output_stride) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .width(5)
       .output_stride(${next_prime(CR * 5 + 1)})
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, multipixel_with_qmin) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .width(3)
       .qmin(128)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 TEST(${TEST_NAME}, multipixel_with_qmax) {
   $if ISA_CHECK:
     ${ISA_CHECK};
   for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
     DWConvMicrokernelTester()
       .cr(${CR})
       .kr(${KR})
       .channels(channels)
       .width(3)
       .qmax(128)
       .Test(${", ".join(TEST_ARGS)});
   }
 }

 $if DATATYPE == "q8":
   TEST(${TEST_NAME}, input_zero_point_only) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .width(3)
         .input_zero_point(255)
         .kernel_zero_point(0)
         .Test(${", ".join(TEST_ARGS)});
     }
   }

   TEST(${TEST_NAME}, kernel_zero_point_only) {
     $if ISA_CHECK:
       ${ISA_CHECK};
     for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
       DWConvMicrokernelTester()
         .cr(${CR})
         .kr(${KR})
         .channels(channels)
         .width(3)
         .input_zero_point(0)
         .kernel_zero_point(255)
         .Test(${", ".join(TEST_ARGS)});
     }
   }
 """


 def generate_test_cases(ukernel, cr, kr, c_block, is_pipelined, isa):
   """Renders the unit tests for a single DWCONV micro-kernel.

   Args:
     ukernel: C name of the micro-kernel function.
     cr: CR parameter of the DWCONV micro-kernel.
     kr: KR parameter of the DWCONV micro-kernel.
     c_block: Number of C values processed per one iteration of the main loop
              of the micro-kernel.
     is_pipelined: Indicates if the micro-kernel is implemented with software
                   pipelining. Additional test cases are generated for
                   software pipelined micro-kernels to separately test
                   prologue + epilogue of the pipelined loop and iteration of
                   the pipelined loop.
     isa: instruction set required to run the micro-kernel. Generated unit test
          will skip execution if the host processor doesn't support this ISA.

   Returns:
     Code for the test case.
   """
   # The test name is everything after the first "_"-separated component; the
   # data type and the micro-kernel type are the second and third components.
   _, name_tail = ukernel.split("_", 1)
   _, datatype, ukernel_type, _ = ukernel.split("_", 3)

   tester_args = [ukernel]
   if isa == "psimd" or not isa:
     # No ISA, or psimd: the tester additionally receives the Scalar variant.
     tester_args.append("DWConvMicrokernelTester::Variant::Scalar")

   adjusted_c_block = c_block
   if is_pipelined:
     adjusted_c_block = 2 * c_block

   template_vars = {
       "TEST_NAME": name_tail.upper().replace("UKERNEL_", ""),
       "TEST_ARGS": tester_args,
       "UKERNEL_TYPE": ukernel_type.upper(),
       "DATATYPE": datatype,
       "CR": cr,
       "KR": kr,
       "CBLOCK": c_block,
       "ADJCBLOCK": adjusted_c_block,
       "IS_PIPELINED": is_pipelined,
       "ISA_CHECK": ISA_TO_CHECK_MAP.get(isa, ""),
       "next_prime": next_prime,
       "sqrt": math.sqrt,
     }
   return xngen.preprocess(DWCONV_TEST_CODE, template_vars)


 def _guard_test_case(test_case, arch, isa, assembly):
   """Wraps the tests of one micro-kernel in the `#if` guard of its target.

   Args:
     test_case: Generated test code for the micro-kernel.
     arch: Architecture names (keys of ARCH_TO_MACRO_MAP); may be empty.
     isa: ISA name parsed from the micro-kernel name, or None.
     assembly: True if the spec marks the micro-kernel as assembly.

   Returns:
     The test code, indented and enclosed in `#if`/`#endif`, when an
     architecture list is given or the ISA is psimd; otherwise the test code
     unchanged.

   Raises:
     ValueError: if `arch` contains a name missing from ARCH_TO_MACRO_MAP.
   """
   if arch:
     macros = []
     for arch_name in arch:
       macro = ARCH_TO_MACRO_MAP.get(arch_name)
       if macro is None:
         raise ValueError("unknown architecture in the spec: %r" % (arch_name,))
       macros.append(macro)
     guard_macro = " || ".join(macros)
     if assembly:
       # `&&` binds tighter than `||` in preprocessor expressions, so the
       # architecture alternatives must be parenthesized for the assembly
       # condition to apply to all of them.
       if len(macros) > 1:
         guard_macro = "(%s)" % guard_macro
       guard_macro += " && XNN_ENABLE_ASSEMBLY"
   elif isa == "psimd":
     guard_macro = "!XNN_ARCH_ASMJS && !XNN_ARCH_WASM"
   else:
     return test_case
   return "#if %s\n%s\n#endif  // %s\n" % (
     guard_macro, indent(test_case), guard_macro)


 def main(args):
   """Generates the C++ test file described by a YAML spec.

   Args:
     args: Command-line arguments, without the program name.

   Raises:
     ValueError: if the spec is not a list of micro-kernels, or names an
       architecture missing from ARCH_TO_MACRO_MAP.
   """
   options = parser.parse_args(args)

   # Only parsing needs the spec file; close it before generating the tests.
   with codecs.open(options.spec, "r", encoding="utf-8") as spec_file:
     spec_yaml = yaml.safe_load(spec_file)
   if not isinstance(spec_yaml, list):
     raise ValueError("expected a list of micro-kernels in the spec")

   # Pieces of the output file, joined once at the end.
   chunks = ["""\
 // Copyright (c) Facebook, Inc. and its affiliates.
 // All rights reserved.
 //
 // Copyright 2019 Google LLC
 //
 // This source code is licensed under the BSD-style license found in the
 // LICENSE file in the root directory of this source tree.
 //
 // Auto-generated file. Do not edit!
 //   Specification: {specification}
 //   Generator: {generator}


 #include <gtest/gtest.h>

 #include <xnnpack/common.h>
 #include <xnnpack/isa-checks.h>

 #include <xnnpack/dwconv.h>
 #include "dwconv-microkernel-tester.h"
 """.format(specification=options.spec, generator=sys.argv[0])]

   for ukernel_spec in spec_yaml:
     name = ukernel_spec["name"]
     pipelined = bool(ukernel_spec.get("pipelined", False))
     assembly = bool(ukernel_spec.get("assembly", False))
     cr, kr, arch, isa = split_ukernel_name(name)

     # specification can override architecture
     arch = ukernel_spec.get("arch", arch)

     # The channel block passed to the generator equals CR.
     test_case = generate_test_cases(name, cr, kr, cr, pipelined, isa)
     test_case = remove_duplicate_newlines(test_case)
     chunks.append("\n\n")
     chunks.append(_guard_test_case(test_case, arch, isa, assembly))

   with codecs.open(options.output, "w", encoding="utf-8") as output_file:
     output_file.write("".join(chunks))


 # Run the generator only when executed as a script, forwarding the
 # command-line arguments (without the program name) to main().
 if __name__ == "__main__":
   main(sys.argv[1:])
	#!/usr/bin/env python
	# Copyright 2019 Google LLC
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	import argparse
	import bisect
	import codecs
	import math
	import os
	import sys
	import yaml

	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
	from primes import next_prime
	import xngen


	# Command-line interface of the generator: the spec (input) and the output
	# paths are both mandatory options.
	parser = argparse.ArgumentParser(description="XNNPACK generator")
	parser.add_argument(
	  "-s", "--spec", required=True, metavar="FILE", help="Spec (YAML) file")
	parser.add_argument(
	  "-o", "--output", required=True, metavar="FILE",
	  help="Output (C++ source) file")
	# Parsed options always expose a `defines` attribute, empty by default.
	parser.set_defaults(defines=[])


	def indent(text):
	  """Prefixes every non-empty line of `text` with two spaces.

	  Empty lines are left empty. The lines are re-joined with single newlines,
	  so a trailing newline of the input is not preserved.
	  """
	  return "\n".join(
	    ["  " + row if row else row for row in text.splitlines()])


	def remove_duplicate_newlines(text):
	  """Collapses each run of consecutive blank lines into its first line.

	  A line counts as blank when it is empty or contains only whitespace. The
	  surviving lines are re-joined with single newlines, so a trailing newline
	  of the input is not preserved.
	  """
	  filtered_lines = list()
	  last_newline = False
	  for line in text.splitlines():
	    is_newline = len(line.strip()) == 0
	    # Keep every non-blank line and only the first blank line of a run.
	    if not is_newline or not last_newline:
	      filtered_lines.append(line)
	    last_newline = is_newline
	  return "\n".join(filtered_lines)


	# Architecture name -> C preprocessor macro used in the generated `#if` guard.
	ARCH_TO_MACRO_MAP = dict([
	  ("aarch32", "XNN_ARCH_ARM"),
	  ("aarch64", "XNN_ARCH_ARM64"),
	  ("x86", "XNN_ARCH_X86"),
	  ("x86-64", "XNN_ARCH_X86_64"),
	])

	# ISA name -> architectures used for a micro-kernel whose name carries the ISA
	# but no explicit architecture. Every entry gets its own list object; psimd
	# maps to no architecture at all.
	ISA_TO_ARCH_MAP = {
	  isa: ["aarch32", "aarch64"] for isa in ("neon", "neonfma", "neonfp16arith")
	}
	ISA_TO_ARCH_MAP.update({
	  isa: ["x86", "x86-64"] for isa in ("sse", "sse2", "avx", "avx512f")
	})
	ISA_TO_ARCH_MAP["psimd"] = []

	# ISA name -> check macro emitted as the first statement of generated tests.
	ISA_TO_CHECK_MAP = dict([
	  ("neon", "TEST_REQUIRES_ARM_NEON"),
	  ("neonfma", "TEST_REQUIRES_ARM_NEON_FMA"),
	  ("neonfp16arith", "TEST_REQUIRES_ARM_NEON_FP16_ARITH"),
	  ("sse", "TEST_REQUIRES_X86_SSE"),
	  ("sse2", "TEST_REQUIRES_X86_SSE2"),
	  ("avx", "TEST_REQUIRES_X86_AVX"),
	  ("avx512f", "TEST_REQUIRES_X86_AVX512F"),
	  ("psimd", "TEST_REQUIRES_PSIMD"),
	])


	def split_ukernel_name(name):
	  """Extracts the DWCONV parameters and the target from a micro-kernel name.

	  The part of `name` before the first "__" must end in an "up<CR>x<KR>"
	  component; the part after it is scanned for known architecture and ISA
	  names (keys of ARCH_TO_MACRO_MAP and ISA_TO_ARCH_MAP).

	  Args:
	    name: C name of the micro-kernel function.

	  Returns:
	    Tuple (cr, kr, arch, isa). `cr` and `kr` are ints. `arch` is a list of
	    architecture names: the architecture named explicitly (the last one if
	    several are present), otherwise the architectures associated with the
	    ISA, otherwise an empty list. `isa` is the last ISA name found, or None.

	  Raises:
	    ValueError: if `name` has no "__" separator or its parameter component
	      is not of the form "up<CR>x<KR>".
	  """
	  common_name, target_name = name.split("__", 1)
	  param_spec = common_name.split("_")[-1]
	  # Explicit check instead of `assert`, which is skipped under `python -O`.
	  if not param_spec.startswith("up"):
	    raise ValueError(
	      "expected an up<CR>x<KR> component in micro-kernel name %r" % (name,))
	  cr, kr = map(int, param_spec[2:].split("x"))
	  arch = list()
	  isa = None
	  for target_part in target_name.split("_"):
	    if target_part in ARCH_TO_MACRO_MAP:
	      arch = [target_part]
	    elif target_part in ISA_TO_ARCH_MAP:
	      isa = target_part
	  # Without an explicit architecture, fall back to those implied by the ISA.
	  if isa and not arch:
	    arch = ISA_TO_ARCH_MAP[isa]
	  return cr, kr, arch, isa


	# Template rendered by xngen.preprocess() in generate_test_cases(); the
	# `${...}` expressions and `$if` conditions refer to the variables passed
	# there. Nested indentation is restored so that each `$if` line is followed
	# by an indented body. The multipixel_with_output_stride test uses the loop
	# variable for .channels() like the other multipixel tests do; the output
	# stride next_prime(CR * 5 + 1) is presumably larger than any channel count
	# the loop produces when CBLOCK == CR (which is how main() calls the
	# generator).
	DWCONV_TEST_CODE = """\
	TEST(${TEST_NAME}, c_eq_${CBLOCK}) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  DWConvMicrokernelTester()
	    .cr(${CR})
	    .kr(${KR})
	    .channels(${CBLOCK})
	    .Test(${", ".join(TEST_ARGS)});
	}

	$if IS_PIPELINED:
	  TEST(${TEST_NAME}, c_eq_${CBLOCK * 2}) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(${CBLOCK * 2})
	      .Test(${", ".join(TEST_ARGS)});
	  }

	$if CBLOCK > 1:
	  TEST(${TEST_NAME}, c_div_${CBLOCK}) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }

	  TEST(${TEST_NAME}, c_div_${CBLOCK}_with_qmin) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .qmin(128)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }

	  TEST(${TEST_NAME}, c_div_${CBLOCK}_with_qmax) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (uint32_t channels = ${ADJCBLOCK + CBLOCK}; channels < ${CR * 16}; channels += ${CR * 3}) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .qmax(128)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }

	  TEST(${TEST_NAME}, c_lt_${ADJCBLOCK}) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (uint32_t channels = 1; channels < ${ADJCBLOCK}; channels++) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }

	TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}_with_qmin) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .qmin(128)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, c_gt_${ADJCBLOCK}_with_qmax) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (uint32_t channels = ${ADJCBLOCK + 1}; channels < ${10 if CBLOCK == 1 else ADJCBLOCK + CBLOCK}; channels++) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .qmax(128)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, multipixel) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .width(3)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, multipixel_with_step) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	    for (size_t step = 2; step <= ${KR}; step++) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .width(3)
	        .step(step)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }
	}

	TEST(${TEST_NAME}, multipixel_with_output_stride) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .width(5)
	      .output_stride(${next_prime(CR * 5 + 1)})
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, multipixel_with_qmin) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .width(3)
	      .qmin(128)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	TEST(${TEST_NAME}, multipixel_with_qmax) {
	  $if ISA_CHECK:
	    ${ISA_CHECK};
	  for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	    DWConvMicrokernelTester()
	      .cr(${CR})
	      .kr(${KR})
	      .channels(channels)
	      .width(3)
	      .qmax(128)
	      .Test(${", ".join(TEST_ARGS)});
	  }
	}

	$if DATATYPE == "q8":
	  TEST(${TEST_NAME}, input_zero_point_only) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .width(3)
	        .input_zero_point(255)
	        .kernel_zero_point(0)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }

	  TEST(${TEST_NAME}, kernel_zero_point_only) {
	    $if ISA_CHECK:
	      ${ISA_CHECK};
	    for (size_t channels = 1; channels <= ${CBLOCK * 5}; channels += ${max(1, CBLOCK - 1)}) {
	      DWConvMicrokernelTester()
	        .cr(${CR})
	        .kr(${KR})
	        .channels(channels)
	        .width(3)
	        .input_zero_point(0)
	        .kernel_zero_point(255)
	        .Test(${", ".join(TEST_ARGS)});
	    }
	  }
	"""


	def generate_test_cases(ukernel, cr, kr, c_block, is_pipelined, isa):
	  """Generates all test cases for a DWCONV micro-kernel.

	  Args:
	    ukernel: C name of the micro-kernel function.
	    cr: CR parameter of the DWCONV micro-kernel.
	    kr: KR parameter of the DWCONV micro-kernel.
	    c_block: Number of C values processed per one iteration of the main loop
	             of the micro-kernel.
	    is_pipelined: Indicates if the micro-kernel is implemented with software
	                  pipelining. Additional test cases are generated for
	                  software pipelined micro-kernels to separately test
	                  prologue + epilogue of the pipelined loop and iteration of
	                  the pipelined loop.
	    isa: instruction set required to run the micro-kernel. Generated unit test
	         will skip execution if the host processor doesn't support this ISA.

	  Returns:
	    Code for the test case.
	  """
	  # The test name is everything after the first "_"-separated component; the
	  # data type and the micro-kernel type are the second and third components.
	  _, test_name = ukernel.split("_", 1)
	  _, datatype, ukernel_type, _ = ukernel.split("_", 3)
	  test_args = [ukernel]
	  if not isa or isa == "psimd":
	    # No ISA, or psimd: the tester additionally receives the Scalar variant.
	    test_args.append("DWConvMicrokernelTester::Variant::Scalar")
	  return xngen.preprocess(DWCONV_TEST_CODE, {
	      "TEST_NAME": test_name.upper().replace("UKERNEL_", ""),
	      "TEST_ARGS": test_args,
	      "UKERNEL_TYPE": ukernel_type.upper(),
	      "DATATYPE": datatype,
	      "CR": cr,
	      "KR": kr,
	      "CBLOCK": c_block,
	      "ADJCBLOCK": 2 * c_block if is_pipelined else c_block,
	      "IS_PIPELINED": is_pipelined,
	      "ISA_CHECK": ISA_TO_CHECK_MAP.get(isa, ""),
	      "next_prime": next_prime,
	      "sqrt": math.sqrt,
	    })


	def _guard_test_case(test_case, arch, isa, assembly):
	  """Wraps the tests of one micro-kernel in the `#if` guard of its target.

	  Args:
	    test_case: Generated test code for the micro-kernel.
	    arch: Architecture names (keys of ARCH_TO_MACRO_MAP); may be empty.
	    isa: ISA name parsed from the micro-kernel name, or None.
	    assembly: True if the spec marks the micro-kernel as assembly.

	  Returns:
	    The test code, indented and enclosed in `#if`/`#endif`, when an
	    architecture list is given or the ISA is psimd; otherwise the test code
	    unchanged.

	  Raises:
	    ValueError: if `arch` contains a name missing from ARCH_TO_MACRO_MAP.
	  """
	  if arch:
	    macros = []
	    for arch_name in arch:
	      macro = ARCH_TO_MACRO_MAP.get(arch_name)
	      if macro is None:
	        raise ValueError("unknown architecture in the spec: %r" % (arch_name,))
	      macros.append(macro)
	    # Plain " || " separator: a backslash-escaped form would end up verbatim
	    # in the generated preprocessor expression.
	    guard_macro = " || ".join(macros)
	    if assembly:
	      # `&&` binds tighter than `||` in preprocessor expressions, so the
	      # architecture alternatives must be parenthesized for the assembly
	      # condition to apply to all of them.
	      if len(macros) > 1:
	        guard_macro = "(%s)" % guard_macro
	      guard_macro += " && XNN_ENABLE_ASSEMBLY"
	  elif isa == "psimd":
	    guard_macro = "!XNN_ARCH_ASMJS && !XNN_ARCH_WASM"
	  else:
	    return test_case
	  return "#if %s\n%s\n#endif  // %s\n" % (
	    guard_macro, indent(test_case), guard_macro)


	def main(args):
	  """Generates the C++ test file described by a YAML spec.

	  Args:
	    args: Command-line arguments, without the program name.

	  Raises:
	    ValueError: if the spec is not a list of micro-kernels, or names an
	      architecture missing from ARCH_TO_MACRO_MAP.
	  """
	  options = parser.parse_args(args)

	  # Only parsing needs the spec file; close it before generating the tests.
	  with codecs.open(options.spec, "r", encoding="utf-8") as spec_file:
	    spec_yaml = yaml.safe_load(spec_file)
	  if not isinstance(spec_yaml, list):
	    raise ValueError("expected a list of micro-kernels in the spec")

	  # Pieces of the output file, joined once at the end.
	  chunks = ["""\
	// Copyright (c) Facebook, Inc. and its affiliates.
	// All rights reserved.
	//
	// Copyright 2019 Google LLC
	//
	// This source code is licensed under the BSD-style license found in the
	// LICENSE file in the root directory of this source tree.
	//
	// Auto-generated file. Do not edit!
	//   Specification: {specification}
	//   Generator: {generator}


	#include <gtest/gtest.h>

	#include <xnnpack/common.h>
	#include <xnnpack/isa-checks.h>

	#include <xnnpack/dwconv.h>
	#include "dwconv-microkernel-tester.h"
	""".format(specification=options.spec, generator=sys.argv[0])]

	  for ukernel_spec in spec_yaml:
	    name = ukernel_spec["name"]
	    pipelined = bool(ukernel_spec.get("pipelined", False))
	    assembly = bool(ukernel_spec.get("assembly", False))
	    cr, kr, arch, isa = split_ukernel_name(name)

	    # specification can override architecture
	    arch = ukernel_spec.get("arch", arch)

	    # The channel block passed to the generator equals CR.
	    test_case = generate_test_cases(name, cr, kr, cr, pipelined, isa)
	    test_case = remove_duplicate_newlines(test_case)
	    chunks.append("\n\n")
	    chunks.append(_guard_test_case(test_case, arch, isa, assembly))

	  with codecs.open(options.output, "w", encoding="utf-8") as output_file:
	    output_file.write("".join(chunks))


	# Run the generator only when executed as a script, forwarding the
	# command-line arguments (without the program name) to main().
	if __name__ == "__main__":
	  main(sys.argv[1:])