#!/usr/bin/env python
# Copyright 2019 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
6
import argparse
import bisect
import codecs
import os
import sys

import yaml

# Make the sibling helper modules importable when run as a script.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from primes import next_prime
import xngen
17
18
# Command-line interface of the generator: both the YAML specification and the
# output path are mandatory.
parser = argparse.ArgumentParser(description="XNNPACK generator")
parser.add_argument(
    "-s", "--spec", metavar="FILE", required=True,
    help="Spec (YAML) file")
parser.add_argument(
    "-o", "--output", metavar="FILE", required=True,
    help="Output (C++ source) file")
# `defines` has no command-line flag; it is only pre-populated with an empty
# list on the parsed options.
parser.set_defaults(defines=list())
25
26
def indent(text):
    """Shifts every non-empty line of `text` right by one indentation level.

    Args:
      text: Possibly multi-line string.

    Returns:
      The lines of `text` re-joined with "\\n" (so a trailing newline is
      dropped), each non-empty line prefixed with the indentation unit. Empty
      lines are left empty so no trailing whitespace is produced.
    """
    # NOTE(review): the recovered source had runs of whitespace collapsed, so
    # the width of this prefix (two spaces, one level of Google-style C++
    # indentation) should be confirmed against the upstream file.
    return "\n".join(
        "  " + line if line else line for line in text.splitlines())
29
30
def remove_duplicate_newlines(text):
    """Collapses each run of blank lines in `text` into a single blank line.

    A line counts as blank when it is empty or contains only whitespace. The
    first line of every blank run is kept verbatim.

    Args:
      text: Possibly multi-line string.

    Returns:
      The surviving lines joined with "\\n"; a trailing newline of the input is
      not preserved.
    """
    kept_lines = []
    previous_blank = False
    for line in text.splitlines():
        blank = not line.strip()
        # Drop a blank line only when it directly follows another blank line.
        if not (blank and previous_blank):
            kept_lines.append(line)
        previous_blank = blank
    return "\n".join(kept_lines)
40
41
# Architecture name (as it appears in micro-kernel names and in the optional
# "arch" key of a spec entry) -> preprocessor macro used in the `#if` guard
# that wraps the generated tests.
ARCH_TO_MACRO_MAP = {
    "aarch32": "CPUINFO_ARCH_ARM",
    "aarch64": "CPUINFO_ARCH_ARM64",
    "x86": "CPUINFO_ARCH_X86",
    "x86-64": "CPUINFO_ARCH_X86_64",
}

# Instruction set name -> architectures assumed when the micro-kernel name does
# not mention an architecture explicitly. "psimd" maps to no architecture.
ISA_TO_ARCH_MAP = {
    "neon": ["aarch32", "aarch64"],
    "neonfma": ["aarch32", "aarch64"],
    "neonfp16arith": ["aarch32", "aarch64"],
    "sse": ["x86", "x86-64"],
    "sse2": ["x86", "x86-64"],
    "avx": ["x86", "x86-64"],
    "avx512f": ["x86", "x86-64"],
    "psimd": [],
}

# Instruction set name -> macro emitted as the first statement of every
# generated test for that instruction set.
ISA_TO_CHECK_MAP = {
    "neon": "TEST_REQUIRES_ARM_NEON",
    "neonfma": "TEST_REQUIRES_ARM_NEON_FMA",
    "neonfp16arith": "TEST_REQUIRES_ARM_NEON_FP16_ARITH",
    "sse": "TEST_REQUIRES_X86_SSE",
    "sse2": "TEST_REQUIRES_X86_SSE2",
    "avx": "TEST_REQUIRES_X86_AVX",
    "avx512f": "TEST_REQUIRES_X86_AVX512F",
    "psimd": "TEST_REQUIRES_PSIMD",
}
70
71
def split_ukernel_name(name):
    """Extracts tile sizes, architectures and ISA from a micro-kernel name.

    The name is split at the first "__": the last "_"-separated component
    before it must look like "<MR>x<NR>", and the "_"-separated components
    after it are matched against the known architecture and ISA names.

    Args:
      name: Micro-kernel function name containing a "__" separator.

    Returns:
      A tuple (mr, nr, arch, isa): `mr` and `nr` are ints, `arch` is a list of
      architecture names (possibly empty) and `isa` is the ISA name or None.
      An explicit architecture in the name wins; otherwise the architectures
      implied by the ISA are used.

    Raises:
      ValueError: if `name` has no "__" separator or its tile component is not
        of the form "<int>x<int>".
    """
    common_name, separator, target_name = name.partition("__")
    if not separator:
        raise ValueError(
            "micro-kernel name %r has no '__' target separator" % (name,))

    param_spec = common_name.split("_")[-1]
    mr, nr = map(int, param_spec.split("x"))

    arch = []
    isa = None
    for target_part in target_name.split("_"):
        if target_part in ARCH_TO_MACRO_MAP:
            arch = [target_part]
        elif target_part in ISA_TO_ARCH_MAP:
            isa = target_part
    if isa and not arch:
        # Copy so that callers can never mutate the shared table entry.
        arch = list(ISA_TO_ARCH_MAP[isa])
    return mr, nr, arch, isa
87
88
# Template (in the syntax consumed by xngen.preprocess) for all unit tests of
# one micro-kernel. Nesting of `$if` blocks is expressed through indentation,
# two spaces per level.
#
# The k_gt tests iterate k from ADJKBLOCK + 1, so for KBLOCK > 1 their upper
# bound is ADJKBLOCK * 2: with a bound of KBLOCK * 2 the range would be empty
# for pipelined micro-kernels, where ADJKBLOCK is twice KBLOCK. For
# non-pipelined micro-kernels ADJKBLOCK equals KBLOCK and the output is the
# same either way.
TEST_TEMPLATE = """\
TEST(${TEST_NAME}, k_eq_${KBLOCK}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  SpMMMicrokernelTester()
    .mr(${MR})
    .nr(${NR})
    .m(${MR})
    .n(${NR})
    .k(${KBLOCK})
    .sparsity(0.0f)
    .Test(${", ".join(TEST_ARGS)});
}

$if NR > 1:
  TEST(${TEST_NAME}, k_eq_${KBLOCK}_subtile) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    for (uint32_t n = 1; n <= ${NR}; n++) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR})
        .n(n)
        .k(${KBLOCK})
        .sparsity(0.0f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }

$if IS_PIPELINED:
  TEST(${TEST_NAME}, k_eq_${KBLOCK * 2}) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    SpMMMicrokernelTester()
      .mr(${MR})
      .nr(${NR})
      .m(${MR})
      .n(${NR})
      .k(${KBLOCK * 2})
      .sparsity(0.0f)
      .Test(${", ".join(TEST_ARGS)});
  }

  $if NR > 1:
    TEST(${TEST_NAME}, k_eq_${KBLOCK * 2}_subtile) {
      $if ISA_CHECK:
        ${ISA_CHECK};
      for (uint32_t n = 1; n <= ${NR}; n++) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(${MR})
          .n(n)
          .k(${KBLOCK * 2})
          .sparsity(0.0f)
          .Test(${", ".join(TEST_ARGS)});
      }
    }

$if KBLOCK > 1:
  TEST(${TEST_NAME}, k_lt_${ADJKBLOCK}) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    for (size_t k = 1; k < ${ADJKBLOCK}; k++) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR})
        .n(${NR})
        .k(k)
        .sparsity(0.0f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }

  $if NR > 1:
    TEST(${TEST_NAME}, k_lt_${ADJKBLOCK}_subtile) {
      $if ISA_CHECK:
        ${ISA_CHECK};
      for (size_t k = 1; k < ${ADJKBLOCK}; k++) {
        for (uint32_t n = 1; n <= ${NR}; n++) {
          SpMMMicrokernelTester()
            .mr(${MR})
            .nr(${NR})
            .m(${MR})
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(${", ".join(TEST_ARGS)});
        }
      }
    }

TEST(${TEST_NAME}, k_gt_${ADJKBLOCK}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (size_t k = ${ADJKBLOCK + 1}; k < ${KBLOCK * 10 if KBLOCK == 1 else ADJKBLOCK * 2}; k++) {
    SpMMMicrokernelTester()
      .mr(${MR})
      .nr(${NR})
      .m(${MR})
      .n(${NR})
      .k(k)
      .sparsity(0.0f)
      .Test(${", ".join(TEST_ARGS)});
  }
}

$if NR > 1:
  TEST(${TEST_NAME}, k_gt_${ADJKBLOCK}_subtile) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    for (size_t k = ${ADJKBLOCK + 1}; k < ${10 if KBLOCK == 1 else ADJKBLOCK * 2}; k++) {
      for (uint32_t n = 1; n <= ${NR}; n++) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(${MR})
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(${", ".join(TEST_ARGS)});
      }
    }
  }

$if KBLOCK > 1:
  TEST(${TEST_NAME}, k_div_${KBLOCK}) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    for (size_t k = ${ADJKBLOCK + KBLOCK}; k <= ${KBLOCK * 10}; k += ${KBLOCK}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR})
        .n(${NR})
        .k(k)
        .sparsity(0.0f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }

  $if NR > 1:
    TEST(${TEST_NAME}, k_div_${KBLOCK}_subtile) {
      $if ISA_CHECK:
        ${ISA_CHECK};
      for (size_t k = ${ADJKBLOCK + KBLOCK}; k <= ${KBLOCK * 10}; k += ${KBLOCK}) {
        for (uint32_t n = 1; n <= ${NR}; n++) {
          SpMMMicrokernelTester()
            .mr(${MR})
            .nr(${NR})
            .m(${MR})
            .n(n)
            .k(k)
            .sparsity(0.0f)
            .Test(${", ".join(TEST_ARGS)});
        }
      }
    }

TEST(${TEST_NAME}, n_gt_${NR}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t n = ${NR + 1}; n < ${max(10, NR * 2)}; n++) {
    for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR})
        .n(n)
        .k(k)
        .sparsity(0.0f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }
}

$if NR > 1:
  TEST(${TEST_NAME}, n_div_${NR}) {
    $if ISA_CHECK:
      ${ISA_CHECK};
    for (uint32_t n = ${2 * NR}; n <= ${3 * NR}; n += ${NR}) {
      for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(${MR})
          .n(n)
          .k(k)
          .Test(${", ".join(TEST_ARGS)});
      }
    }
  }

TEST(${TEST_NAME}, m_lt_${MR}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t m = ${1}; m < ${MR}; m++) {
    for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
      for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(m)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(${", ".join(TEST_ARGS)});
      }
    }
  }
}

TEST(${TEST_NAME}, m_div_${MR}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t m = ${MR * 2}; m <= ${MR * 3}; m += ${MR}) {
    for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
      for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(m)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(${", ".join(TEST_ARGS)});
      }
    }
  }
}

TEST(${TEST_NAME}, m_gt_${MR}) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t m = ${MR + 1}; m < ${MR * 2}; m++) {
    for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
      for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
        SpMMMicrokernelTester()
          .mr(${MR})
          .nr(${NR})
          .m(m)
          .n(n)
          .k(k)
          .sparsity(0.0f)
          .Test(${", ".join(TEST_ARGS)});
      }
    }
  }
}

TEST(${TEST_NAME}, qmin) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
    for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR * 2})
        .n(n)
        .k(k)
        .sparsity(0.0f)
        .qmin(128)
        .Test(${", ".join(TEST_ARGS)});
    }
  }
}

TEST(${TEST_NAME}, qmax) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
    for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR * 2})
        .n(n)
        .k(k)
        .sparsity(0.0f)
        .qmax(128)
        .Test(${", ".join(TEST_ARGS)});
    }
  }
}

TEST(${TEST_NAME}, half_sparse) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
    for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR * 2})
        .n(n)
        .k(k)
        .sparsity(0.5f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }
}

TEST(${TEST_NAME}, zero_weights) {
  $if ISA_CHECK:
    ${ISA_CHECK};
  for (uint32_t n = 1; n < ${max(10, NR * 5)}; n += ${NR + 1}) {
    for (size_t k = 1; k <= ${KBLOCK * 5}; k += ${KBLOCK + 1}) {
      SpMMMicrokernelTester()
        .mr(${MR})
        .nr(${NR})
        .m(${MR * 2})
        .n(n)
        .k(k)
        .sparsity(1.0f)
        .Test(${", ".join(TEST_ARGS)});
    }
  }
}
"""
411
412
def generate_test_cases(ukernel, mr, nr, k_block, is_pipelined, isa):
    """Generates all test cases for a SpMM micro-kernel.

    Args:
      ukernel: C name of the micro-kernel function. It must contain at least
               three underscores; the text after the first underscore, with
               "UKERNEL_" removed and upper-cased, becomes the test name.
      mr: MR parameter of the SpMM micro-kernel.
      nr: NR parameter of the SpMM micro-kernel.
      k_block: Number of K values processed per one iteration of the main loop
               of the micro-kernel.
      is_pipelined: Indicates if the micro-kernel is implemented with software
                    pipelining. Additional test cases are generated for
                    software pipelined micro-kernels to separately test
                    prologue + epilogue of the pipelined loop and iteration of
                    the pipelined loop.
      isa: instruction set required to run the micro-kernel. Generated unit
           test will skip execution if the host processor doesn't support this
           ISA. May be None.

    Returns:
      Code for the test case.

    Raises:
      ValueError: if `ukernel` has fewer than three underscores.
    """
    _, test_name = ukernel.split("_", 1)
    _, datatype, ukernel_type, _ = ukernel.split("_", 3)

    test_args = [ukernel]
    # Micro-kernels without a dedicated ISA, and psimd ones, are tested as the
    # scalar variant of the tester.
    if not isa or isa == "psimd":
        test_args.append("SpMMMicrokernelTester::Variant::Scalar")

    return xngen.preprocess(TEST_TEMPLATE, {
        "TEST_NAME": test_name.upper().replace("UKERNEL_", ""),
        "TEST_ARGS": test_args,
        "UKERNEL_TYPE": ukernel_type.upper(),
        "DATATYPE": datatype,
        "MR": mr,
        "NR": nr,
        "KBLOCK": k_block,
        # A pipelined loop needs two K blocks before its steady state runs.
        "ADJKBLOCK": 2 * k_block if is_pipelined else k_block,
        "IS_PIPELINED": is_pipelined,
        "ISA_CHECK": ISA_TO_CHECK_MAP.get(isa, ""),
        "next_prime": next_prime,
    })
450
451
def main(args):
    """Reads the micro-kernel spec and writes the generated C++ test file.

    Args:
      args: Command-line arguments without the program name.

    Raises:
      ValueError: if the spec file does not contain a list, or if a
        micro-kernel targets an architecture without a known guard macro.
    """
    options = parser.parse_args(args)

    with codecs.open(options.spec, "r", encoding="utf-8") as spec_file:
        spec_yaml = yaml.safe_load(spec_file)
        if not isinstance(spec_yaml, list):
            raise ValueError("expected a list of micro-kernels in the spec")

    header = """\
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
// Specification: {specification}
// Generator: {generator}


#include <cpuinfo.h>
#include <gtest/gtest.h>

#include <xnnpack/spmm.h>
#include <xnnpack/isa-checks.h>

#include "spmm-microkernel-tester.h"
""".format(specification=options.spec, generator=sys.argv[0])

    # Collect the output pieces and join them once at the end.
    pieces = [header]
    for ukernel_spec in spec_yaml:
        name = ukernel_spec["name"]
        k_block = int(ukernel_spec["k-block"])
        pipelined = bool(ukernel_spec.get("pipelined", False))
        mr, nr, arch, isa = split_ukernel_name(name)

        # specification can override architecture
        arch = ukernel_spec.get("arch", arch)

        test_case = generate_test_cases(name, mr, nr, k_block, pipelined, isa)
        test_case = remove_duplicate_newlines(test_case)

        if arch:
            unknown = [a for a in arch if a not in ARCH_TO_MACRO_MAP]
            if unknown:
                raise ValueError(
                    "unknown architecture(s) %r for micro-kernel %s"
                    % (unknown, name))
            guard_macro = " || ".join(ARCH_TO_MACRO_MAP[a] for a in arch)
        elif isa == "psimd":
            guard_macro = "!CPUINFO_ARCH_ASMJS && !CPUINFO_ARCH_WASM"
        else:
            guard_macro = None

        pieces.append("\n\n")
        if guard_macro is not None:
            pieces.append("#if %s\n" % guard_macro)
            pieces.append(indent(test_case) + "\n")
            pieces.append("#endif // %s\n" % guard_macro)
        else:
            pieces.append(test_case)

    with codecs.open(options.output, "w", encoding="utf-8") as output_file:
        output_file.write("".join(pieces))


if __name__ == "__main__":
    main(sys.argv[1:])