// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

9#pragma once
10
11#include <stddef.h>
12#include <stdint.h>
13
14#include <xnnpack/params.h>
15#include <xnnpack/common.h>
16
17#ifdef __cplusplus
18extern "C" {
19#endif
20
21
// Declares the prototype of an F32 GEMM micro-kernel named `fn_name`.
//
// The expansion already ends with a semicolon, so the macro is invoked
// without a trailing `;`.
//
// Parameters of the declared function (roles inferred from the names only;
// NOTE(review): confirm against the kernel implementations):
//   mr, nr, k               - size_t extents passed to the kernel
//   a, a_stride             - const float input pointer and its stride
//   w                       - const float weights pointer
//   c, cm_stride, cn_stride - float output pointer and its two strides
//   params                  - pointer to union xnn_f32_default_params
#define DECLARE_F32_GEMM_UKERNEL_FUNCTION(fn_name)   \
  XNN_INTERNAL void fn_name(                         \
      size_t mr,                                     \
      size_t nr,                                     \
      size_t k,                                      \
      const float* a,                                \
      size_t a_stride,                               \
      const float* w,                                \
      float* c,                                      \
      size_t cm_stride,                              \
      size_t cn_stride,                              \
      const union xnn_f32_default_params* params);

// Declares the prototype of an F32 GEMM "minmax" micro-kernel named
// `fn_name`. The signature matches the plain F32 GEMM prototype except that
// `params` points to union xnn_f32_minmax_params.
//
// The expansion already ends with a semicolon, so the macro is invoked
// without a trailing `;`.
//
// Parameter roles are inferred from the names only;
// NOTE(review): confirm against the kernel implementations.
#define DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                              \
      size_t mr,                                          \
      size_t nr,                                          \
      size_t k,                                           \
      const float* a,                                     \
      size_t a_stride,                                    \
      const float* w,                                     \
      float* c,                                           \
      size_t cm_stride,                                   \
      size_t cn_stride,                                   \
      const union xnn_f32_minmax_params* params);

Marat Dukhande06f492020-04-09 00:19:31 -070048DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64)
49DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64)
50DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64)
51DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128)
52DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64)
53DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64)
54DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -080055
Marat Dukhande06f492020-04-09 00:19:31 -070056DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64)
57DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64)
58DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64)
59DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128)
60DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64)
61DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64)
62DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -080063
Marat Dukhande06f492020-04-09 00:19:31 -070064DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64)
65DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64)
66DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128)
67DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64)
68DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -080069
Marat Dukhande06f492020-04-09 00:19:31 -070070DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64)
71DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64)
72DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128)
73DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64)
74DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -080075
Marat Dukhande06f492020-04-09 00:19:31 -070076DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8s4__neon)
77DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8s4__neon)
78DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8s4__neon)
79DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_8x8s4__neon)
Marat Dukhaneccfd712019-12-08 16:49:27 -080080
Marat Dukhande06f492020-04-09 00:19:31 -070081DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma)
82DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma)
83DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma)
84DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma)
Marat Dukhaneccfd712019-12-08 16:49:27 -080085
Marat Dukhan32f93812020-05-17 20:31:21 -070086DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_4x4__aarch32_vfp_ld64)
87
Marat Dukhan3b98f6b2020-05-17 10:09:22 -070088DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x4__aarch32_vfp_ld64)
Marat Dukhande06f492020-04-09 00:19:31 -070089DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64)
90DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53)
91DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55)
92DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75)
93DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75)
Marat Dukhaneccfd712019-12-08 16:49:27 -080094
Frank Barchard3cb54f92020-04-10 10:46:08 -070095DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64)
Marat Dukhande06f492020-04-09 00:19:31 -070096DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53)
97DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53)
98DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57)
99DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75)
100DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53)
101DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53)
102DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55)
103DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57)
104DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75)
105DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128)
106DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64)
107DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57)
108DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75)
109DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53)
110DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55)
111DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73)
112DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57)
113DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75)
114DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ios)
115DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128)
116DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800117
Marat Dukhande06f492020-04-09 00:19:31 -0700118DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__sse_load1)
119DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__sse_load1)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800120
Marat Dukhande06f492020-04-09 00:19:31 -0700121DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__sse_dup)
122DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__sse_dup)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800123
Marat Dukhande06f492020-04-09 00:19:31 -0700124DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8s4__sse)
125DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8s4__sse)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800126
Marat Dukhande06f492020-04-09 00:19:31 -0700127DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2c4__sse)
Marat Dukhanb00004d2020-02-13 09:28:35 -0800128
Marat Dukhande06f492020-04-09 00:19:31 -0700129DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast)
130DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast)
131DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast)
132DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast)
133DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast)
134DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast)
135DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast)
136DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast)
137DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800138
Marat Dukhande06f492020-04-09 00:19:31 -0700139DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast)
140DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast)
141DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast)
142DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast)
143DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast)
144DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast)
145DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast)
146DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast)
147DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast)
148DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800149
Marat Dukhande06f492020-04-09 00:19:31 -0700150DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast)
151DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast)
152DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast)
153DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast)
Marat Dukhan27121322019-12-09 14:57:40 -0800154
Marat Dukhande06f492020-04-09 00:19:31 -0700155DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast)
156DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast)
157DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast)
158DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast)
159DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast)
160DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800161
Marat Dukhande06f492020-04-09 00:19:31 -0700162DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__psimd_loadsplat)
163DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__psimd_loadsplat)
164DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__psimd_loadsplat)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800165
Marat Dukhande06f492020-04-09 00:19:31 -0700166DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8__psimd_splat)
167DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8__psimd_splat)
168DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8__psimd_splat)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800169
Marat Dukhande06f492020-04-09 00:19:31 -0700170DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x8s4__psimd)
171DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x8s4__psimd)
172DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_6x8s4__psimd)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800173
Marat Dukhande06f492020-04-09 00:19:31 -0700174DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2c4__psimd)
Marat Dukhanb00004d2020-02-13 09:28:35 -0800175
Marat Dukhan163a7e62020-04-09 04:19:26 -0700176DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_4x2__wasm)
177DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_1x4__wasm)
178DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_2x4__wasm)
179DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_4x4__wasm)
180
Marat Dukhande06f492020-04-09 00:19:31 -0700181DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2__wasm)
182DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x4__wasm)
183DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_2x4__wasm)
184DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x4__wasm)
Marat Dukhan436ebe62019-12-04 15:10:12 -0800185
Marat Dukhan163a7e62020-04-09 04:19:26 -0700186DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_4x2__scalar)
187DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_1x4__scalar)
188DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_2x4__scalar)
189DECLARE_F32_GEMM_UKERNEL_FUNCTION(xnn_f32_gemm_ukernel_4x4__scalar)
190
Marat Dukhande06f492020-04-09 00:19:31 -0700191DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x2__scalar)
192DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_1x4__scalar)
193DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_2x4__scalar)
194DECLARE_F32_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemm_minmax_ukernel_4x4__scalar)
Marat Dukhan436ebe62019-12-04 15:10:12 -0800195
// Declares the prototype of an F32 GEMMINC minmax micro-kernel named
// `fn_name`. Compared with the F32 GEMM minmax prototype, the declared
// function takes one additional `const float* acc` argument placed just
// before `params`.
//
// The expansion already ends with a semicolon, so the macro is invoked
// without a trailing `;`.
//
// NOTE(review): `acc` presumably supplies initial accumulator values
// (inferred from the name) - confirm against the kernel implementations.
#define DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                                 \
      size_t mr,                                             \
      size_t nr,                                             \
      size_t k,                                              \
      const float* a,                                        \
      size_t a_stride,                                       \
      const float* w,                                        \
      float* c,                                              \
      size_t cm_stride,                                      \
      size_t cn_stride,                                      \
      const float* acc,                                      \
      const union xnn_f32_minmax_params* params);

Marat Dukhande06f492020-04-09 00:19:31 -0700210DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__neon_lane_ld64)
211DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld64)
212DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neon_lane_ld128)
213DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64)
214DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64)
215DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800216
Marat Dukhande06f492020-04-09 00:19:31 -0700217DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_lane_ld64)
218DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64)
219DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld128)
220DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64)
221DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64)
222DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800223
Marat Dukhande06f492020-04-09 00:19:31 -0700224DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__neon_dup_ld64)
225DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64)
226DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld128)
227DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64)
228DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800229
Marat Dukhande06f492020-04-09 00:19:31 -0700230DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__neonfma_dup_ld64)
231DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld64)
232DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_dup_ld128)
233DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64)
234DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld128)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800235
Marat Dukhande06f492020-04-09 00:19:31 -0700236DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8s4__neon)
237DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8s4__neon)
238DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8s4__neon)
239DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_8x8s4__neon)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800240
Marat Dukhande06f492020-04-09 00:19:31 -0700241DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8s4__neonfma)
242DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8s4__neonfma)
243DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma)
244DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800245
Frank Barchard3cb54f92020-04-10 10:46:08 -0700246DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_ld64)
Marat Dukhande06f492020-04-09 00:19:31 -0700247DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53)
248DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53)
249DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57)
250DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75)
251DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53)
252DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53)
253DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55)
254DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57)
255DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75)
256DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld128)
257DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__aarch64_neonfma_ld64)
258DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57)
259DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75)
260DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53)
261DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55)
262DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73)
263DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57)
264DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75)
265DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ios)
266DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld128)
267DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__aarch64_neonfma_ld64)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800268
Marat Dukhande06f492020-04-09 00:19:31 -0700269DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__sse_load1)
270DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__sse_load1)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800271
Marat Dukhande06f492020-04-09 00:19:31 -0700272DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__sse_dup)
273DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__sse_dup)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800274
Marat Dukhande06f492020-04-09 00:19:31 -0700275DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8s4__sse)
276DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8s4__sse)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800277
Marat Dukhande06f492020-04-09 00:19:31 -0700278DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__avx_broadcast)
279DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__avx_broadcast)
280DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__avx_broadcast)
281DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__avx_broadcast)
282DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_7x8__avx_broadcast)
283DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x16__avx_broadcast)
284DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_3x16__avx_broadcast)
285DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x16__avx_broadcast)
286DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x16__avx_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800287
Marat Dukhande06f492020-04-09 00:19:31 -0700288DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__fma3_broadcast)
289DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__fma3_broadcast)
290DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x8__fma3_broadcast)
291DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__fma3_broadcast)
292DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_7x8__fma3_broadcast)
293DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast)
294DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x16__fma3_broadcast)
295DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_3x16__fma3_broadcast)
296DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x16__fma3_broadcast)
297DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x16__fma3_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800298
Marat Dukhande06f492020-04-09 00:19:31 -0700299DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x16s4__fma3_broadcast)
300DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_3x16s4__fma3_broadcast)
301DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x16s4__fma3_broadcast)
302DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x16s4__fma3_broadcast)
Marat Dukhan27121322019-12-09 14:57:40 -0800303
Marat Dukhande06f492020-04-09 00:19:31 -0700304DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x16__avx512f_broadcast)
305DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x16__avx512f_broadcast)
306DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_5x16__avx512f_broadcast)
307DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x16__avx512f_broadcast)
308DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_7x16__avx512f_broadcast)
309DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_8x16__avx512f_broadcast)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800310
Marat Dukhande06f492020-04-09 00:19:31 -0700311DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__psimd_loadsplat)
312DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__psimd_loadsplat)
313DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__psimd_loadsplat)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800314
Marat Dukhande06f492020-04-09 00:19:31 -0700315DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8__psimd_splat)
316DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8__psimd_splat)
317DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8__psimd_splat)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800318
Marat Dukhande06f492020-04-09 00:19:31 -0700319DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x8s4__psimd)
320DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x8s4__psimd)
321DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_6x8s4__psimd)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700322
Marat Dukhande06f492020-04-09 00:19:31 -0700323DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x4__wasm)
324DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_2x4__wasm)
325DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x2__wasm)
326DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x4__wasm)
Marat Dukhan436ebe62019-12-04 15:10:12 -0800327
Marat Dukhande06f492020-04-09 00:19:31 -0700328DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_1x4__scalar)
329DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_2x4__scalar)
330DECLARE_F32_GEMMINC_MINMAX_UKERNEL_FUNCTION(xnn_f32_gemminc_minmax_ukernel_4x4__scalar)
Marat Dukhan436ebe62019-12-04 15:10:12 -0800331
// Declares the prototype of an F16 GEMM minmax micro-kernel named `fn_name`.
//
// The matrix pointers (`a`, `w`, `c`) are untyped `void` pointers, and
// `params` points to struct xnn_f16_scaleminmax_params.
//
// The declaration is marked XNN_INTERNAL so that it matches the linkage
// annotation used by the other micro-kernel declaration macros in this
// header.
//
// The expansion already ends with a semicolon, so the macro is invoked
// without a trailing `;`.
//
// Parameter roles are inferred from the names only;
// NOTE(review): confirm against the kernel implementations.
#define DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                              \
      size_t mr,                                          \
      size_t nr,                                          \
      size_t k,                                           \
      const void* a,                                      \
      size_t a_stride,                                    \
      const void* w,                                      \
      void* c,                                            \
      size_t cm_stride,                                   \
      size_t cn_stride,                                   \
      const struct xnn_f16_scaleminmax_params* params);

Frank Barchard36b76b62020-04-10 12:39:17 -0700345DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_1x16__aarch64_neonfp16arith_ld32)
346DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_4x16__aarch64_neonfp16arith_ld32)
347DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_6x16__aarch64_neonfp16arith_ld32)
Frank Barchardbddfbcd2020-04-15 12:32:41 -0700348DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_1x8__aarch64_neonfp16arith_ld64)
349DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_4x8__aarch64_neonfp16arith_ld64)
350DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_6x8__aarch64_neonfp16arith_ld64)
Frank Barchard3b8e5662020-04-20 12:12:53 -0700351DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_8x8__aarch64_neonfp16arith_ld64)
Frank Barchard1f4e4612020-04-13 18:24:54 -0700352DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_1x8__neonfp16arith_ld64)
Marat Dukhande06f492020-04-09 00:19:31 -0700353DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_4x8__neonfp16arith_ld64)
354DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_6x8__neonfp16arith_ld64)
355DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64)
Frank Barchard3f9f99f2020-05-06 01:12:04 -0700356DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_1x16__neonfp16arith_ld64)
357DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_4x16__neonfp16arith_ld64)
358DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_6x16__neonfp16arith_ld64)
359DECLARE_F16_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700360
// Declares the prototype of a Q8 GEMM minmax micro-kernel named `fn_name`.
//
// Input (`a`) and output (`c`) are uint8_t pointers, the weights pointer
// (`w`) is an untyped `const void*`, and `params` points to
// union xnn_q8_gemm_params.
//
// The expansion already ends with a semicolon, so the macro is invoked
// without a trailing `;`.
//
// Parameter roles are inferred from the names only;
// NOTE(review): confirm against the kernel implementations.
#define DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(fn_name) \
  XNN_INTERNAL void fn_name(                             \
      size_t mr,                                         \
      size_t nr,                                         \
      size_t k,                                          \
      const uint8_t* a,                                  \
      size_t a_stride,                                   \
      const void* w,                                     \
      uint8_t* c,                                        \
      size_t cm_stride,                                  \
      size_t cn_stride,                                  \
      const union xnn_q8_gemm_params* params);


Marat Dukhande06f492020-04-09 00:19:31 -0700375DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_4x8__neon)
376DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_8x8__neon)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700377
Marat Dukhande06f492020-04-09 00:19:31 -0700378DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_4x8__aarch32_neon)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800379
Marat Dukhande06f492020-04-09 00:19:31 -0700380DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_8x8__aarch64_neon)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800381
Marat Dukhande06f492020-04-09 00:19:31 -0700382DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_2x4c8__sse2)
383DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_4x4c2__sse2)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800384
Marat Dukhande06f492020-04-09 00:19:31 -0700385DECLARE_Q8_GEMM_MINMAX_UKERNEL_FUNCTION(xnn_q8_gemm_minmax_ukernel_2x2__scalar)
Marat Dukhaneccfd712019-12-08 16:49:27 -0800386
XNNPACK Teamb455b122019-09-27 18:10:33 -0700387#ifdef __cplusplus
Marat Dukhan80fc9322019-09-29 21:06:36 -0700388} // extern "C"
XNNPACK Teamb455b122019-09-27 18:10:33 -0700389#endif