blob: 48cb6049d144ebd42778dfb4e32f78da53c6f9c9 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001# Copyright 2019 Google LLC
2#
3# This source code is licensed under the BSD-style license found in the
4# LICENSE file in the root directory of this source tree.
Frank Barchard21be34f2019-10-09 19:32:19 -07005- name: xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a53
6 k-block: 8
7 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -07008 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -07009- name: xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a57
10 k-block: 8
11 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070012 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070013- name: xnn_f32_gemm_ukernel_1x8__aarch64_neonfma_cortex_a75
14 k-block: 8
15 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070016 assembly: true
Frank Barchard46fb8072019-10-25 12:54:22 -070017- name: xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a53
Frank Barchard0ecc2ab2019-11-14 10:57:48 -080018 k-block: 4
19 pipelined: true
Frank Barchard46fb8072019-10-25 12:54:22 -070020 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070021- name: xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a57
22 k-block: 8
23 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070024 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070025- name: xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_cortex_a75
26 k-block: 8
27 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070028 assembly: true
Frank Barchard387c2d12019-12-16 19:14:07 -080029- name: xnn_f32_gemm_ukernel_5x8__aarch64_neonfma_cortex_a57
30 k-block: 8
31 pipelined: true
32 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070033- name: xnn_f32_gemm_ukernel_5x8__aarch64_neonfma_cortex_a75
34 k-block: 8
35 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070036 assembly: true
Frank Barcharda7fb8552019-10-23 17:14:17 -070037- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a53
Frank Barchard00bf68e2019-10-27 03:00:09 -070038 k-block: 4
Frank Barcharde64f91a2019-11-11 13:18:00 -080039 pipelined: true
Frank Barcharda7fb8552019-10-23 17:14:17 -070040 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070041- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a73
42 k-block: 8
43 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070044 assembly: true
Frank Barchard387c2d12019-12-16 19:14:07 -080045- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a57
46 k-block: 8
47 pipelined: true
48 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070049- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_cortex_a75
50 k-block: 8
51 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070052 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070053- name: xnn_f32_gemm_ukernel_1x12__aarch64_neonfma_cortex_a53
54 k-block: 4
55 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070056 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070057- name: xnn_f32_gemm_ukernel_4x12__aarch64_neonfma_cortex_a53
58 k-block: 4
59 pipelined: true
Frank Barchard7e955972019-10-11 10:34:25 -070060 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070061- name: xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_ld64
62 k-block: 2
Frank Barchard7e955972019-10-11 10:34:25 -070063 assembly: true
Frank Barchard13916042019-12-11 10:56:34 -080064- name: xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a53
65 k-block: 4
Frank Barchardca27b402020-02-03 17:47:32 -080066 pipelined: true
Frank Barchard3e237f22019-12-04 23:08:51 -080067- name: xnn_f32_gemm_ukernel_4x8__aarch32_neon_cortex_a75
68 k-block: 4
Frank Barchardca27b402020-02-03 17:47:32 -080069 pipelined: true
Frank Barchard9f7d5552019-12-12 10:58:10 -080070- name: xnn_f32_gemm_ukernel_4x8__aarch32_neon_pld_cortex_a75
71 k-block: 4
Frank Barchardca27b402020-02-03 17:47:32 -080072 pipelined: true
Frank Barchard8b0f0262019-11-27 23:18:40 -080073- name: xnn_f32_gemm_ukernel_4x8__aarch32_neon_ld64
74 k-block: 2
XNNPACK Teamb455b122019-09-27 18:10:33 -070075- name: xnn_f32_gemm_ukernel_4x8__aarch64_neonfma_ld128
76 k-block: 4
Frank Barchard7e955972019-10-11 10:34:25 -070077 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070078- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld64
79 k-block: 2
Frank Barchard7e955972019-10-11 10:34:25 -070080 assembly: true
XNNPACK Teamb455b122019-09-27 18:10:33 -070081- name: xnn_f32_gemm_ukernel_6x8__aarch64_neonfma_ld128
82 k-block: 4
Frank Barchard7e955972019-10-11 10:34:25 -070083 assembly: true
Frank Barchard91317c52019-11-22 10:54:35 -080084- name: xnn_f32_gemm_ukernel_1x8__neon_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070085 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -080086- name: xnn_f32_gemm_ukernel_4x2__neon_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -080088- name: xnn_f32_gemm_ukernel_4x8__neon_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070089 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -080090- name: xnn_f32_gemm_ukernel_4x8__neon_lane_ld128
XNNPACK Teamb455b122019-09-27 18:10:33 -070091 k-block: 4
Frank Barchard91317c52019-11-22 10:54:35 -080092- name: xnn_f32_gemm_ukernel_5x8__neon_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -080094- name: xnn_f32_gemm_ukernel_6x8__neon_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070095 k-block: 2
Frank Barchard69172d92019-11-26 16:22:39 -080096- name: xnn_f32_gemm_ukernel_6x8__neon_lane_ld128
97 k-block: 4
Frank Barchard91317c52019-11-22 10:54:35 -080098- name: xnn_f32_gemm_ukernel_1x8__neonfma_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -070099 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -0800100 arch:
101 - aarch64
102- name: xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700103 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -0800104 arch:
105 - aarch64
106- name: xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128
XNNPACK Teamb455b122019-09-27 18:10:33 -0700107 k-block: 4
Frank Barchard91317c52019-11-22 10:54:35 -0800108 arch:
109 - aarch64
110- name: xnn_f32_gemm_ukernel_5x8__neonfma_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700111 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -0800112 arch:
113 - aarch64
114- name: xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700115 k-block: 2
Frank Barchard91317c52019-11-22 10:54:35 -0800116 arch:
117 - aarch64
Frank Barchard69172d92019-11-26 16:22:39 -0800118- name: xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld128
119 k-block: 4
120 arch:
121 - aarch64
Frank Barchard5243bb02019-11-22 16:37:50 -0800122- name: xnn_f32_gemm_ukernel_1x8__neon_dup_ld64
123 k-block: 2
124- name: xnn_f32_gemm_ukernel_4x8__neon_dup_ld64
125 k-block: 2
126- name: xnn_f32_gemm_ukernel_4x8__neon_dup_ld128
127 k-block: 4
128- name: xnn_f32_gemm_ukernel_6x8__neon_dup_ld64
129 k-block: 2
Frank Barchard69172d92019-11-26 16:22:39 -0800130- name: xnn_f32_gemm_ukernel_6x8__neon_dup_ld128
131 k-block: 4
Frank Barchard5243bb02019-11-22 16:37:50 -0800132- name: xnn_f32_gemm_ukernel_1x8__neonfma_dup_ld64
133 k-block: 2
134- name: xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64
135 k-block: 2
136- name: xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld128
137 k-block: 4
138- name: xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64
139 k-block: 2
Frank Barchard69172d92019-11-26 16:22:39 -0800140- name: xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld128
141 k-block: 4
Frank Barchard5243bb02019-11-22 16:37:50 -0800142- name: xnn_f32_gemm_ukernel_1x8s4__neon
143 k-block: 4
144- name: xnn_f32_gemm_ukernel_4x8s4__neon
145 k-block: 4
146- name: xnn_f32_gemm_ukernel_6x8s4__neon
147 k-block: 4
148- name: xnn_f32_gemm_ukernel_8x8s4__neon
149 k-block: 4
Frank Barcharddf06d802019-11-20 15:53:46 -0800150- name: xnn_f32_gemm_ukernel_1x8s4__neonfma
151 k-block: 4
152- name: xnn_f32_gemm_ukernel_4x8s4__neonfma
153 k-block: 4
154- name: xnn_f32_gemm_ukernel_6x8s4__neonfma
155 k-block: 4
156- name: xnn_f32_gemm_ukernel_8x8s4__neonfma
157 k-block: 4
XNNPACK Teamb455b122019-09-27 18:10:33 -0700158- name: xnn_f32_gemm_ukernel_1x8__sse_load1
159 k-block: 1
160- name: xnn_f32_gemm_ukernel_4x8__sse_load1
161 k-block: 1
162- name: xnn_f32_gemm_ukernel_1x8__sse_dup
163 k-block: 4
164- name: xnn_f32_gemm_ukernel_4x8__sse_dup
165 k-block: 4
166- name: xnn_f32_gemm_ukernel_1x8s4__sse
167 k-block: 4
168- name: xnn_f32_gemm_ukernel_4x8s4__sse
169 k-block: 4
Marat Dukhanfda12b82019-11-21 12:27:59 -0800170- name: xnn_f32_gemm_ukernel_1x8__avx_broadcast
171 k-block: 1
172- name: xnn_f32_gemm_ukernel_4x8__avx_broadcast
173 k-block: 1
174- name: xnn_f32_gemm_ukernel_5x8__avx_broadcast
175 k-block: 1
176- name: xnn_f32_gemm_ukernel_6x8__avx_broadcast
177 k-block: 1
178- name: xnn_f32_gemm_ukernel_7x8__avx_broadcast
179 k-block: 1
Marat Dukhaneccfd712019-12-08 16:49:27 -0800180- name: xnn_f32_gemm_ukernel_1x16__avx_broadcast
181 k-block: 1
182- name: xnn_f32_gemm_ukernel_3x16__avx_broadcast
183 k-block: 1
184- name: xnn_f32_gemm_ukernel_4x16__avx_broadcast
185 k-block: 1
186- name: xnn_f32_gemm_ukernel_5x16__avx_broadcast
187 k-block: 1
Marat Dukhanfda12b82019-11-21 12:27:59 -0800188- name: xnn_f32_gemm_ukernel_1x8__fma3_broadcast
189 k-block: 1
190- name: xnn_f32_gemm_ukernel_4x8__fma3_broadcast
191 k-block: 1
192- name: xnn_f32_gemm_ukernel_5x8__fma3_broadcast
193 k-block: 1
194- name: xnn_f32_gemm_ukernel_6x8__fma3_broadcast
195 k-block: 1
196- name: xnn_f32_gemm_ukernel_7x8__fma3_broadcast
197 k-block: 1
198- name: xnn_f32_gemm_ukernel_8x8__fma3_broadcast
199 k-block: 1
Marat Dukhaneccfd712019-12-08 16:49:27 -0800200- name: xnn_f32_gemm_ukernel_1x16__fma3_broadcast
201 k-block: 1
202- name: xnn_f32_gemm_ukernel_3x16__fma3_broadcast
203 k-block: 1
204- name: xnn_f32_gemm_ukernel_4x16__fma3_broadcast
205 k-block: 1
206- name: xnn_f32_gemm_ukernel_5x16__fma3_broadcast
207 k-block: 1
Marat Dukhan27121322019-12-09 14:57:40 -0800208- name: xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast
209 k-block: 4
210- name: xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast
211 k-block: 4
212- name: xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast
213 k-block: 4
214- name: xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast
215 k-block: 4
Marat Dukhan0f349c42019-11-27 11:58:54 -0800216- name: xnn_f32_gemm_ukernel_1x16__avx512f_broadcast
217 k-block: 1
218- name: xnn_f32_gemm_ukernel_4x16__avx512f_broadcast
219 k-block: 1
220- name: xnn_f32_gemm_ukernel_5x16__avx512f_broadcast
221 k-block: 1
222- name: xnn_f32_gemm_ukernel_6x16__avx512f_broadcast
223 k-block: 1
224- name: xnn_f32_gemm_ukernel_7x16__avx512f_broadcast
225 k-block: 1
226- name: xnn_f32_gemm_ukernel_8x16__avx512f_broadcast
227 k-block: 1
XNNPACK Teamb455b122019-09-27 18:10:33 -0700228- name: xnn_f32_gemm_ukernel_1x8__psimd_loadsplat
229 k-block: 1
230- name: xnn_f32_gemm_ukernel_4x8__psimd_loadsplat
231 k-block: 1
232- name: xnn_f32_gemm_ukernel_6x8__psimd_loadsplat
233 k-block: 1
234- name: xnn_f32_gemm_ukernel_1x8__psimd_splat
235 k-block: 4
236- name: xnn_f32_gemm_ukernel_4x8__psimd_splat
237 k-block: 4
238- name: xnn_f32_gemm_ukernel_6x8__psimd_splat
239 k-block: 4
240- name: xnn_f32_gemm_ukernel_1x8s4__psimd
241 k-block: 4
242- name: xnn_f32_gemm_ukernel_4x8s4__psimd
243 k-block: 4
244- name: xnn_f32_gemm_ukernel_6x8s4__psimd
245 k-block: 4
Marat Dukhan436ebe62019-12-04 15:10:12 -0800246- name: xnn_f32_gemm_ukernel_1x4__wasm
247 k-block: 1
248- name: xnn_f32_gemm_ukernel_2x4__wasm
249 k-block: 1
250- name: xnn_f32_gemm_ukernel_4x4__wasm
251 k-block: 1
252- name: xnn_f32_gemm_ukernel_4x2__wasm
253 k-block: 1
XNNPACK Teamb455b122019-09-27 18:10:33 -0700254- name: xnn_f32_gemm_ukernel_1x4__scalar
255 k-block: 1
256- name: xnn_f32_gemm_ukernel_2x4__scalar
257 k-block: 1
258- name: xnn_f32_gemm_ukernel_4x4__scalar
259 k-block: 1
260- name: xnn_f32_gemm_ukernel_4x2__scalar
261 k-block: 1