// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>

#include "bench/end2end.h"
#include "bench/utils.h"
#include "models/models.h"
#include <xnnpack/gemm.h>
#include <xnnpack/igemm.h>
#include <xnnpack/params.h>


24static void GEMMEnd2EndBenchmark(
25 benchmark::State& state,
Frank Barcharde72e2872019-10-31 11:12:15 -070026 models::ExecutionPlanFactory model_factory,
Marat Dukhande06f492020-04-09 00:19:31 -070027 xnn_f32_gemm_minmax_ukernel_function gemm,
28 xnn_f32_igemm_minmax_ukernel_function igemm,
29 xnn_f32_gemm_minmax_ukernel_function gemm1,
30 xnn_f32_igemm_minmax_ukernel_function igemm1,
Marat Dukhanc8466f52019-11-25 18:01:10 -080031 uint8_t mr, uint8_t nr, uint8_t log2_kr = 0, uint8_t log2_sr = 0,
32 benchmark::utils::IsaCheckFunction isa_check = nullptr)
Marat Dukhan5f18d262019-10-31 10:24:14 -070033{
Marat Dukhanc8466f52019-11-25 18:01:10 -080034 if (isa_check && !isa_check(state)) {
35 return;
36 }
Marat Dukhan04f03be2019-11-19 12:36:47 -080037 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
Marat Dukhan5f18d262019-10-31 10:24:14 -070038 state.SkipWithError("failed to initialize XNNPACK");
39 return;
40 }
41
42 // Override microkernels chosen in xnn_initialize
Marat Dukhan99103dc2020-03-13 00:16:53 -070043 // Note: do not directly assign to xnn_params.f32.gemm because it breaks older gcc.
Marat Dukhanaefaef32020-04-09 07:09:34 -070044 xnn_params.f32.gemm.minmax.gemm = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm));
45 xnn_params.f32.gemm.minmax.igemm = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm));
46 xnn_params.f32.gemm.minmax.gemm1 = xnn_init_hmp_gemm_ukernel(xnn_gemm_ukernel_function(gemm1));
47 xnn_params.f32.gemm.minmax.igemm1 = xnn_init_hmp_igemm_ukernel(xnn_igemm_ukernel_function(igemm1));
48 xnn_params.f32.gemm.mr = mr;
49 xnn_params.f32.gemm.nr = nr;
50 xnn_params.f32.gemm.log2_kr = log2_kr;
51 xnn_params.f32.gemm.log2_sr = log2_sr;
Marat Dukhan5f18d262019-10-31 10:24:14 -070052
53 auto execution_plan = model_factory(nullptr);
54 if (execution_plan.empty()) {
55 state.SkipWithError("failed to create a model");
56 return;
57 }
58
59 for (auto _ : state) {
60 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
61 xnn_status status = xnn_run_operator(op.get(), nullptr);
62 if (status != xnn_status_success) {
63 state.SkipWithError("failed to run a model");
64 return;
65 }
66 }
67 }
Marat Dukhand713e8a2020-12-04 14:23:12 -080068
69 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
70 if (cpu_frequency != 0) {
71 state.counters["cpufreq"] = cpu_frequency;
72 }
Marat Dukhan5f18d262019-10-31 10:24:14 -070073}
74
75#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
76 static void f32_gemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
77 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -070078 xnn_f32_gemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
79 xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
80 xnn_f32_gemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
81 xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
Marat Dukhan5f18d262019-10-31 10:24:14 -070082 4 /* mr */, 12 /* nr */);
83 }
84
85 static void f32_gemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
86 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -070087 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
88 xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
89 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
90 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
Marat Dukhan5f18d262019-10-31 10:24:14 -070091 4 /* mr */, 8 /* nr */);
92 }
93
Frank Barchard8fb90552020-03-16 11:36:09 -070094 static void f32_gemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
95 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -070096 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
97 xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
98 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
99 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
Frank Barchard8fb90552020-03-16 11:36:09 -0700100 4 /* mr */, 8 /* nr */);
101 }
102
Marat Dukhan5f18d262019-10-31 10:24:14 -0700103 static void f32_gemm_4x8__aarch64_neonfma_cortex_a57(benchmark::State& state, models::ExecutionPlanFactory model) {
104 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700105 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57,
106 xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a57,
107 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
108 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700109 4 /* mr */, 8 /* nr */);
110 }
111
112 static void f32_gemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
113 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700114 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
115 xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
116 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
117 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700118 4 /* mr */, 8 /* nr */);
119 }
120
121 static void f32_gemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
122 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700123 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld64,
124 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
Frank Barchard3cb54f92020-04-10 10:46:08 -0700125 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64,
Marat Dukhande06f492020-04-09 00:19:31 -0700126 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700127 4 /* mr */, 8 /* nr */);
128 }
129
130 static void f32_gemm_4x8__aarch64_neonfma_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
131 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700132 xnn_f32_gemm_minmax_ukernel_4x8__aarch64_neonfma_ld128,
133 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
Frank Barchard3cb54f92020-04-10 10:46:08 -0700134 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_ld64,
Marat Dukhande06f492020-04-09 00:19:31 -0700135 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700136 4 /* mr */, 8 /* nr */);
137 }
138
Frank Barchard387c2d12019-12-16 19:14:07 -0800139 static void f32_gemm_5x8__aarch64_neonfma_cortex_a57(benchmark::State& state, models::ExecutionPlanFactory model) {
140 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700141 xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57,
142 xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a57,
143 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
144 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
Frank Barchard387c2d12019-12-16 19:14:07 -0800145 5 /* mr */, 8 /* nr */);
146 }
147
Marat Dukhan5f18d262019-10-31 10:24:14 -0700148 static void f32_gemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
149 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700150 xnn_f32_gemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
151 xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
152 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
153 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700154 5 /* mr */, 8 /* nr */);
155 }
156
157 static void f32_gemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
158 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700159 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
160 xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
161 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
162 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700163 6 /* mr */, 8 /* nr */);
164 }
165
Frank Barchard91e19992020-03-09 18:46:14 -0700166 static void f32_gemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
167 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700168 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
169 xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
170 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
171 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
Frank Barchard91e19992020-03-09 18:46:14 -0700172 6 /* mr */, 8 /* nr */);
173 }
174
Marat Dukhan5f18d262019-10-31 10:24:14 -0700175 static void f32_gemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, models::ExecutionPlanFactory model) {
176 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700177 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
178 xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
179 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
180 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700181 6 /* mr */, 8 /* nr */);
182 }
183
Frank Barchard387c2d12019-12-16 19:14:07 -0800184 static void f32_gemm_6x8__aarch64_neonfma_cortex_a57(benchmark::State& state, models::ExecutionPlanFactory model) {
185 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700186 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57,
187 xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a57,
188 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
189 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a57,
Frank Barchard387c2d12019-12-16 19:14:07 -0800190 6 /* mr */, 8 /* nr */);
191 }
192
Marat Dukhan5f18d262019-10-31 10:24:14 -0700193 static void f32_gemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
194 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700195 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
196 xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
197 xnn_f32_gemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
198 xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700199 6 /* mr */, 8 /* nr */);
200 }
201
202 static void f32_gemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
203 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700204 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
205 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64,
206 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
207 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700208 6 /* mr */, 8 /* nr */);
209 }
210
211 static void f32_gemm_6x8__aarch64_neonfma_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
212 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700213 xnn_f32_gemm_minmax_ukernel_6x8__aarch64_neonfma_ld128,
214 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64,
215 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
216 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard91317c52019-11-22 10:54:35 -0800217 6 /* mr */, 8 /* nr */);
218 }
219
220 static void f32_gemm_4x8__neonfma_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
221 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700222 xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64,
223 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
224 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
225 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard91317c52019-11-22 10:54:35 -0800226 4 /* mr */, 8 /* nr */);
227 }
228
229 static void f32_gemm_4x8__neonfma_lane_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
230 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700231 xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128,
232 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
233 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
234 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard91317c52019-11-22 10:54:35 -0800235 4 /* mr */, 8 /* nr */);
236 }
237
238 static void f32_gemm_6x8__neonfma_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
239 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700240 xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64,
241 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64,
242 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
243 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700244 6 /* mr */, 8 /* nr */);
245 }
246
Frank Barchard69172d92019-11-26 16:22:39 -0800247 static void f32_gemm_6x8__neonfma_lane_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
248 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700249 xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128,
250 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128,
251 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_lane_ld64,
252 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
Frank Barchard69172d92019-11-26 16:22:39 -0800253 6 /* mr */, 8 /* nr */);
254 }
255
Marat Dukhan270a2c42020-06-26 16:45:52 -0700256 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_ld64)
257 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_ld128);
258 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_ld64);
259 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_ld128);
260 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_cortex_a53)
261 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_cortex_a55)
262 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_cortex_a57)
263 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch64_neonfma_cortex_a75)
264 BENCHMARK_FP32_END2END(f32_gemm_5x8__aarch64_neonfma_cortex_a57);
265 BENCHMARK_FP32_END2END(f32_gemm_5x8__aarch64_neonfma_cortex_a75);
266 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_cortex_a53);
267 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_cortex_a55);
268 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_cortex_a73);
269 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_cortex_a57);
270 BENCHMARK_FP32_END2END(f32_gemm_6x8__aarch64_neonfma_cortex_a75);
271 BENCHMARK_FP32_END2END(f32_gemm_4x12__aarch64_neonfma_cortex_a53)
Marat Dukhan5f18d262019-10-31 10:24:14 -0700272
Marat Dukhan270a2c42020-06-26 16:45:52 -0700273 BENCHMARK_FP32_END2END(f32_gemm_4x8__neonfma_lane_ld64);
274 BENCHMARK_FP32_END2END(f32_gemm_4x8__neonfma_lane_ld128);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700275
Marat Dukhan270a2c42020-06-26 16:45:52 -0700276 BENCHMARK_FP32_END2END(f32_gemm_6x8__neonfma_lane_ld64);
277 BENCHMARK_FP32_END2END(f32_gemm_6x8__neonfma_lane_ld128);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700278#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
279
Frank Barchardcab94932019-12-03 10:48:54 -0800280#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
281 static void f32_gemm_4x8__aarch32_neon_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
282 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700283 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_ld64,
284 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64,
285 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
286 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchardcab94932019-12-03 10:48:54 -0800287 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
288 benchmark::utils::CheckNEON);
289 }
Frank Barchard490febe2020-07-16 18:42:17 -0700290 static void f32_gemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, models::ExecutionPlanFactory model) {
Frank Barchard569561d2020-06-17 13:11:12 -0700291 GEMMEnd2EndBenchmark(state, model,
Frank Barchard490febe2020-07-16 18:42:17 -0700292 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7,
293 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7,
Frank Barchard569561d2020-06-17 13:11:12 -0700294 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
295 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
296 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
297 benchmark::utils::CheckNEON);
298 }
Frank Barchard13916042019-12-11 10:56:34 -0800299 static void f32_gemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, models::ExecutionPlanFactory model) {
300 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700301 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53,
302 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53,
303 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
304 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchard13916042019-12-11 10:56:34 -0800305 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
306 benchmark::utils::CheckNEON);
307 }
Frank Barchardb7dd29e2020-03-11 12:37:10 -0700308 static void f32_gemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
309 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700310 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55,
311 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55,
312 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
313 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchardb7dd29e2020-03-11 12:37:10 -0700314 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
315 benchmark::utils::CheckNEON);
316 }
Frank Barchard3e237f22019-12-04 23:08:51 -0800317 static void f32_gemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
318 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700319 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75,
320 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75,
321 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
322 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchard3e237f22019-12-04 23:08:51 -0800323 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
324 benchmark::utils::CheckNEON);
325 }
Frank Barchard9f7d5552019-12-12 10:58:10 -0800326 static void f32_gemm_4x8__aarch32_neon_pld_cortex_a75(benchmark::State& state, models::ExecutionPlanFactory model) {
327 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700328 xnn_f32_gemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75,
329 xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75,
330 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
331 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchard9f7d5552019-12-12 10:58:10 -0800332 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
333 benchmark::utils::CheckNEON);
334 }
Frank Barchardcab94932019-12-03 10:48:54 -0800335
Marat Dukhan270a2c42020-06-26 16:45:52 -0700336 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_ld64);
Frank Barchard490febe2020-07-16 18:42:17 -0700337 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_cortex_a7);
Marat Dukhan270a2c42020-06-26 16:45:52 -0700338 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_cortex_a53);
339 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_cortex_a55);
340 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_cortex_a75);
341 BENCHMARK_FP32_END2END(f32_gemm_4x8__aarch32_neon_pld_cortex_a75);
Frank Barchardcab94932019-12-03 10:48:54 -0800342#endif // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
343
344
Marat Dukhan5f18d262019-10-31 10:24:14 -0700345#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Frank Barchard91317c52019-11-22 10:54:35 -0800346 static void f32_gemm_4x8__neon_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan5f18d262019-10-31 10:24:14 -0700347 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700348 xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64,
349 xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64,
350 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
351 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800352 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
353 benchmark::utils::CheckNEON);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700354 }
355
Frank Barchard91317c52019-11-22 10:54:35 -0800356 static void f32_gemm_4x8__neon_lane_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan5f18d262019-10-31 10:24:14 -0700357 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700358 xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128,
359 xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128,
360 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
361 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800362 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
363 benchmark::utils::CheckNEON);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700364 }
365
Frank Barchard91317c52019-11-22 10:54:35 -0800366 static void f32_gemm_6x8__neon_lane_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan5f18d262019-10-31 10:24:14 -0700367 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700368 xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64,
369 xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64,
370 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
371 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800372 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
373 benchmark::utils::CheckNEON);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700374 }
Frank Barchard69172d92019-11-26 16:22:39 -0800375
376 static void f32_gemm_6x8__neon_lane_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
377 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700378 xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128,
379 xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128,
380 xnn_f32_gemm_minmax_ukernel_1x8__neon_lane_ld64,
381 xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
Frank Barchard69172d92019-11-26 16:22:39 -0800382 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
383 benchmark::utils::CheckNEON);
384 }
385
Frank Barchard5243bb02019-11-22 16:37:50 -0800386 static void f32_gemm_4x8__neon_dup_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
387 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700388 xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld64,
389 xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64,
390 xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64,
391 xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800392 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
393 benchmark::utils::CheckNEON);
Frank Barchard5243bb02019-11-22 16:37:50 -0800394 }
395
396 static void f32_gemm_4x8__neon_dup_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
397 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700398 xnn_f32_gemm_minmax_ukernel_4x8__neon_dup_ld128,
399 xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128,
400 xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64,
401 xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800402 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
403 benchmark::utils::CheckNEON);
Frank Barchard5243bb02019-11-22 16:37:50 -0800404 }
405
406 static void f32_gemm_6x8__neon_dup_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
407 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700408 xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64,
409 xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64,
410 xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64,
411 xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800412 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
413 benchmark::utils::CheckNEON);
Frank Barchard5243bb02019-11-22 16:37:50 -0800414 }
415
Frank Barchard69172d92019-11-26 16:22:39 -0800416 static void f32_gemm_6x8__neon_dup_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
417 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700418 xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld128,
419 xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128,
420 xnn_f32_gemm_minmax_ukernel_1x8__neon_dup_ld64,
421 xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
Frank Barchard69172d92019-11-26 16:22:39 -0800422 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
423 benchmark::utils::CheckNEON);
424 }
425
Frank Barchard5243bb02019-11-22 16:37:50 -0800426 static void f32_gemm_4x8__neonfma_dup_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
427 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700428 xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64,
429 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64,
430 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64,
431 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800432 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
433 benchmark::utils::CheckNEONFMA);
Frank Barchard5243bb02019-11-22 16:37:50 -0800434 }
435
436 static void f32_gemm_4x8__neonfma_dup_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
437 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700438 xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld128,
439 xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
440 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64,
441 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800442 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
443 benchmark::utils::CheckNEONFMA);
Frank Barchard5243bb02019-11-22 16:37:50 -0800444 }
445
446 static void f32_gemm_6x8__neonfma_dup_ld64(benchmark::State& state, models::ExecutionPlanFactory model) {
447 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700448 xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64,
449 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64,
450 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64,
451 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800452 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
453 benchmark::utils::CheckNEONFMA);
Frank Barchard5243bb02019-11-22 16:37:50 -0800454 }
Marat Dukhan5f18d262019-10-31 10:24:14 -0700455
Frank Barchard69172d92019-11-26 16:22:39 -0800456 static void f32_gemm_6x8__neonfma_dup_ld128(benchmark::State& state, models::ExecutionPlanFactory model) {
457 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700458 xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld128,
459 xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128,
460 xnn_f32_gemm_minmax_ukernel_1x8__neonfma_dup_ld64,
461 xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64,
Frank Barchard69172d92019-11-26 16:22:39 -0800462 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
463 benchmark::utils::CheckNEONFMA);
464 }
465
Frank Barcharddf06d802019-11-20 15:53:46 -0800466 static void f32_gemm_4x8s4__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
467 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700468 xnn_f32_gemm_minmax_ukernel_4x8s4__neon,
469 xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
470 xnn_f32_gemm_minmax_ukernel_1x8s4__neon,
471 xnn_f32_igemm_minmax_ukernel_1x8s4__neon,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800472 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
473 benchmark::utils::CheckNEON);
Frank Barcharddf06d802019-11-20 15:53:46 -0800474 }
475
476 static void f32_gemm_4x8s4__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
477 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700478 xnn_f32_gemm_minmax_ukernel_4x8s4__neonfma,
479 xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
480 xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma,
481 xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800482 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
483 benchmark::utils::CheckNEONFMA);
Frank Barcharddf06d802019-11-20 15:53:46 -0800484 }
485
486 static void f32_gemm_6x8s4__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
487 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700488 xnn_f32_gemm_minmax_ukernel_6x8s4__neon,
489 xnn_f32_igemm_minmax_ukernel_6x8s4__neon,
490 xnn_f32_gemm_minmax_ukernel_1x8s4__neon,
491 xnn_f32_igemm_minmax_ukernel_1x8s4__neon,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800492 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
493 benchmark::utils::CheckNEON);
Frank Barcharddf06d802019-11-20 15:53:46 -0800494 }
495
496 static void f32_gemm_6x8s4__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
497 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700498 xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma,
499 xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma,
500 xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma,
501 xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800502 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
503 benchmark::utils::CheckNEONFMA);
Frank Barcharddf06d802019-11-20 15:53:46 -0800504 }
505
506 static void f32_gemm_8x8s4__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
507 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700508 xnn_f32_gemm_minmax_ukernel_8x8s4__neon,
509 xnn_f32_igemm_minmax_ukernel_8x8s4__neon,
510 xnn_f32_gemm_minmax_ukernel_1x8s4__neon,
511 xnn_f32_igemm_minmax_ukernel_1x8s4__neon,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800512 8 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
513 benchmark::utils::CheckNEON);
Frank Barcharddf06d802019-11-20 15:53:46 -0800514 }
515
516 static void f32_gemm_8x8s4__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
517 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700518 xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma,
519 xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma,
520 xnn_f32_gemm_minmax_ukernel_1x8s4__neonfma,
521 xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800522 8 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */,
523 benchmark::utils::CheckNEONFMA);
Frank Barcharddf06d802019-11-20 15:53:46 -0800524 }
525
// Benchmark registrations for the ARM/ARM64 wrappers, one macro invocation per
// wrapper function, grouped by kernel family.
// NOTE(review): BENCHMARK_FP32_END2END is not defined in the visible part of
// this file (presumably bench/end2end.h) -- confirm which models it registers.
// neon_lane wrappers (ld64 / ld128).
Marat Dukhan270a2c42020-06-26 16:45:52 -0700526 BENCHMARK_FP32_END2END(f32_gemm_4x8__neon_lane_ld64);
527 BENCHMARK_FP32_END2END(f32_gemm_4x8__neon_lane_ld128);
528 BENCHMARK_FP32_END2END(f32_gemm_6x8__neon_lane_ld64);
529 BENCHMARK_FP32_END2END(f32_gemm_6x8__neon_lane_ld128);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700530
// neon_dup wrappers (ld64 / ld128).
Marat Dukhan270a2c42020-06-26 16:45:52 -0700531 BENCHMARK_FP32_END2END(f32_gemm_4x8__neon_dup_ld64);
532 BENCHMARK_FP32_END2END(f32_gemm_4x8__neon_dup_ld128);
533 BENCHMARK_FP32_END2END(f32_gemm_6x8__neon_dup_ld64);
534 BENCHMARK_FP32_END2END(f32_gemm_6x8__neon_dup_ld128);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700535
// neonfma_dup wrappers (ld64 / ld128).
Marat Dukhan270a2c42020-06-26 16:45:52 -0700536 BENCHMARK_FP32_END2END(f32_gemm_4x8__neonfma_dup_ld64);
537 BENCHMARK_FP32_END2END(f32_gemm_4x8__neonfma_dup_ld128);
538 BENCHMARK_FP32_END2END(f32_gemm_6x8__neonfma_dup_ld64);
539 BENCHMARK_FP32_END2END(f32_gemm_6x8__neonfma_dup_ld128);
Frank Barcharddf06d802019-11-20 15:53:46 -0800540
// s4 neon wrappers.
Marat Dukhan270a2c42020-06-26 16:45:52 -0700541 BENCHMARK_FP32_END2END(f32_gemm_4x8s4__neon);
542 BENCHMARK_FP32_END2END(f32_gemm_6x8s4__neon);
543 BENCHMARK_FP32_END2END(f32_gemm_8x8s4__neon);
Frank Barchard69172d92019-11-26 16:22:39 -0800544
// s4 neonfma wrappers.
Marat Dukhan270a2c42020-06-26 16:45:52 -0700545 BENCHMARK_FP32_END2END(f32_gemm_4x8s4__neonfma);
546 BENCHMARK_FP32_END2END(f32_gemm_6x8s4__neonfma);
547 BENCHMARK_FP32_END2END(f32_gemm_8x8s4__neonfma);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700548#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
549
Marat Dukhan5f18d262019-10-31 10:24:14 -0700550#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan802fcae2020-12-11 14:37:25 -0800551 static void f32_gemm_3x8__sse_load1(benchmark::State& state, models::ExecutionPlanFactory model) {
552 GEMMEnd2EndBenchmark(state, model,
553 xnn_f32_gemm_minmax_ukernel_3x8__sse_load1,
554 xnn_f32_igemm_minmax_ukernel_3x8__sse_load1,
555 xnn_f32_gemm_minmax_ukernel_1x8__sse_load1,
556 xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
557 3 /* mr */, 8 /* nr */);
558 }
559
Marat Dukhan5f18d262019-10-31 10:24:14 -0700560 static void f32_gemm_4x8__sse_load1(benchmark::State& state, models::ExecutionPlanFactory model) {
561 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700562 xnn_f32_gemm_minmax_ukernel_4x8__sse_load1,
563 xnn_f32_igemm_minmax_ukernel_4x8__sse_load1,
564 xnn_f32_gemm_minmax_ukernel_1x8__sse_load1,
565 xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700566 4 /* mr */, 8 /* nr */);
567 }
568
Marat Dukhan802fcae2020-12-11 14:37:25 -0800569 static void f32_gemm_5x8__sse_load1(benchmark::State& state, models::ExecutionPlanFactory model) {
570 GEMMEnd2EndBenchmark(state, model,
571 xnn_f32_gemm_minmax_ukernel_5x8__sse_load1,
572 xnn_f32_igemm_minmax_ukernel_5x8__sse_load1,
573 xnn_f32_gemm_minmax_ukernel_1x8__sse_load1,
574 xnn_f32_igemm_minmax_ukernel_1x8__sse_load1,
575 5 /* mr */, 8 /* nr */);
576 }
577
578 static void f32_gemm_3x8__sse_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
579 GEMMEnd2EndBenchmark(state, model,
580 xnn_f32_gemm_minmax_ukernel_3x8__sse_dup,
581 xnn_f32_igemm_minmax_ukernel_3x8__sse_dup,
582 xnn_f32_gemm_minmax_ukernel_1x8__sse_dup,
583 xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
584 3 /* mr */, 8 /* nr */);
585 }
586
Marat Dukhan5f18d262019-10-31 10:24:14 -0700587 static void f32_gemm_4x8__sse_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
588 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700589 xnn_f32_gemm_minmax_ukernel_4x8__sse_dup,
590 xnn_f32_igemm_minmax_ukernel_4x8__sse_dup,
591 xnn_f32_gemm_minmax_ukernel_1x8__sse_dup,
592 xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700593 4 /* mr */, 8 /* nr */);
594 }
595
Marat Dukhan802fcae2020-12-11 14:37:25 -0800596 static void f32_gemm_5x8__sse_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
597 GEMMEnd2EndBenchmark(state, model,
598 xnn_f32_gemm_minmax_ukernel_5x8__sse_dup,
599 xnn_f32_igemm_minmax_ukernel_5x8__sse_dup,
600 xnn_f32_gemm_minmax_ukernel_1x8__sse_dup,
601 xnn_f32_igemm_minmax_ukernel_1x8__sse_dup,
602 5 /* mr */, 8 /* nr */);
603 }
604
605 static void f32_gemm_3x8s4__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
606 GEMMEnd2EndBenchmark(state, model,
607 xnn_f32_gemm_minmax_ukernel_3x8s4__sse,
608 xnn_f32_igemm_minmax_ukernel_3x8s4__sse,
609 xnn_f32_gemm_minmax_ukernel_1x8s4__sse,
610 xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
611 3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
612 }
613
Marat Dukhan5f18d262019-10-31 10:24:14 -0700614 static void f32_gemm_4x8s4__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
615 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700616 xnn_f32_gemm_minmax_ukernel_4x8s4__sse,
617 xnn_f32_igemm_minmax_ukernel_4x8s4__sse,
618 xnn_f32_gemm_minmax_ukernel_1x8s4__sse,
619 xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
Marat Dukhan5f18d262019-10-31 10:24:14 -0700620 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
621 }
622
Marat Dukhan802fcae2020-12-11 14:37:25 -0800623 static void f32_gemm_5x8s4__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
624 GEMMEnd2EndBenchmark(state, model,
625 xnn_f32_gemm_minmax_ukernel_5x8s4__sse,
626 xnn_f32_igemm_minmax_ukernel_5x8s4__sse,
627 xnn_f32_gemm_minmax_ukernel_1x8s4__sse,
628 xnn_f32_igemm_minmax_ukernel_1x8s4__sse,
629 5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
630 }
631
632 static void f32_gemm_3x8__sse2_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
633 GEMMEnd2EndBenchmark(state, model,
634 xnn_f32_gemm_minmax_ukernel_3x8__sse2_dup,
635 xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup,
636 xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup,
637 xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup,
638 3 /* mr */, 8 /* nr */);
639 }
640
641 static void f32_gemm_4x8__sse2_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
642 GEMMEnd2EndBenchmark(state, model,
643 xnn_f32_gemm_minmax_ukernel_4x8__sse2_dup,
644 xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup,
645 xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup,
646 xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup,
647 4 /* mr */, 8 /* nr */);
648 }
649
650 static void f32_gemm_5x8__sse2_dup(benchmark::State& state, models::ExecutionPlanFactory model) {
651 GEMMEnd2EndBenchmark(state, model,
652 xnn_f32_gemm_minmax_ukernel_5x8__sse2_dup,
653 xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup,
654 xnn_f32_gemm_minmax_ukernel_1x8__sse2_dup,
655 xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup,
656 5 /* mr */, 8 /* nr */);
657 }
658
Marat Dukhanfda12b82019-11-21 12:27:59 -0800659 static void f32_gemm_4x8__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
660 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700661 xnn_f32_gemm_minmax_ukernel_4x8__avx_broadcast,
662 xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast,
663 xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast,
664 xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800665 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
666 benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800667 }
668
669 static void f32_gemm_5x8__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
670 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700671 xnn_f32_gemm_minmax_ukernel_5x8__avx_broadcast,
672 xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast,
673 xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast,
674 xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800675 5 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
676 benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800677 }
678
679 static void f32_gemm_6x8__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
680 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700681 xnn_f32_gemm_minmax_ukernel_6x8__avx_broadcast,
682 xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast,
683 xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast,
684 xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800685 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
686 benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800687 }
688
689 static void f32_gemm_7x8__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
690 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700691 xnn_f32_gemm_minmax_ukernel_7x8__avx_broadcast,
692 xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast,
693 xnn_f32_gemm_minmax_ukernel_1x8__avx_broadcast,
694 xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800695 7 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
696 benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800697 }
698
Marat Dukhaneccfd712019-12-08 16:49:27 -0800699 static void f32_gemm_3x16__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
700 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700701 xnn_f32_gemm_minmax_ukernel_3x16__avx_broadcast,
702 xnn_f32_igemm_minmax_ukernel_3x16__avx_broadcast,
703 xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast,
704 xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800705 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
706 benchmark::utils::CheckAVX);
707 }
708
709 static void f32_gemm_4x16__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
710 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700711 xnn_f32_gemm_minmax_ukernel_4x16__avx_broadcast,
712 xnn_f32_igemm_minmax_ukernel_4x16__avx_broadcast,
713 xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast,
714 xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800715 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
716 benchmark::utils::CheckAVX);
717 }
718
719 static void f32_gemm_5x16__avx_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
720 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700721 xnn_f32_gemm_minmax_ukernel_5x16__avx_broadcast,
722 xnn_f32_igemm_minmax_ukernel_5x16__avx_broadcast,
723 xnn_f32_gemm_minmax_ukernel_1x16__avx_broadcast,
724 xnn_f32_igemm_minmax_ukernel_1x16__avx_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800725 5 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
726 benchmark::utils::CheckAVX);
727 }
728
Marat Dukhanfda12b82019-11-21 12:27:59 -0800729 static void f32_gemm_4x8__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
730 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700731 xnn_f32_gemm_minmax_ukernel_4x8__fma3_broadcast,
732 xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast,
733 xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
734 xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800735 4 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
736 benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800737 }
738
739 static void f32_gemm_5x8__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
740 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700741 xnn_f32_gemm_minmax_ukernel_5x8__fma3_broadcast,
742 xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast,
743 xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
744 xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800745 5 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
746 benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800747 }
748
749 static void f32_gemm_6x8__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
750 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700751 xnn_f32_gemm_minmax_ukernel_6x8__fma3_broadcast,
752 xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast,
753 xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
754 xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800755 6 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
756 benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800757 }
758
759 static void f32_gemm_7x8__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
760 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700761 xnn_f32_gemm_minmax_ukernel_7x8__fma3_broadcast,
762 xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast,
763 xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
764 xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800765 7 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
766 benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800767 }
768
769 static void f32_gemm_8x8__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
770 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700771 xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast,
772 xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast,
773 xnn_f32_gemm_minmax_ukernel_1x8__fma3_broadcast,
774 xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800775 8 /* mr */, 8 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
776 benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800777 }
778
Marat Dukhaneccfd712019-12-08 16:49:27 -0800779 static void f32_gemm_3x16__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
780 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700781 xnn_f32_gemm_minmax_ukernel_3x16__fma3_broadcast,
782 xnn_f32_igemm_minmax_ukernel_3x16__fma3_broadcast,
783 xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast,
784 xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800785 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
786 benchmark::utils::CheckFMA3);
787 }
788
789 static void f32_gemm_4x16__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
790 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700791 xnn_f32_gemm_minmax_ukernel_4x16__fma3_broadcast,
792 xnn_f32_igemm_minmax_ukernel_4x16__fma3_broadcast,
793 xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast,
794 xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800795 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
796 benchmark::utils::CheckFMA3);
797 }
798
799 static void f32_gemm_5x16__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
800 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700801 xnn_f32_gemm_minmax_ukernel_5x16__fma3_broadcast,
802 xnn_f32_igemm_minmax_ukernel_5x16__fma3_broadcast,
803 xnn_f32_gemm_minmax_ukernel_1x16__fma3_broadcast,
804 xnn_f32_igemm_minmax_ukernel_1x16__fma3_broadcast,
Marat Dukhaneccfd712019-12-08 16:49:27 -0800805 5 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
806 benchmark::utils::CheckFMA3);
807 }
808
Marat Dukhan27121322019-12-09 14:57:40 -0800809 static void f32_gemm_3x16s4__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
810 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700811 xnn_f32_gemm_minmax_ukernel_3x16s4__fma3_broadcast,
812 xnn_f32_igemm_minmax_ukernel_3x16s4__fma3_broadcast,
813 xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast,
814 xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast,
Marat Dukhan27121322019-12-09 14:57:40 -0800815 3 /* mr */, 16 /* nr */, 0 /* log2_kr */, 2 /* log2_sr */,
816 benchmark::utils::CheckFMA3);
817 }
818
819 static void f32_gemm_4x16s4__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
820 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700821 xnn_f32_gemm_minmax_ukernel_4x16s4__fma3_broadcast,
822 xnn_f32_igemm_minmax_ukernel_4x16s4__fma3_broadcast,
823 xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast,
824 xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast,
Marat Dukhan27121322019-12-09 14:57:40 -0800825 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 2 /* log2_sr */,
826 benchmark::utils::CheckFMA3);
827 }
828
829 static void f32_gemm_5x16s4__fma3_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
830 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700831 xnn_f32_gemm_minmax_ukernel_5x16s4__fma3_broadcast,
832 xnn_f32_igemm_minmax_ukernel_5x16s4__fma3_broadcast,
833 xnn_f32_gemm_minmax_ukernel_1x16s4__fma3_broadcast,
834 xnn_f32_igemm_minmax_ukernel_1x16s4__fma3_broadcast,
Marat Dukhan27121322019-12-09 14:57:40 -0800835 5 /* mr */, 16 /* nr */, 0 /* log2_kr */, 2 /* log2_sr */,
836 benchmark::utils::CheckFMA3);
837 }
838
Marat Dukhan0f349c42019-11-27 11:58:54 -0800839 static void f32_gemm_4x16__avx512f_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
840 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700841 xnn_f32_gemm_minmax_ukernel_4x16__avx512f_broadcast,
842 xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast,
843 xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast,
844 xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
Marat Dukhan0f349c42019-11-27 11:58:54 -0800845 4 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
846 benchmark::utils::CheckAVX512F);
847 }
848
849 static void f32_gemm_5x16__avx512f_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
850 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700851 xnn_f32_gemm_minmax_ukernel_5x16__avx512f_broadcast,
852 xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast,
853 xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast,
854 xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
Marat Dukhan0f349c42019-11-27 11:58:54 -0800855 5 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
856 benchmark::utils::CheckAVX512F);
857 }
858
859 static void f32_gemm_6x16__avx512f_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
860 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700861 xnn_f32_gemm_minmax_ukernel_6x16__avx512f_broadcast,
862 xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast,
863 xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast,
864 xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
Marat Dukhan0f349c42019-11-27 11:58:54 -0800865 6 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
866 benchmark::utils::CheckAVX512F);
867 }
868
869 static void f32_gemm_7x16__avx512f_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
870 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700871 xnn_f32_gemm_minmax_ukernel_7x16__avx512f_broadcast,
872 xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast,
873 xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast,
874 xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
Marat Dukhan0f349c42019-11-27 11:58:54 -0800875 7 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
876 benchmark::utils::CheckAVX512F);
877 }
878
879 static void f32_gemm_8x16__avx512f_broadcast(benchmark::State& state, models::ExecutionPlanFactory model) {
880 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -0700881 xnn_f32_gemm_minmax_ukernel_8x16__avx512f_broadcast,
882 xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast,
883 xnn_f32_gemm_minmax_ukernel_1x16__avx512f_broadcast,
884 xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast,
Marat Dukhan0f349c42019-11-27 11:58:54 -0800885 8 /* mr */, 16 /* nr */, 0 /* log2_kr */, 0 /* log2_sr */,
886 benchmark::utils::CheckAVX512F);
887 }
888
// Benchmark registrations for the x86/x86-64 wrappers defined in this section,
// one macro invocation per wrapper. Groups are listed from AVX512F down to SSE.
// NOTE(review): BENCHMARK_FP32_END2END is not defined in the visible part of
// this file (presumably bench/end2end.h) -- confirm which models it registers.
// avx512f_broadcast wrappers.
Frank Barchard4c032f32021-02-10 15:18:49 -0800889 BENCHMARK_FP32_END2END(f32_gemm_4x16__avx512f_broadcast);
890 BENCHMARK_FP32_END2END(f32_gemm_5x16__avx512f_broadcast);
891 BENCHMARK_FP32_END2END(f32_gemm_6x16__avx512f_broadcast);
892 BENCHMARK_FP32_END2END(f32_gemm_7x16__avx512f_broadcast);
893 BENCHMARK_FP32_END2END(f32_gemm_8x16__avx512f_broadcast);
894
// s4 fma3_broadcast wrappers.
895 BENCHMARK_FP32_END2END(f32_gemm_3x16s4__fma3_broadcast);
896 BENCHMARK_FP32_END2END(f32_gemm_4x16s4__fma3_broadcast);
897 BENCHMARK_FP32_END2END(f32_gemm_5x16s4__fma3_broadcast);
898
// fma3_broadcast wrappers (Mx8, then Mx16).
899 BENCHMARK_FP32_END2END(f32_gemm_4x8__fma3_broadcast);
900 BENCHMARK_FP32_END2END(f32_gemm_5x8__fma3_broadcast);
901 BENCHMARK_FP32_END2END(f32_gemm_6x8__fma3_broadcast);
902 BENCHMARK_FP32_END2END(f32_gemm_7x8__fma3_broadcast);
903 BENCHMARK_FP32_END2END(f32_gemm_8x8__fma3_broadcast);
904 BENCHMARK_FP32_END2END(f32_gemm_3x16__fma3_broadcast);
905 BENCHMARK_FP32_END2END(f32_gemm_4x16__fma3_broadcast);
906 BENCHMARK_FP32_END2END(f32_gemm_5x16__fma3_broadcast);
907
// avx_broadcast wrappers (Mx8, then Mx16).
908 BENCHMARK_FP32_END2END(f32_gemm_4x8__avx_broadcast);
909 BENCHMARK_FP32_END2END(f32_gemm_5x8__avx_broadcast);
910 BENCHMARK_FP32_END2END(f32_gemm_6x8__avx_broadcast);
911 BENCHMARK_FP32_END2END(f32_gemm_7x8__avx_broadcast);
912 BENCHMARK_FP32_END2END(f32_gemm_3x16__avx_broadcast);
913 BENCHMARK_FP32_END2END(f32_gemm_4x16__avx_broadcast);
914 BENCHMARK_FP32_END2END(f32_gemm_5x16__avx_broadcast);
915
// sse2_dup wrappers.
916 BENCHMARK_FP32_END2END(f32_gemm_3x8__sse2_dup);
917 BENCHMARK_FP32_END2END(f32_gemm_4x8__sse2_dup);
918 BENCHMARK_FP32_END2END(f32_gemm_5x8__sse2_dup);
919
// sse_load1 wrappers.
Marat Dukhan802fcae2020-12-11 14:37:25 -0800920 BENCHMARK_FP32_END2END(f32_gemm_3x8__sse_load1);
Marat Dukhan270a2c42020-06-26 16:45:52 -0700921 BENCHMARK_FP32_END2END(f32_gemm_4x8__sse_load1);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800922 BENCHMARK_FP32_END2END(f32_gemm_5x8__sse_load1);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700923
// sse_dup wrappers.
Marat Dukhan802fcae2020-12-11 14:37:25 -0800924 BENCHMARK_FP32_END2END(f32_gemm_3x8__sse_dup);
Marat Dukhan270a2c42020-06-26 16:45:52 -0700925 BENCHMARK_FP32_END2END(f32_gemm_4x8__sse_dup);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800926 BENCHMARK_FP32_END2END(f32_gemm_5x8__sse_dup);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700927
// s4 sse wrappers.
Marat Dukhan802fcae2020-12-11 14:37:25 -0800928 BENCHMARK_FP32_END2END(f32_gemm_3x8s4__sse);
Marat Dukhan270a2c42020-06-26 16:45:52 -0700929 BENCHMARK_FP32_END2END(f32_gemm_4x8s4__sse);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800930 BENCHMARK_FP32_END2END(f32_gemm_5x8s4__sse);
Marat Dukhan5f18d262019-10-31 10:24:14 -0700931#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
932
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700933#if XNN_ARCH_WASMSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -0800934 static void f32_gemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700935 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800936 xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat,
937 xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat,
938 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
939 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700940 3 /* mr */, 8 /* nr */);
941 }
942
Frank Barchard0725b8d2020-12-07 11:07:35 -0800943 static void f32_gemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700944 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800945 xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
946 xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat,
947 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
948 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700949 4 /* mr */, 8 /* nr */);
950 }
951
Frank Barchard0725b8d2020-12-07 11:07:35 -0800952 static void f32_gemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700953 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800954 xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
955 xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat,
956 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
957 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700958 5 /* mr */, 8 /* nr */);
959 }
960
Frank Barchard0725b8d2020-12-07 11:07:35 -0800961 static void f32_gemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700962 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800963 xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
964 xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat,
965 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
966 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700967 6 /* mr */, 8 /* nr */);
968 }
969
Frank Barchard0725b8d2020-12-07 11:07:35 -0800970 static void f32_gemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700971 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800972 xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat,
973 xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat,
974 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
975 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700976 3 /* mr */, 8 /* nr */);
977 }
978
Frank Barchard0725b8d2020-12-07 11:07:35 -0800979 static void f32_gemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700980 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800981 xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
982 xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat,
983 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
984 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700985 4 /* mr */, 8 /* nr */);
986 }
987
Frank Barchard0725b8d2020-12-07 11:07:35 -0800988 static void f32_gemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700989 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800990 xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
991 xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat,
992 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
993 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700994 5 /* mr */, 8 /* nr */);
995 }
996
Frank Barchard0725b8d2020-12-07 11:07:35 -0800997 static void f32_gemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700998 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -0800999 xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat,
1000 xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat,
1001 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
1002 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_loadsplat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001003 6 /* mr */, 8 /* nr */);
1004 }
1005
Frank Barchard0725b8d2020-12-07 11:07:35 -08001006 static void f32_gemm_3x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001007 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001008 xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_arm_splat,
1009 xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat,
1010 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
1011 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001012 3 /* mr */, 8 /* nr */);
1013 }
1014
Frank Barchard0725b8d2020-12-07 11:07:35 -08001015 static void f32_gemm_4x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001016 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001017 xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
1018 xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat,
1019 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
1020 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001021 4 /* mr */, 8 /* nr */);
1022 }
1023
Frank Barchard0725b8d2020-12-07 11:07:35 -08001024 static void f32_gemm_5x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001025 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001026 xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
1027 xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat,
1028 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
1029 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001030 5 /* mr */, 8 /* nr */);
1031 }
1032
Frank Barchard0725b8d2020-12-07 11:07:35 -08001033 static void f32_gemm_6x8__wasmsimd_arm_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001034 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001035 xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
1036 xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat,
1037 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
1038 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_arm_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001039 6 /* mr */, 8 /* nr */);
1040 }
1041
Frank Barchard0725b8d2020-12-07 11:07:35 -08001042 static void f32_gemm_3x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001043 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001044 xnn_f32_gemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
1045 xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat,
1046 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
1047 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001048 3 /* mr */, 8 /* nr */);
1049 }
1050
Frank Barchard0725b8d2020-12-07 11:07:35 -08001051 static void f32_gemm_4x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001052 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001053 xnn_f32_gemm_minmax_ukernel_4x8__wasmsimd_x86_splat,
1054 xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat,
1055 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
1056 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001057 4 /* mr */, 8 /* nr */);
1058 }
1059
// End-to-end benchmark wrapper for the 5x8 WAsm SIMD "x86 splat" f32 kernels.
// Passes the 5x8 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=5, nr=8.
// log2_kr, log2_sr and isa_check are left at their defaults.
Frank Barchard0725b8d2020-12-07 11:07:35 -08001060 static void f32_gemm_5x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001061 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001062 xnn_f32_gemm_minmax_ukernel_5x8__wasmsimd_x86_splat,
1063 xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat,
1064 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
1065 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001066 5 /* mr */, 8 /* nr */);
1067 }
1068
// End-to-end benchmark wrapper for the 6x8 WAsm SIMD "x86 splat" f32 kernels.
// Passes the 6x8 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=6, nr=8.
// log2_kr, log2_sr and isa_check are left at their defaults.
Frank Barchard0725b8d2020-12-07 11:07:35 -08001069 static void f32_gemm_6x8__wasmsimd_x86_splat(benchmark::State& state, models::ExecutionPlanFactory model) {
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001070 GEMMEnd2EndBenchmark(state, model,
Frank Barchard0725b8d2020-12-07 11:07:35 -08001071 xnn_f32_gemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
1072 xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat,
1073 xnn_f32_gemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
1074 xnn_f32_igemm_minmax_ukernel_1x8__wasmsimd_x86_splat,
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001075 6 /* mr */, 8 /* nr */);
1076 }
1077
// End-to-end benchmark wrapper for the 3x8s4 WAsm SIMD "arm" f32 kernels.
// Passes the 3x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=3, nr=8,
// log2(kr)=0 and log2(sr)=2 (i.e. sr=4, matching the "s4" suffix).
// isa_check is left at its nullptr default.
1078 static void f32_gemm_3x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
1079 GEMMEnd2EndBenchmark(state, model,
1080 xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_arm,
1081 xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm,
1082 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1083 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1084 3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1085 }
1086
// End-to-end benchmark wrapper for the 4x8s4 WAsm SIMD "arm" f32 kernels.
// Passes the 4x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=4, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1087 static void f32_gemm_4x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
1088 GEMMEnd2EndBenchmark(state, model,
1089 xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_arm,
1090 xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm,
1091 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1092 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1093 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1094 }
1095
// End-to-end benchmark wrapper for the 5x8s4 WAsm SIMD "arm" f32 kernels.
// Passes the 5x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=5, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1096 static void f32_gemm_5x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
1097 GEMMEnd2EndBenchmark(state, model,
1098 xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm,
1099 xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm,
1100 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1101 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1102 5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1103 }
1104
// End-to-end benchmark wrapper for the 6x8s4 WAsm SIMD "arm" f32 kernels.
// Passes the 6x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=6, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1105 static void f32_gemm_6x8s4__wasmsimd_arm(benchmark::State& state, models::ExecutionPlanFactory model) {
1106 GEMMEnd2EndBenchmark(state, model,
1107 xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_arm,
1108 xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm,
1109 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1110 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_arm,
1111 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1112 }
1113
// End-to-end benchmark wrapper for the 3x8s4 WAsm SIMD "x86" f32 kernels.
// Passes the 3x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=3, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1114 static void f32_gemm_3x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
1115 GEMMEnd2EndBenchmark(state, model,
1116 xnn_f32_gemm_minmax_ukernel_3x8s4__wasmsimd_x86,
1117 xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86,
1118 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1119 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1120 3 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1121 }
1122
// End-to-end benchmark wrapper for the 4x8s4 WAsm SIMD "x86" f32 kernels.
// Passes the 4x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=4, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1123 static void f32_gemm_4x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
1124 GEMMEnd2EndBenchmark(state, model,
1125 xnn_f32_gemm_minmax_ukernel_4x8s4__wasmsimd_x86,
1126 xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86,
1127 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1128 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1129 4 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1130 }
1131
// End-to-end benchmark wrapper for the 5x8s4 WAsm SIMD "x86" f32 kernels.
// Passes the 5x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=5, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1132 static void f32_gemm_5x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
1133 GEMMEnd2EndBenchmark(state, model,
1134 xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_x86,
1135 xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86,
1136 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1137 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1138 5 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1139 }
1140
// End-to-end benchmark wrapper for the 6x8s4 WAsm SIMD "x86" f32 kernels.
// Passes the 6x8s4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x8s4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=6, nr=8,
// log2(kr)=0 and log2(sr)=2 (sr=4). isa_check keeps its nullptr default.
1141 static void f32_gemm_6x8s4__wasmsimd_x86(benchmark::State& state, models::ExecutionPlanFactory model) {
1142 GEMMEnd2EndBenchmark(state, model,
1143 xnn_f32_gemm_minmax_ukernel_6x8s4__wasmsimd_x86,
1144 xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86,
1145 xnn_f32_gemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1146 xnn_f32_igemm_minmax_ukernel_1x8s4__wasmsimd_x86,
1147 6 /* mr */, 8 /* nr */, 0 /* log2(kr) */, 2 /* log2(sr) */);
1148 }
1149
// Registration of every WAsm SIMD f32 GEMM wrapper through
// BENCHMARK_FP32_END2END, grouped by kernel family in this order:
// arm loadsplat, x86 loadsplat, arm splat, x86 splat, arm s4, x86 s4,
// each at mr = 3, 4, 5 and 6.
// NOTE(review): the macro is defined outside this file (presumably in
// bench/end2end.h) and presumably registers each wrapper once per FP32
// end-to-end model — confirm against its definition.
Frank Barchard0725b8d2020-12-07 11:07:35 -08001150 BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmsimd_arm_loadsplat);
1151 BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmsimd_arm_loadsplat);
1152 BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmsimd_arm_loadsplat);
1153 BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmsimd_arm_loadsplat);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001154
Frank Barchard0725b8d2020-12-07 11:07:35 -08001155 BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmsimd_x86_loadsplat);
1156 BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmsimd_x86_loadsplat);
1157 BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmsimd_x86_loadsplat);
1158 BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmsimd_x86_loadsplat);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001159
Frank Barchard0725b8d2020-12-07 11:07:35 -08001160 BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmsimd_arm_splat);
1161 BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmsimd_arm_splat);
1162 BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmsimd_arm_splat);
1163 BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmsimd_arm_splat);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001164
Frank Barchard0725b8d2020-12-07 11:07:35 -08001165 BENCHMARK_FP32_END2END(f32_gemm_3x8__wasmsimd_x86_splat);
1166 BENCHMARK_FP32_END2END(f32_gemm_4x8__wasmsimd_x86_splat);
1167 BENCHMARK_FP32_END2END(f32_gemm_5x8__wasmsimd_x86_splat);
1168 BENCHMARK_FP32_END2END(f32_gemm_6x8__wasmsimd_x86_splat);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001169
Marat Dukhan270a2c42020-06-26 16:45:52 -07001170 BENCHMARK_FP32_END2END(f32_gemm_3x8s4__wasmsimd_arm);
1171 BENCHMARK_FP32_END2END(f32_gemm_4x8s4__wasmsimd_arm);
1172 BENCHMARK_FP32_END2END(f32_gemm_5x8s4__wasmsimd_arm);
1173 BENCHMARK_FP32_END2END(f32_gemm_6x8s4__wasmsimd_arm);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001174
Marat Dukhan270a2c42020-06-26 16:45:52 -07001175 BENCHMARK_FP32_END2END(f32_gemm_3x8s4__wasmsimd_x86);
1176 BENCHMARK_FP32_END2END(f32_gemm_4x8s4__wasmsimd_x86);
1177 BENCHMARK_FP32_END2END(f32_gemm_5x8s4__wasmsimd_x86);
1178 BENCHMARK_FP32_END2END(f32_gemm_6x8s4__wasmsimd_x86);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -07001179#endif // XNN_ARCH_WASMSIMD
1180
Marat Dukhanc08cdf52019-12-09 09:17:51 -08001181#if XNN_ARCH_WASM
// End-to-end benchmark wrapper for the 2x4 WAsm (non-SIMD) f32 kernels.
// Passes the 2x4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=2, nr=4.
// log2_kr, log2_sr and isa_check are left at their defaults (0, 0, nullptr).
1182 static void f32_gemm_2x4__wasm(benchmark::State& state, models::ExecutionPlanFactory model) {
1183 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -07001184 xnn_f32_gemm_minmax_ukernel_2x4__wasm,
1185 xnn_f32_igemm_minmax_ukernel_2x4__wasm,
1186 xnn_f32_gemm_minmax_ukernel_1x4__wasm,
1187 xnn_f32_igemm_minmax_ukernel_1x4__wasm,
Marat Dukhanc08cdf52019-12-09 09:17:51 -08001188 2 /* mr */, 4 /* nr */);
1189 }
1190
// End-to-end benchmark wrapper for the 4x4 WAsm (non-SIMD) f32 kernels.
// Passes the 4x4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=4, nr=4.
// log2_kr, log2_sr and isa_check are left at their defaults (0, 0, nullptr).
1191 static void f32_gemm_4x4__wasm(benchmark::State& state, models::ExecutionPlanFactory model) {
1192 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -07001193 xnn_f32_gemm_minmax_ukernel_4x4__wasm,
1194 xnn_f32_igemm_minmax_ukernel_4x4__wasm,
1195 xnn_f32_gemm_minmax_ukernel_1x4__wasm,
1196 xnn_f32_igemm_minmax_ukernel_1x4__wasm,
Marat Dukhanc08cdf52019-12-09 09:17:51 -08001197 4 /* mr */, 4 /* nr */);
1198 }
1199
// Register the two WAsm (non-SIMD) wrappers via BENCHMARK_FP32_END2END.
// NOTE(review): macro semantics live outside this file — confirm there.
Marat Dukhan270a2c42020-06-26 16:45:52 -07001200 BENCHMARK_FP32_END2END(f32_gemm_2x4__wasm);
1201 BENCHMARK_FP32_END2END(f32_gemm_4x4__wasm);
Marat Dukhanc08cdf52019-12-09 09:17:51 -08001202#endif // XNN_ARCH_WASM
1203
// End-to-end benchmark wrapper for the 2x4 scalar f32 kernels. Defined outside
// any architecture guard, so it is compiled for every target.
// Passes the 2x4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=2, nr=4.
// log2_kr, log2_sr and isa_check are left at their defaults (0, 0, nullptr).
Marat Dukhan5f18d262019-10-31 10:24:14 -07001204static void f32_gemm_2x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
1205 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -07001206 xnn_f32_gemm_minmax_ukernel_2x4__scalar,
1207 xnn_f32_igemm_minmax_ukernel_2x4__scalar,
1208 xnn_f32_gemm_minmax_ukernel_1x4__scalar,
1209 xnn_f32_igemm_minmax_ukernel_1x4__scalar,
Marat Dukhan5f18d262019-10-31 10:24:14 -07001210 2 /* mr */, 4 /* nr */);
1211}
1212
// End-to-end benchmark wrapper for the 4x4 scalar f32 kernels. Defined outside
// any architecture guard, so it is compiled for every target.
// Passes the 4x4 GEMM/IGEMM minmax microkernels as gemm/igemm and the 1x4
// variants as gemm1/igemm1 to GEMMEnd2EndBenchmark, with mr=4, nr=4.
// log2_kr, log2_sr and isa_check are left at their defaults (0, 0, nullptr).
1213static void f32_gemm_4x4__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
1214 GEMMEnd2EndBenchmark(state, model,
Marat Dukhande06f492020-04-09 00:19:31 -07001215 xnn_f32_gemm_minmax_ukernel_4x4__scalar,
1216 xnn_f32_igemm_minmax_ukernel_4x4__scalar,
1217 xnn_f32_gemm_minmax_ukernel_1x4__scalar,
1218 xnn_f32_igemm_minmax_ukernel_1x4__scalar,
Marat Dukhan5f18d262019-10-31 10:24:14 -07001219 4 /* mr */, 4 /* nr */);
1220}
1221
// Register the two scalar wrappers via BENCHMARK_FP32_END2END.
// NOTE(review): macro semantics live outside this file — confirm there.
Marat Dukhan270a2c42020-06-26 16:45:52 -07001222BENCHMARK_FP32_END2END(f32_gemm_2x4__scalar);
1223BENCHMARK_FP32_END2END(f32_gemm_4x4__scalar);
Marat Dukhan5f18d262019-10-31 10:24:14 -07001224
// Emit the Google Benchmark entry point (BENCHMARK_MAIN) unless the build
// defines XNNPACK_BENCHMARK_NO_MAIN — presumably set when another translation
// unit supplies main(); confirm against the build configuration.
1225#ifndef XNNPACK_BENCHMARK_NO_MAIN
1226BENCHMARK_MAIN();
1227#endif