blob: 205361a7a6bd2526bf7065db5c0c8c394a0e34ac [file] [log] [blame]
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cfloat>
8#include <cmath>
9#include <functional>
10#include <random>
11#include <vector>
12
XNNPACK Teamb455b122019-09-27 18:10:33 -070013#include <benchmark/benchmark.h>
14#include "bench/conv.h"
15#include "bench/utils.h"
16#include <xnnpack/AlignedAllocator.h>
Marat Dukhan1dadbf72019-10-01 10:46:20 -070017#include <xnnpack/common.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070018#include <xnnpack/igemm.h>
19#include <xnnpack/indirection.h>
20#include <xnnpack/operator.h>
21#include <xnnpack/pack.h>
Marat Dukhaneeaa7bd2019-10-25 17:31:25 -070022#include <xnnpack/params-init.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070023#include <xnnpack/params.h>
XNNPACK Teamb455b122019-09-27 18:10:33 -070024
25
26static void IGEMMBenchmark(benchmark::State& state,
Marat Dukhande06f492020-04-09 00:19:31 -070027 xnn_f32_igemm_minmax_ukernel_function f32_igemm,
Marat Dukhanc8466f52019-11-25 18:01:10 -080028 uint32_t mr, uint32_t nr, uint32_t kr, uint32_t sr,
Marat Dukhan104ae5e2021-05-24 13:41:57 -070029 xnn_init_f32_minmax_params_fn init_params,
Marat Dukhanc8466f52019-11-25 18:01:10 -080030 benchmark::utils::IsaCheckFunction isa_check = nullptr)
XNNPACK Teamb455b122019-09-27 18:10:33 -070031{
Marat Dukhanc8466f52019-11-25 18:01:10 -080032 if (isa_check && !isa_check(state)) {
33 return;
34 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070035
36 const size_t input_height = state.range(0);
37 const size_t input_width = state.range(1);
38 const size_t kernel_height = state.range(2);
39 const size_t kernel_width = state.range(3);
40 const size_t kernel_size = kernel_height * kernel_width;
41 const size_t padding_height = state.range(4);
42 const size_t padding_width = state.range(5);
43 const size_t subsampling = state.range(6);
44 const size_t dilation = state.range(7);
45 const size_t group_input_channels = state.range(8);
46 const size_t group_output_channels = state.range(9);
47
48 std::random_device random_device;
49 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -070050 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070051
52 const size_t output_pixel_stride = group_output_channels;
53 const size_t input_pixel_stride = group_input_channels;
54 const size_t effective_kernel_height = (kernel_height - 1) * dilation + 1;
55 const size_t effective_kernel_width = (kernel_width - 1) * dilation + 1;
56 const size_t padding_left = padding_width / 2;
57 const size_t padding_top = padding_height / 2;
58 const size_t output_height = (input_height + padding_height - effective_kernel_height) / subsampling + 1;
59 const size_t output_width = (input_width + padding_width - effective_kernel_width) / subsampling + 1;
60 const size_t output_size = output_height * output_width;
61
Marat Dukhan42323232019-10-23 02:09:02 -070062 const size_t mc_stride = benchmark::utils::RoundUp<size_t>(output_size, mr);
63 const size_t nc_stride = benchmark::utils::RoundUp<size_t>(group_output_channels, nr);
64 const size_t kc_stride = benchmark::utils::RoundUp<size_t>(group_input_channels, kr);
XNNPACK Teamb455b122019-09-27 18:10:33 -070065
66 std::vector<float> a(input_height * input_width * input_pixel_stride);
67 std::generate(a.begin(), a.end(), std::ref(f32rng));
68 std::vector<float> k(group_output_channels * kernel_height * kernel_width * group_input_channels);
69 std::generate(k.begin(), k.end(), std::ref(f32rng));
70 std::vector<float> b(group_output_channels);
71 std::generate(b.begin(), b.end(), std::ref(f32rng));
72
73 std::vector<float> z(group_input_channels);
74
75 const size_t w_elements = (kernel_size * kc_stride + 1) * nc_stride;
76 const size_t i_elements = mc_stride * kernel_size;
77 const size_t c_elements = output_height * output_width * output_pixel_stride;
78 const size_t num_buffers = 1 +
Marat Dukhan42323232019-10-23 02:09:02 -070079 benchmark::utils::DivideRoundUp<size_t>(benchmark::utils::GetMaxCacheSize(),
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 sizeof(float) * (w_elements + c_elements) + sizeof(void*) * i_elements);
81
82 std::vector<float, AlignedAllocator<float, 32>> w(w_elements * num_buffers);
83 std::fill(w.begin(), w.end(), 0.0f);
84 xnn_pack_f32_conv_goki_w(
85 1 /* groups */, group_output_channels, kernel_size, group_input_channels,
Marat Dukhane06c8132021-06-03 08:59:11 -070086 nr, kr, sr, k.data(), b.data(), w.data(), 0 /* extra bytes */, nullptr);
XNNPACK Teamb455b122019-09-27 18:10:33 -070087 for (size_t n = 1; n < num_buffers; n++) {
88 std::copy(w.cbegin(), w.cbegin() + w_elements, w.begin() + n * w_elements);
89 }
90
91 std::vector<const float*> i(i_elements * num_buffers);
92 xnn_operator convolution_op = { };
93 convolution_op.indirection_buffer = reinterpret_cast<const void**>(i.data());
94 convolution_op.input = a.data();
95 convolution_op.input_pixel_stride = input_pixel_stride;
96 convolution_op.zero_buffer = z.data();
97 convolution_op.groups = 1;
98 convolution_op.group_input_channels = group_input_channels;
99 convolution_op.batch_size = 1;
100 convolution_op.input_height = input_height;
101 convolution_op.input_width = input_width;
102 convolution_op.output_height = output_height;
103 convolution_op.output_width = output_width;
104 convolution_op.kernel_height = kernel_height;
105 convolution_op.kernel_width = kernel_width;
106 convolution_op.stride_height = subsampling;
107 convolution_op.stride_width = subsampling;
108 convolution_op.dilation_height = dilation;
109 convolution_op.dilation_width = dilation;
110 convolution_op.padding_top = padding_top;
111 convolution_op.padding_left = padding_left;
112 xnn_indirection_init_conv2d(&convolution_op, mr, 2 /* log2(sizeof(float)) */);
113 for (size_t n = 1; n < num_buffers; n++) {
114 std::copy(i.cbegin(), i.cbegin() + i_elements, i.begin() + n * i_elements);
115 }
116
117 std::vector<float> c(c_elements * num_buffers);
118 std::fill(c.begin(), c.end(), std::nanf(""));
119
Marat Dukhanf56f4c42021-05-17 01:47:20 -0700120 xnn_f32_minmax_params params;
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700121 init_params(&params,
122 -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity());
XNNPACK Teamb455b122019-09-27 18:10:33 -0700123
124 size_t buffer_index = 0;
125 for (auto _ : state) {
126 state.PauseTiming();
Marat Dukhan42323232019-10-23 02:09:02 -0700127 benchmark::utils::PrefetchToL1(a.data(), a.size() * sizeof(float));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700128 buffer_index = (buffer_index + 1) % num_buffers;
129 state.ResumeTiming();
130
131 for (uint32_t m = 0; m < output_size; m += mr) {
132 const uint32_t mb = min(output_size - m, mr);
133 for (uint32_t n = 0; n < group_output_channels; n += nr) {
134 const uint32_t nb = min(group_output_channels - n, nr);
135 f32_igemm(
136 mb, nb, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
137 i.data() + buffer_index * i_elements + m,
138 w.data() + buffer_index * w_elements + n * (kc_stride * kernel_size + 1),
139 c.data() + buffer_index * c_elements + m * group_output_channels + n, group_output_channels * sizeof(float), nr * sizeof(float),
Frank Barcharde70dbeb2020-05-01 15:46:41 -0700140 0, z.data(), &params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700141 }
142 }
143 }
144
Marat Dukhand713e8a2020-12-04 14:23:12 -0800145 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
146 if (cpu_frequency != 0) {
147 state.counters["cpufreq"] = cpu_frequency;
148 }
149
XNNPACK Teamb455b122019-09-27 18:10:33 -0700150 state.counters["FLOPS"] = benchmark::Counter(
151 uint64_t(state.iterations()) * 2 *
152 output_height * output_width *
153 group_input_channels * group_output_channels *
154 kernel_height * kernel_width,
155 benchmark::Counter::kIsRate);
156}
157
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700158
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // NEON / NEON-FMA micro-kernels. Each wrapper binds one micro-kernel, its tile
  // parameters and the matching ISA check to IGEMMBenchmark.

  // ---- NEON, "lane" variants ----
  static void f32_igemm_1x8__neon_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__neon_lane_ld64,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x2__neon_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64,
      /*mr=*/4, /*nr=*/2, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x4__neon_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64,
      /*mr=*/4, /*nr=*/4, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__neon_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__neon_lane_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld128,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_6x8__neon_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_6x8__neon_lane_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  // ---- NEON, "dup" variants ----
  static void f32_igemm_1x8__neon_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__neon_dup_ld64,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__neon_dup_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld128,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__neon_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_6x8__neon_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_6x8__neon_dup_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld128,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  // ---- NEON-FMA, "dup" variants ----
  static void f32_igemm_1x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__neonfma_dup_ld64,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_4x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld128,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_4x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_6x8__neonfma_dup_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_6x8__neonfma_dup_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld128,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }

  // ---- NEON, "s4" variants (sr = 4) ----
  static void f32_igemm_1x8s4__neon(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8s4__neon,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8s4__neon(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8s4__neon,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_6x8s4__neon(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8s4__neon,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_8x8s4__neon(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_8x8s4__neon,
      /*mr=*/8, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  // ---- NEON-FMA, "s4" variants (sr = 4) ----
  static void f32_igemm_1x8s4__neonfma(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8s4__neonfma,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_4x8s4__neonfma(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8s4__neonfma,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_6x8s4__neonfma(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8s4__neonfma,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }
  static void f32_igemm_8x8s4__neonfma(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma,
      /*mr=*/8, /*nr=*/8, /*kr=*/1, /*sr=*/4,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEONFMA);
  }

  // Registration order determines the order in which the benchmarks are listed and run.
  BENCHMARK_CONV(f32_igemm_1x8__neon_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x2__neon_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x4__neon_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x8__neon_lane_ld128)
  BENCHMARK_CONV(f32_igemm_4x8__neon_lane_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neon_lane_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neon_lane_ld128)
  BENCHMARK_CONV(f32_igemm_1x8__neon_dup_ld64)
  BENCHMARK_CONV(f32_igemm_4x8__neon_dup_ld128)
  BENCHMARK_CONV(f32_igemm_4x8__neon_dup_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neon_dup_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neon_dup_ld128)
  BENCHMARK_CONV(f32_igemm_1x8__neonfma_dup_ld64)
  BENCHMARK_CONV(f32_igemm_4x8__neonfma_dup_ld128)
  BENCHMARK_CONV(f32_igemm_4x8__neonfma_dup_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neonfma_dup_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neonfma_dup_ld128)
  BENCHMARK_CONV(f32_igemm_1x8s4__neon)
  BENCHMARK_CONV(f32_igemm_4x8s4__neon)
  BENCHMARK_CONV(f32_igemm_6x8s4__neon)
  BENCHMARK_CONV(f32_igemm_8x8s4__neon)
  BENCHMARK_CONV(f32_igemm_1x8s4__neonfma)
  BENCHMARK_CONV(f32_igemm_4x8s4__neonfma)
  BENCHMARK_CONV(f32_igemm_6x8s4__neonfma)
  BENCHMARK_CONV(f32_igemm_8x8s4__neonfma)
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
287
XNNPACK Teamb455b122019-09-27 18:10:33 -0700288
#if XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
  // AArch32 assembly micro-kernels. All of them are NEON kernels, so each wrapper passes
  // benchmark::utils::CheckNEON, the same ISA check the NEON intrinsics wrappers in
  // this file use. Without it IGEMMBenchmark runs the kernel unconditionally, including
  // on 32-bit ARM cores that lack NEON.
  static void f32_igemm_4x8__aarch32_neon_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__aarch32_neon_cortex_a7(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a7,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__aarch32_neon_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a53,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__aarch32_neon_cortex_a55(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a55,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__aarch32_neon_pld_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_pld_cortex_a75,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }
  static void f32_igemm_4x8__aarch32_neon_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch32_neon_cortex_a75,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckNEON);
  }

  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_ld64)
  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a7)
  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a53)
  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a55)
  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_pld_cortex_a75)
  BENCHMARK_CONV(f32_igemm_4x8__aarch32_neon_cortex_a75)
#endif  // XNN_ARCH_ARM && XNN_ENABLE_ASSEMBLY
322
Frank Barcharddc38f072020-02-10 13:21:42 -0800323
#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
  // AArch64 micro-kernels. No ISA check is passed: the wrappers rely on
  // IGEMMBenchmark's default (nullptr), so the kernels always run.

  // ---- AArch64 assembly kernels ----
  static void f32_igemm_1x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x12__aarch64_neonfma_cortex_a53,
      /*mr=*/1, /*nr=*/12, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_1x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a53,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_1x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_cortex_a75,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__aarch64_neonfma_prfm_cortex_a75,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a53,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a55,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_cortex_a75,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_prfm_cortex_a75,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__aarch64_neonfma_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_5x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_cortex_a75,
      /*mr=*/5, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_5x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__aarch64_neonfma_prfm_cortex_a75,
      /*mr=*/5, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x12__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x12__aarch64_neonfma_cortex_a53,
      /*mr=*/4, /*nr=*/12, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_cortex_a53(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a53,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_cortex_a55(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a55,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_cortex_a73(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a73,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_cortex_a75,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a75(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_prfm_cortex_a75,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__aarch64_neonfma_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__aarch64_neonfma_ld64,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }

  // ---- NEON-FMA "lane" kernels ----
  static void f32_igemm_1x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__neonfma_lane_ld64,
      /*mr=*/1, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x2__neonfma_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64,
      /*mr=*/4, /*nr=*/2, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x4__neonfma_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64,
      /*mr=*/4, /*nr=*/4, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld128,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_4x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__neonfma_lane_ld64,
      /*mr=*/4, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__neonfma_lane_ld64(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }
  static void f32_igemm_6x8__neonfma_lane_ld128(benchmark::State& state, const char* net) {
    IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128,
      /*mr=*/6, /*nr=*/8, /*kr=*/1, /*sr=*/1,
      xnn_init_f32_minmax_scalar_params);
  }

  // Registration order determines the order in which the benchmarks are listed and run.
  BENCHMARK_CONV(f32_igemm_1x12__aarch64_neonfma_cortex_a53)
  BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_cortex_a53)
  BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_cortex_a75)
  BENCHMARK_CONV(f32_igemm_1x8__aarch64_neonfma_prfm_cortex_a75)
  BENCHMARK_CONV(f32_igemm_4x12__aarch64_neonfma_cortex_a53)
  BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a53)
  BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a55)
  BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_cortex_a75)
  BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_prfm_cortex_a75)
  BENCHMARK_CONV(f32_igemm_4x8__aarch64_neonfma_ld64)
  BENCHMARK_CONV(f32_igemm_5x8__aarch64_neonfma_cortex_a75)
  BENCHMARK_CONV(f32_igemm_5x8__aarch64_neonfma_prfm_cortex_a75)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a53)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a55)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a73)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_cortex_a75)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_prfm_cortex_a75)
  BENCHMARK_CONV(f32_igemm_6x8__aarch64_neonfma_ld64)
  BENCHMARK_CONV(f32_igemm_1x8__neonfma_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x2__neonfma_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x4__neonfma_lane_ld64)
  BENCHMARK_CONV(f32_igemm_4x8__neonfma_lane_ld128)
  BENCHMARK_CONV(f32_igemm_4x8__neonfma_lane_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neonfma_lane_ld64)
  BENCHMARK_CONV(f32_igemm_6x8__neonfma_lane_ld128)
#endif  // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
452
XNNPACK Teamb455b122019-09-27 18:10:33 -0700453
Marat Dukhan1dadbf72019-10-01 10:46:20 -0700454#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNNPACK Teamb455b122019-09-27 18:10:33 -0700455 static void f32_igemm_1x8__sse_load1(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700456 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__sse_load1, 1, 8, 1, 1,
457 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700458 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800459 static void f32_igemm_3x8__sse_load1(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700460 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__sse_load1, 3, 8, 1, 1,
461 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800462 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700463 static void f32_igemm_4x8__sse_load1(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700464 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__sse_load1, 4, 8, 1, 1,
465 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700466 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800467 static void f32_igemm_5x8__sse_load1(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700468 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__sse_load1, 5, 8, 1, 1,
469 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800470 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700471
472 static void f32_igemm_1x8__sse_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700473 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__sse_dup, 1, 8, 1, 1,
474 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700475 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800476 static void f32_igemm_3x8__sse_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700477 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__sse_dup, 3, 8, 1, 1,
478 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800479 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700480 static void f32_igemm_4x8__sse_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700481 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__sse_dup, 4, 8, 1, 1,
482 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700483 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800484 static void f32_igemm_5x8__sse_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700485 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__sse_dup, 5, 8, 1, 1,
486 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800487 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700488
489 static void f32_igemm_1x8s4__sse(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700490 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8s4__sse, 1, 8, 1, 4,
491 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700492 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800493 static void f32_igemm_3x8s4__sse(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700494 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8s4__sse, 3, 8, 1, 4,
495 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800496 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700497 static void f32_igemm_4x8s4__sse(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700498 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8s4__sse, 4, 8, 1, 4,
499 xnn_init_f32_minmax_sse_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700500 }
Marat Dukhan802fcae2020-12-11 14:37:25 -0800501 static void f32_igemm_5x8s4__sse(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700502 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8s4__sse, 5, 8, 1, 4,
503 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800504 }
505
506 static void f32_igemm_1x8__sse2_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700507 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__sse2_dup, 1, 8, 1, 1,
508 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800509 }
510 static void f32_igemm_3x8__sse2_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700511 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__sse2_dup, 3, 8, 1, 1,
512 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800513 }
514 static void f32_igemm_4x8__sse2_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700515 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__sse2_dup, 4, 8, 1, 1,
516 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800517 }
518 static void f32_igemm_5x8__sse2_dup(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700519 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__sse2_dup, 5, 8, 1, 1,
520 xnn_init_f32_minmax_sse_params);
Marat Dukhan802fcae2020-12-11 14:37:25 -0800521 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700522
Marat Dukhanfda12b82019-11-21 12:27:59 -0800523 static void f32_igemm_1x8__avx_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700524 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__avx_broadcast, 1, 8, 1, 1,
525 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800526 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800527 static void f32_igemm_4x8__avx_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700528 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__avx_broadcast, 4, 8, 1, 1,
529 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800530 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800531 static void f32_igemm_5x8__avx_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700532 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__avx_broadcast, 5, 8, 1, 1,
533 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800534 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800535 static void f32_igemm_6x8__avx_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700536 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__avx_broadcast, 6, 8, 1, 1,
537 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800538 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800539 static void f32_igemm_7x8__avx_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700540 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_7x8__avx_broadcast, 7, 8, 1, 1,
541 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckAVX);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800542 }
543
544 static void f32_igemm_1x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700545 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x8__fma3_broadcast, 1, 8, 1, 1,
546 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800547 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800548 static void f32_igemm_4x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700549 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__fma3_broadcast, 4, 8, 1, 1,
550 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800551 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800552 static void f32_igemm_5x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700553 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__fma3_broadcast, 5, 8, 1, 1,
554 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800555 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800556 static void f32_igemm_6x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700557 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__fma3_broadcast, 6, 8, 1, 1,
558 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800559 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800560 static void f32_igemm_7x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700561 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_7x8__fma3_broadcast, 7, 8, 1, 1,
562 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800563 }
Marat Dukhanfda12b82019-11-21 12:27:59 -0800564 static void f32_igemm_8x8__fma3_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700565 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast, 8, 8, 1, 1,
566 xnn_init_f32_minmax_avx_params, benchmark::utils::CheckFMA3);
Marat Dukhanfda12b82019-11-21 12:27:59 -0800567 }
568
Marat Dukhan0f349c42019-11-27 11:58:54 -0800569 static void f32_igemm_1x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700570 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x16__avx512f_broadcast, 1, 16, 1, 1,
571 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800572 }
573 static void f32_igemm_4x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700574 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x16__avx512f_broadcast, 4, 16, 1, 1,
575 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800576 }
577 static void f32_igemm_5x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700578 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x16__avx512f_broadcast, 5, 16, 1, 1,
579 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800580 }
581 static void f32_igemm_6x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700582 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x16__avx512f_broadcast, 6, 16, 1, 1,
583 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800584 }
585 static void f32_igemm_7x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700586 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_7x16__avx512f_broadcast, 7, 16, 1, 1,
587 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800588 }
589 static void f32_igemm_8x16__avx512f_broadcast(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700590 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_8x16__avx512f_broadcast, 8, 16, 1, 1,
591 xnn_init_f32_minmax_scalar_params, benchmark::utils::CheckFMA3);
Marat Dukhan0f349c42019-11-27 11:58:54 -0800592 }
593
XNNPACK Teamb455b122019-09-27 18:10:33 -0700594 BENCHMARK_CONV(f32_igemm_1x8__sse_load1)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800595 BENCHMARK_CONV(f32_igemm_3x8__sse_load1)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700596 BENCHMARK_CONV(f32_igemm_4x8__sse_load1)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800597 BENCHMARK_CONV(f32_igemm_5x8__sse_load1)
Marat Dukhan0f349c42019-11-27 11:58:54 -0800598
XNNPACK Teamb455b122019-09-27 18:10:33 -0700599 BENCHMARK_CONV(f32_igemm_1x8__sse_dup)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800600 BENCHMARK_CONV(f32_igemm_3x8__sse_dup)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700601 BENCHMARK_CONV(f32_igemm_4x8__sse_dup)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800602 BENCHMARK_CONV(f32_igemm_5x8__sse_dup)
Marat Dukhan0f349c42019-11-27 11:58:54 -0800603
XNNPACK Teamb455b122019-09-27 18:10:33 -0700604 BENCHMARK_CONV(f32_igemm_1x8s4__sse)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800605 BENCHMARK_CONV(f32_igemm_3x8s4__sse)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700606 BENCHMARK_CONV(f32_igemm_4x8s4__sse)
Marat Dukhan802fcae2020-12-11 14:37:25 -0800607 BENCHMARK_CONV(f32_igemm_5x8s4__sse)
608
609 BENCHMARK_CONV(f32_igemm_1x8__sse2_dup)
610 BENCHMARK_CONV(f32_igemm_3x8__sse2_dup)
611 BENCHMARK_CONV(f32_igemm_4x8__sse2_dup)
612 BENCHMARK_CONV(f32_igemm_5x8__sse2_dup)
Marat Dukhan0f349c42019-11-27 11:58:54 -0800613
Marat Dukhanfda12b82019-11-21 12:27:59 -0800614 BENCHMARK_CONV(f32_igemm_1x8__avx_broadcast)
615 BENCHMARK_CONV(f32_igemm_4x8__avx_broadcast)
616 BENCHMARK_CONV(f32_igemm_5x8__avx_broadcast)
617 BENCHMARK_CONV(f32_igemm_6x8__avx_broadcast)
618 BENCHMARK_CONV(f32_igemm_7x8__avx_broadcast)
Marat Dukhan0f349c42019-11-27 11:58:54 -0800619
Marat Dukhanfda12b82019-11-21 12:27:59 -0800620 BENCHMARK_CONV(f32_igemm_1x8__fma3_broadcast)
621 BENCHMARK_CONV(f32_igemm_4x8__fma3_broadcast)
622 BENCHMARK_CONV(f32_igemm_5x8__fma3_broadcast)
623 BENCHMARK_CONV(f32_igemm_6x8__fma3_broadcast)
624 BENCHMARK_CONV(f32_igemm_7x8__fma3_broadcast)
625 BENCHMARK_CONV(f32_igemm_8x8__fma3_broadcast)
Marat Dukhan0f349c42019-11-27 11:58:54 -0800626
627 BENCHMARK_CONV(f32_igemm_1x16__avx512f_broadcast)
628 BENCHMARK_CONV(f32_igemm_4x16__avx512f_broadcast)
629 BENCHMARK_CONV(f32_igemm_5x16__avx512f_broadcast)
630 BENCHMARK_CONV(f32_igemm_6x16__avx512f_broadcast)
631 BENCHMARK_CONV(f32_igemm_7x16__avx512f_broadcast)
632 BENCHMARK_CONV(f32_igemm_8x16__avx512f_broadcast)
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700633#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
634
XNNPACK Teamb455b122019-09-27 18:10:33 -0700635
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700636#if XNN_ARCH_WASMSIMD
Frank Barchard0725b8d2020-12-07 11:07:35 -0800637 static void f32_igemm_3x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700638 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_loadsplat, 3, 8, 1, 1,
639 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700640 }
641
Frank Barchard0725b8d2020-12-07 11:07:35 -0800642 static void f32_igemm_4x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700643 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_loadsplat, 4, 8, 1, 1,
644 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700645 }
646
Frank Barchard0725b8d2020-12-07 11:07:35 -0800647 static void f32_igemm_5x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700648 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_loadsplat, 5, 8, 1, 1,
649 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700650 }
651
Frank Barchard0725b8d2020-12-07 11:07:35 -0800652 static void f32_igemm_6x8__wasmsimd_arm_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700653 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_loadsplat, 6, 8, 1, 1,
654 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700655 }
656
Frank Barchard0725b8d2020-12-07 11:07:35 -0800657 static void f32_igemm_3x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700658 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_loadsplat, 3, 8, 1, 1,
659 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700660 }
661
Frank Barchard0725b8d2020-12-07 11:07:35 -0800662 static void f32_igemm_4x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700663 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_loadsplat, 4, 8, 1, 1,
664 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700665 }
666
Frank Barchard0725b8d2020-12-07 11:07:35 -0800667 static void f32_igemm_5x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700668 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_loadsplat, 5, 8, 1, 1,
669 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700670 }
671
Frank Barchard0725b8d2020-12-07 11:07:35 -0800672 static void f32_igemm_6x8__wasmsimd_x86_loadsplat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700673 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_loadsplat, 6, 8, 1, 1,
674 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700675 }
676
Frank Barchard0725b8d2020-12-07 11:07:35 -0800677 static void f32_igemm_3x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700678 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_arm_splat, 3, 8, 1, 1,
679 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700680 }
681
Frank Barchard0725b8d2020-12-07 11:07:35 -0800682 static void f32_igemm_4x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700683 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_arm_splat, 4, 8, 1, 1,
684 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700685 }
686
Frank Barchard0725b8d2020-12-07 11:07:35 -0800687 static void f32_igemm_5x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700688 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_arm_splat, 5, 8, 1, 1,
689 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700690 }
691
Frank Barchard0725b8d2020-12-07 11:07:35 -0800692 static void f32_igemm_6x8__wasmsimd_arm_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700693 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_arm_splat, 6, 8, 1, 1,
694 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700695 }
696
Frank Barchard0725b8d2020-12-07 11:07:35 -0800697 static void f32_igemm_3x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700698 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8__wasmsimd_x86_splat, 3, 8, 1, 1,
699 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700700 }
701
Frank Barchard0725b8d2020-12-07 11:07:35 -0800702 static void f32_igemm_4x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700703 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8__wasmsimd_x86_splat, 4, 8, 1, 1,
704 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700705 }
706
Frank Barchard0725b8d2020-12-07 11:07:35 -0800707 static void f32_igemm_5x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700708 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8__wasmsimd_x86_splat, 5, 8, 1, 1,
709 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700710 }
711
Frank Barchard0725b8d2020-12-07 11:07:35 -0800712 static void f32_igemm_6x8__wasmsimd_x86_splat(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700713 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8__wasmsimd_x86_splat, 6, 8, 1, 1,
714 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700715 }
716
717 static void f32_igemm_3x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700718 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_arm, 3, 8, 1, 4,
719 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700720 }
721
722 static void f32_igemm_4x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700723 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_arm, 4, 8, 1, 4,
724 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700725 }
726
727 static void f32_igemm_5x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700728 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_arm, 5, 8, 1, 4,
729 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700730 }
731
732 static void f32_igemm_6x8s4__wasmsimd_arm(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700733 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_arm, 6, 8, 1, 4,
734 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700735 }
736
737 static void f32_igemm_3x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700738 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_3x8s4__wasmsimd_x86, 3, 8, 1, 4,
739 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700740 }
741
742 static void f32_igemm_4x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700743 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x8s4__wasmsimd_x86, 4, 8, 1, 4,
744 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700745 }
746
747 static void f32_igemm_5x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700748 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_5x8s4__wasmsimd_x86, 5, 8, 1, 4,
749 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700750 }
751
752 static void f32_igemm_6x8s4__wasmsimd_x86(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700753 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_6x8s4__wasmsimd_x86, 6, 8, 1, 4,
754 xnn_init_f32_minmax_scalar_params);
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700755 }
756
Frank Barchard0725b8d2020-12-07 11:07:35 -0800757 BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_arm_loadsplat)
758 BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_arm_loadsplat)
759 BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_arm_loadsplat)
760 BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_arm_loadsplat)
761 BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_x86_loadsplat)
762 BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_x86_loadsplat)
763 BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_x86_loadsplat)
764 BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_x86_loadsplat)
765 BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_arm_splat)
766 BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_arm_splat)
767 BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_arm_splat)
768 BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_arm_splat)
769 BENCHMARK_CONV(f32_igemm_3x8__wasmsimd_x86_splat)
770 BENCHMARK_CONV(f32_igemm_4x8__wasmsimd_x86_splat)
771 BENCHMARK_CONV(f32_igemm_5x8__wasmsimd_x86_splat)
772 BENCHMARK_CONV(f32_igemm_6x8__wasmsimd_x86_splat)
Marat Dukhan1bbf96b2020-06-15 23:01:20 -0700773 BENCHMARK_CONV(f32_igemm_3x8s4__wasmsimd_arm)
774 BENCHMARK_CONV(f32_igemm_4x8s4__wasmsimd_arm)
775 BENCHMARK_CONV(f32_igemm_5x8s4__wasmsimd_arm)
776 BENCHMARK_CONV(f32_igemm_6x8s4__wasmsimd_arm)
777 BENCHMARK_CONV(f32_igemm_3x8s4__wasmsimd_x86)
778 BENCHMARK_CONV(f32_igemm_4x8s4__wasmsimd_x86)
779 BENCHMARK_CONV(f32_igemm_5x8s4__wasmsimd_x86)
780 BENCHMARK_CONV(f32_igemm_6x8s4__wasmsimd_x86)
781#endif // XNN_ARCH_WASMSIMD
782
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700783
XNNPACK Teamb455b122019-09-27 18:10:33 -0700784static void f32_igemm_1x4__scalar(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700785 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_1x4__scalar, 1, 4, 1, 1,
786 xnn_init_f32_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700787}
788
789static void f32_igemm_2x4__scalar(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700790 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_2x4__scalar, 2, 4, 1, 1,
791 xnn_init_f32_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700792}
793
794static void f32_igemm_4x4__scalar(benchmark::State& state, const char* net) {
Marat Dukhan104ae5e2021-05-24 13:41:57 -0700795 IGEMMBenchmark(state, xnn_f32_igemm_minmax_ukernel_4x4__scalar, 4, 4, 1, 1,
796 xnn_init_f32_minmax_scalar_params);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700797}
798
799BENCHMARK_CONV(f32_igemm_1x4__scalar)
800BENCHMARK_CONV(f32_igemm_2x4__scalar)
801BENCHMARK_CONV(f32_igemm_4x4__scalar)
802
803
804#ifndef XNNPACK_BENCHMARK_NO_MAIN
805BENCHMARK_MAIN();
806#endif