blob: ec59afdb7d872661eaa5e33c00b13f74a53e7ef4 [file] [log] [blame]
Marat Dukhanef4416e2019-10-31 13:44:40 -07001// Copyright 2019 Google LLC
2//
3// This source code is licensed under the BSD-style license found in the
4// LICENSE file in the root directory of this source tree.
5
6#include <algorithm>
7#include <cmath>
8#include <functional>
9#include <random>
10#include <vector>
11
12#include <xnnpack.h>
13
14#include <benchmark/benchmark.h>
15
16#include "bench/utils.h"
17#include "models/models.h"
18#include <xnnpack/dwconv.h>
19#include <xnnpack/params.h>
20
21
22static void DWConvEnd2EndBenchmark(
23 benchmark::State& state,
24 models::ExecutionPlanFactory model_factory,
25 xnn_f32_dwconv_up_ukernel_function dwconv,
Marat Dukhanc8466f52019-11-25 18:01:10 -080026 uint8_t cr, uint8_t mr,
27 benchmark::utils::IsaCheckFunction isa_check = nullptr)
Marat Dukhanef4416e2019-10-31 13:44:40 -070028{
Marat Dukhanc8466f52019-11-25 18:01:10 -080029 if (isa_check && !isa_check(state)) {
30 return;
31 }
Marat Dukhan04f03be2019-11-19 12:36:47 -080032 if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) {
Marat Dukhanef4416e2019-10-31 13:44:40 -070033 state.SkipWithError("failed to initialize XNNPACK");
34 return;
35 }
36
37 // Override microkernels chosen in xnn_initialize
38 for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) {
39 // Replace only the microkernel the matching kernel size.
40 if (xnn_params.f32.dwconv[i].mr == mr) {
41 xnn_params.f32.dwconv[i] = (struct dwconv_parameters) {
42 .up = (xnn_dwconv_up_ukernel_function) dwconv,
43 .cr = cr,
44 .mr = mr,
45 };
46 break;
47 }
48 }
49
50 auto execution_plan = model_factory(nullptr);
51 if (execution_plan.empty()) {
52 state.SkipWithError("failed to create a model");
53 return;
54 }
55
56 for (auto _ : state) {
57 for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) {
58 xnn_status status = xnn_run_operator(op.get(), nullptr);
59 if (status != xnn_status_success) {
60 state.SkipWithError("failed to run a model");
61 return;
62 }
63 }
64 }
65 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
66}
67
Marat Dukhanef4416e2019-10-31 13:44:40 -070068#if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
69 static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
70 DWConvEnd2EndBenchmark(state, model,
71 xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma,
72 4 /* cr */, 9 /* mr */);
73 }
74
75 static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) {
76 DWConvEnd2EndBenchmark(state, model,
77 xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55,
78 4 /* cr */, 9 /* mr */);
79 }
80
81 BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
82 BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
83
84 BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
85 BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
86#endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY
87
88#if XNN_ARCH_ARM || XNN_ARCH_ARM64
89 static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
90 DWConvEnd2EndBenchmark(state, model,
91 xnn_f32_dwconv_ukernel_up4x9__neon,
Marat Dukhanc8466f52019-11-25 18:01:10 -080092 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
Marat Dukhanef4416e2019-10-31 13:44:40 -070093 }
94
Marat Dukhan5098c3e2019-11-07 12:01:19 -080095 static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
96 DWConvEnd2EndBenchmark(state, model,
97 xnn_f32_dwconv_ukernel_up4x9__neon_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -080098 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
Marat Dukhan5098c3e2019-11-07 12:01:19 -080099 }
100
101 static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) {
102 DWConvEnd2EndBenchmark(state, model,
103 xnn_f32_dwconv_ukernel_up8x9__neon,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800104 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800105 }
106
107 static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
108 DWConvEnd2EndBenchmark(state, model,
109 xnn_f32_dwconv_ukernel_up8x9__neon_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800110 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON);
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800111 }
112
Marat Dukhanef4416e2019-10-31 13:44:40 -0700113 static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
114 DWConvEnd2EndBenchmark(state, model,
115 xnn_f32_dwconv_ukernel_up4x9__neonfma,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800116 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
Marat Dukhanef4416e2019-10-31 13:44:40 -0700117 }
118
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800119 static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
120 DWConvEnd2EndBenchmark(state, model,
121 xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800122 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800123 }
124
Marat Dukhanef4416e2019-10-31 13:44:40 -0700125 static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) {
126 DWConvEnd2EndBenchmark(state, model,
127 xnn_f32_dwconv_ukernel_up8x9__neonfma,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800128 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
Marat Dukhanef4416e2019-10-31 13:44:40 -0700129 }
130
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800131 static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
132 DWConvEnd2EndBenchmark(state, model,
133 xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800134 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA);
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800135 }
136
Marat Dukhanef4416e2019-10-31 13:44:40 -0700137 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
138 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
139
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800140 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
141 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
142
143 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
144 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
145
146 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
147 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
148
Marat Dukhanef4416e2019-10-31 13:44:40 -0700149 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
150 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
151
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800152 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
153 BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
154
Marat Dukhanef4416e2019-10-31 13:44:40 -0700155 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
156 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800157
158 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
159 BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhanef4416e2019-10-31 13:44:40 -0700160#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
161
162
163#if XNN_ARCH_X86 || XNN_ARCH_X86_64
164 static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
165 DWConvEnd2EndBenchmark(state, model,
166 xnn_f32_dwconv_ukernel_up4x9__sse,
167 4 /* cr */, 9 /* mr */);
168 }
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800169 static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
170 DWConvEnd2EndBenchmark(state, model,
171 xnn_f32_dwconv_ukernel_up4x9__sse_acc2,
172 4 /* cr */, 9 /* mr */);
173 }
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800174 static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) {
175 DWConvEnd2EndBenchmark(state, model,
176 xnn_f32_dwconv_ukernel_up8x9__sse,
177 8 /* cr */, 9 /* mr */);
178 }
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800179 static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
180 DWConvEnd2EndBenchmark(state, model,
181 xnn_f32_dwconv_ukernel_up8x9__sse_acc2,
182 8 /* cr */, 9 /* mr */);
183 }
184
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800185 static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
186 DWConvEnd2EndBenchmark(state, model,
187 xnn_f32_dwconv_ukernel_up8x9__avx,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800188 8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800189 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800190 static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
191 DWConvEnd2EndBenchmark(state, model,
192 xnn_f32_dwconv_ukernel_up8x9__avx_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800193 8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800194 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800195 static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) {
196 DWConvEnd2EndBenchmark(state, model,
197 xnn_f32_dwconv_ukernel_up16x9__avx,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800198 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800199 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800200 static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
201 DWConvEnd2EndBenchmark(state, model,
202 xnn_f32_dwconv_ukernel_up16x9__avx_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800203 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800204 }
205
206 static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
207 DWConvEnd2EndBenchmark(state, model,
208 xnn_f32_dwconv_ukernel_up8x9__fma3,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800209 8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800210 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800211 static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
212 DWConvEnd2EndBenchmark(state, model,
213 xnn_f32_dwconv_ukernel_up8x9__fma3_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800214 8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800215 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800216 static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) {
217 DWConvEnd2EndBenchmark(state, model,
218 xnn_f32_dwconv_ukernel_up16x9__fma3,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800219 16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800220 }
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800221 static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
222 DWConvEnd2EndBenchmark(state, model,
223 xnn_f32_dwconv_ukernel_up16x9__fma3_acc2,
Marat Dukhanc8466f52019-11-25 18:01:10 -0800224 16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3);
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800225 }
226
Marat Dukhan479f87e2019-11-27 15:17:06 -0800227 static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
228 DWConvEnd2EndBenchmark(state, model,
229 xnn_f32_dwconv_ukernel_up16x9__avx512f,
230 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
231 }
232 static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
233 DWConvEnd2EndBenchmark(state, model,
234 xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2,
235 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
236 }
237 static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) {
238 DWConvEnd2EndBenchmark(state, model,
239 xnn_f32_dwconv_ukernel_up32x9__avx512f,
240 32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
241 }
242 static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
243 DWConvEnd2EndBenchmark(state, model,
244 xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2,
245 32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F);
246 }
247
Marat Dukhanef4416e2019-10-31 13:44:40 -0700248 BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
249 BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800250
251 BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
252 BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
253
254 BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
255 BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
256
257 BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
258 BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhan17ec5f32019-11-22 13:34:16 -0800259
260 BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
261 BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
262
263 BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
264 BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
265
266 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
267 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
268
269 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
270 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
271
272 BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
273 BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
274
275 BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
276 BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
277
278 BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
279 BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
280
281 BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
282 BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhan479f87e2019-11-27 15:17:06 -0800283
284 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
285 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
286
287 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
288 BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
289
290 BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
291 BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
292
293 BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
294 BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhanef4416e2019-10-31 13:44:40 -0700295#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
296
297#if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
298 static void f32_dwconv_up4x9__psimd(benchmark::State& state, models::ExecutionPlanFactory model) {
299 DWConvEnd2EndBenchmark(state, model,
300 xnn_f32_dwconv_ukernel_up4x9__psimd,
301 4 /* cr */, 9 /* mr */);
302 }
303
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800304 static void f32_dwconv_up4x9__psimd_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
305 DWConvEnd2EndBenchmark(state, model,
306 xnn_f32_dwconv_ukernel_up4x9__psimd_acc2,
307 4 /* cr */, 9 /* mr */);
308 }
309
310 static void f32_dwconv_up8x9__psimd(benchmark::State& state, models::ExecutionPlanFactory model) {
311 DWConvEnd2EndBenchmark(state, model,
312 xnn_f32_dwconv_ukernel_up8x9__psimd,
313 8 /* cr */, 9 /* mr */);
314 }
315
316 static void f32_dwconv_up8x9__psimd_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
317 DWConvEnd2EndBenchmark(state, model,
318 xnn_f32_dwconv_ukernel_up8x9__psimd_acc2,
319 8 /* cr */, 9 /* mr */);
320 }
321
Marat Dukhanef4416e2019-10-31 13:44:40 -0700322 BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
323 BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800324
325 BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
326 BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
327
328 BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
329 BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
330
331 BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
332 BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
Marat Dukhanef4416e2019-10-31 13:44:40 -0700333#endif // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS
334
335static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
336 DWConvEnd2EndBenchmark(state, model,
337 xnn_f32_dwconv_ukernel_up1x9__scalar,
338 1 /* cr */, 9 /* mr */);
339}
340
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800341static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
342 DWConvEnd2EndBenchmark(state, model,
343 xnn_f32_dwconv_ukernel_up1x9__scalar_acc2,
344 1 /* cr */, 9 /* mr */);
345}
346
347static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) {
348 DWConvEnd2EndBenchmark(state, model,
349 xnn_f32_dwconv_ukernel_up2x9__scalar,
350 2 /* cr */, 9 /* mr */);
351}
352
353static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) {
354 DWConvEnd2EndBenchmark(state, model,
355 xnn_f32_dwconv_ukernel_up2x9__scalar_acc2,
356 2 /* cr */, 9 /* mr */);
357}
358
Marat Dukhanef4416e2019-10-31 13:44:40 -0700359BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
360BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
361
Marat Dukhan5098c3e2019-11-07 12:01:19 -0800362BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
363BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
364
365BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
366BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
367
368BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime();
369BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime();
370
Marat Dukhanef4416e2019-10-31 13:44:40 -0700371#ifndef XNNPACK_BENCHMARK_NO_MAIN
372BENCHMARK_MAIN();
373#endif