Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 1 | // Copyright 2019 Google LLC |
| 2 | // |
| 3 | // This source code is licensed under the BSD-style license found in the |
| 4 | // LICENSE file in the root directory of this source tree. |
| 5 | |
| 6 | #include <algorithm> |
| 7 | #include <cmath> |
| 8 | #include <functional> |
| 9 | #include <random> |
| 10 | #include <vector> |
| 11 | |
| 12 | #include <xnnpack.h> |
| 13 | |
| 14 | #include <benchmark/benchmark.h> |
| 15 | |
| 16 | #include "bench/utils.h" |
| 17 | #include "models/models.h" |
| 18 | #include <xnnpack/dwconv.h> |
| 19 | #include <xnnpack/params.h> |
| 20 | |
| 21 | |
| 22 | static void DWConvEnd2EndBenchmark( |
| 23 | benchmark::State& state, |
| 24 | models::ExecutionPlanFactory model_factory, |
| 25 | xnn_f32_dwconv_up_ukernel_function dwconv, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 26 | uint8_t cr, uint8_t mr, |
| 27 | benchmark::utils::IsaCheckFunction isa_check = nullptr) |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 28 | { |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 29 | if (isa_check && !isa_check(state)) { |
| 30 | return; |
| 31 | } |
Marat Dukhan | 04f03be | 2019-11-19 12:36:47 -0800 | [diff] [blame] | 32 | if (xnn_initialize(nullptr /* allocator */) != xnn_status_success) { |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 33 | state.SkipWithError("failed to initialize XNNPACK"); |
| 34 | return; |
| 35 | } |
| 36 | |
| 37 | // Override microkernels chosen in xnn_initialize |
| 38 | for (size_t i = 0; i < XNN_MAX_F32_DWCONV_UKERNELS; i++) { |
| 39 | // Replace only the microkernel the matching kernel size. |
| 40 | if (xnn_params.f32.dwconv[i].mr == mr) { |
| 41 | xnn_params.f32.dwconv[i] = (struct dwconv_parameters) { |
| 42 | .up = (xnn_dwconv_up_ukernel_function) dwconv, |
| 43 | .cr = cr, |
| 44 | .mr = mr, |
| 45 | }; |
| 46 | break; |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | auto execution_plan = model_factory(nullptr); |
| 51 | if (execution_plan.empty()) { |
| 52 | state.SkipWithError("failed to create a model"); |
| 53 | return; |
| 54 | } |
| 55 | |
| 56 | for (auto _ : state) { |
| 57 | for (const std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)>& op : execution_plan) { |
| 58 | xnn_status status = xnn_run_operator(op.get(), nullptr); |
| 59 | if (status != xnn_status_success) { |
| 60 | state.SkipWithError("failed to run a model"); |
| 61 | return; |
| 62 | } |
| 63 | } |
| 64 | } |
| 65 | state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency(); |
| 66 | } |
| 67 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 68 | #if XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY |
| 69 | static void f32_dwconv_up4x9__aarch64_neonfma(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 70 | DWConvEnd2EndBenchmark(state, model, |
| 71 | xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma, |
| 72 | 4 /* cr */, 9 /* mr */); |
| 73 | } |
| 74 | |
| 75 | static void f32_dwconv_up4x9__aarch64_neonfma_cortex_a55(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 76 | DWConvEnd2EndBenchmark(state, model, |
| 77 | xnn_f32_dwconv_ukernel_up4x9__aarch64_neonfma_cortex_a55, |
| 78 | 4 /* cr */, 9 /* mr */); |
| 79 | } |
| 80 | |
| 81 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 82 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 83 | |
| 84 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 85 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__aarch64_neonfma_cortex_a55, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 86 | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_ASSEMBLY |
| 87 | |
| 88 | #if XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| 89 | static void f32_dwconv_up4x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 90 | DWConvEnd2EndBenchmark(state, model, |
| 91 | xnn_f32_dwconv_ukernel_up4x9__neon, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 92 | 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 93 | } |
| 94 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 95 | static void f32_dwconv_up4x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 96 | DWConvEnd2EndBenchmark(state, model, |
| 97 | xnn_f32_dwconv_ukernel_up4x9__neon_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 98 | 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | static void f32_dwconv_up8x9__neon(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 102 | DWConvEnd2EndBenchmark(state, model, |
| 103 | xnn_f32_dwconv_ukernel_up8x9__neon, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 104 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 105 | } |
| 106 | |
| 107 | static void f32_dwconv_up8x9__neon_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 108 | DWConvEnd2EndBenchmark(state, model, |
| 109 | xnn_f32_dwconv_ukernel_up8x9__neon_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 110 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEON); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 111 | } |
| 112 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 113 | static void f32_dwconv_up4x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 114 | DWConvEnd2EndBenchmark(state, model, |
| 115 | xnn_f32_dwconv_ukernel_up4x9__neonfma, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 116 | 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 117 | } |
| 118 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 119 | static void f32_dwconv_up4x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 120 | DWConvEnd2EndBenchmark(state, model, |
| 121 | xnn_f32_dwconv_ukernel_up4x9__neonfma_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 122 | 4 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 123 | } |
| 124 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 125 | static void f32_dwconv_up8x9__neonfma(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 126 | DWConvEnd2EndBenchmark(state, model, |
| 127 | xnn_f32_dwconv_ukernel_up8x9__neonfma, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 128 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 129 | } |
| 130 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 131 | static void f32_dwconv_up8x9__neonfma_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 132 | DWConvEnd2EndBenchmark(state, model, |
| 133 | xnn_f32_dwconv_ukernel_up8x9__neonfma_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 134 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckNEONFMA); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 135 | } |
| 136 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 137 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 138 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 139 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 140 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 141 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neon_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 142 | |
| 143 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 144 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 145 | |
| 146 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 147 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neon_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 148 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 149 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 150 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 151 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 152 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 153 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__neonfma_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 154 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 155 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 156 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 157 | |
| 158 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 159 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__neonfma_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 160 | #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 |
| 161 | |
| 162 | |
| 163 | #if XNN_ARCH_X86 || XNN_ARCH_X86_64 |
| 164 | static void f32_dwconv_up4x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 165 | DWConvEnd2EndBenchmark(state, model, |
| 166 | xnn_f32_dwconv_ukernel_up4x9__sse, |
| 167 | 4 /* cr */, 9 /* mr */); |
| 168 | } |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 169 | static void f32_dwconv_up4x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 170 | DWConvEnd2EndBenchmark(state, model, |
| 171 | xnn_f32_dwconv_ukernel_up4x9__sse_acc2, |
| 172 | 4 /* cr */, 9 /* mr */); |
| 173 | } |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 174 | static void f32_dwconv_up8x9__sse(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 175 | DWConvEnd2EndBenchmark(state, model, |
| 176 | xnn_f32_dwconv_ukernel_up8x9__sse, |
| 177 | 8 /* cr */, 9 /* mr */); |
| 178 | } |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 179 | static void f32_dwconv_up8x9__sse_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 180 | DWConvEnd2EndBenchmark(state, model, |
| 181 | xnn_f32_dwconv_ukernel_up8x9__sse_acc2, |
| 182 | 8 /* cr */, 9 /* mr */); |
| 183 | } |
| 184 | |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 185 | static void f32_dwconv_up8x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 186 | DWConvEnd2EndBenchmark(state, model, |
| 187 | xnn_f32_dwconv_ukernel_up8x9__avx, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 188 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 189 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 190 | static void f32_dwconv_up8x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 191 | DWConvEnd2EndBenchmark(state, model, |
| 192 | xnn_f32_dwconv_ukernel_up8x9__avx_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 193 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 194 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 195 | static void f32_dwconv_up16x9__avx(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 196 | DWConvEnd2EndBenchmark(state, model, |
| 197 | xnn_f32_dwconv_ukernel_up16x9__avx, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 198 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 199 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 200 | static void f32_dwconv_up16x9__avx_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 201 | DWConvEnd2EndBenchmark(state, model, |
| 202 | xnn_f32_dwconv_ukernel_up16x9__avx_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 203 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 204 | } |
| 205 | |
| 206 | static void f32_dwconv_up8x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 207 | DWConvEnd2EndBenchmark(state, model, |
| 208 | xnn_f32_dwconv_ukernel_up8x9__fma3, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 209 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 210 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 211 | static void f32_dwconv_up8x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 212 | DWConvEnd2EndBenchmark(state, model, |
| 213 | xnn_f32_dwconv_ukernel_up8x9__fma3_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 214 | 8 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 215 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 216 | static void f32_dwconv_up16x9__fma3(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 217 | DWConvEnd2EndBenchmark(state, model, |
| 218 | xnn_f32_dwconv_ukernel_up16x9__fma3, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 219 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 220 | } |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 221 | static void f32_dwconv_up16x9__fma3_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 222 | DWConvEnd2EndBenchmark(state, model, |
| 223 | xnn_f32_dwconv_ukernel_up16x9__fma3_acc2, |
Marat Dukhan | c8466f5 | 2019-11-25 18:01:10 -0800 | [diff] [blame] | 224 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckFMA3); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 225 | } |
| 226 | |
Marat Dukhan | 479f87e | 2019-11-27 15:17:06 -0800 | [diff] [blame] | 227 | static void f32_dwconv_up16x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 228 | DWConvEnd2EndBenchmark(state, model, |
| 229 | xnn_f32_dwconv_ukernel_up16x9__avx512f, |
| 230 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F); |
| 231 | } |
| 232 | static void f32_dwconv_up16x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 233 | DWConvEnd2EndBenchmark(state, model, |
| 234 | xnn_f32_dwconv_ukernel_up16x9__avx512f_acc2, |
| 235 | 16 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F); |
| 236 | } |
| 237 | static void f32_dwconv_up32x9__avx512f(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 238 | DWConvEnd2EndBenchmark(state, model, |
| 239 | xnn_f32_dwconv_ukernel_up32x9__avx512f, |
| 240 | 32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F); |
| 241 | } |
| 242 | static void f32_dwconv_up32x9__avx512f_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 243 | DWConvEnd2EndBenchmark(state, model, |
| 244 | xnn_f32_dwconv_ukernel_up32x9__avx512f_acc2, |
| 245 | 32 /* cr */, 9 /* mr */, benchmark::utils::CheckAVX512F); |
| 246 | } |
| 247 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 248 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 249 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 250 | |
| 251 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 252 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__sse_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 253 | |
| 254 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 255 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 256 | |
| 257 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 258 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__sse_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | 17ec5f3 | 2019-11-22 13:34:16 -0800 | [diff] [blame] | 259 | |
| 260 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 261 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 262 | |
| 263 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 264 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__avx_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 265 | |
| 266 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 267 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 268 | |
| 269 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 270 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 271 | |
| 272 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 273 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 274 | |
| 275 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 276 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__fma3_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 277 | |
| 278 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 279 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 280 | |
| 281 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 282 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__fma3_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | 479f87e | 2019-11-27 15:17:06 -0800 | [diff] [blame] | 283 | |
| 284 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 285 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 286 | |
| 287 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 288 | BENCHMARK_CAPTURE(f32_dwconv_up16x9__avx512f_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 289 | |
| 290 | BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 291 | BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 292 | |
| 293 | BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 294 | BENCHMARK_CAPTURE(f32_dwconv_up32x9__avx512f_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 295 | #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 |
| 296 | |
| 297 | #if !XNN_ARCH_WASM && !XNN_ARCH_ASMJS |
| 298 | static void f32_dwconv_up4x9__psimd(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 299 | DWConvEnd2EndBenchmark(state, model, |
| 300 | xnn_f32_dwconv_ukernel_up4x9__psimd, |
| 301 | 4 /* cr */, 9 /* mr */); |
| 302 | } |
| 303 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 304 | static void f32_dwconv_up4x9__psimd_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 305 | DWConvEnd2EndBenchmark(state, model, |
| 306 | xnn_f32_dwconv_ukernel_up4x9__psimd_acc2, |
| 307 | 4 /* cr */, 9 /* mr */); |
| 308 | } |
| 309 | |
| 310 | static void f32_dwconv_up8x9__psimd(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 311 | DWConvEnd2EndBenchmark(state, model, |
| 312 | xnn_f32_dwconv_ukernel_up8x9__psimd, |
| 313 | 8 /* cr */, 9 /* mr */); |
| 314 | } |
| 315 | |
| 316 | static void f32_dwconv_up8x9__psimd_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 317 | DWConvEnd2EndBenchmark(state, model, |
| 318 | xnn_f32_dwconv_ukernel_up8x9__psimd_acc2, |
| 319 | 8 /* cr */, 9 /* mr */); |
| 320 | } |
| 321 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 322 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 323 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 324 | |
| 325 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 326 | BENCHMARK_CAPTURE(f32_dwconv_up4x9__psimd_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 327 | |
| 328 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 329 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 330 | |
| 331 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 332 | BENCHMARK_CAPTURE(f32_dwconv_up8x9__psimd_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 333 | #endif // !XNN_ARCH_WASM && !XNN_ARCH_ASMJS |
| 334 | |
| 335 | static void f32_dwconv_up1x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 336 | DWConvEnd2EndBenchmark(state, model, |
| 337 | xnn_f32_dwconv_ukernel_up1x9__scalar, |
| 338 | 1 /* cr */, 9 /* mr */); |
| 339 | } |
| 340 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 341 | static void f32_dwconv_up1x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 342 | DWConvEnd2EndBenchmark(state, model, |
| 343 | xnn_f32_dwconv_ukernel_up1x9__scalar_acc2, |
| 344 | 1 /* cr */, 9 /* mr */); |
| 345 | } |
| 346 | |
| 347 | static void f32_dwconv_up2x9__scalar(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 348 | DWConvEnd2EndBenchmark(state, model, |
| 349 | xnn_f32_dwconv_ukernel_up2x9__scalar, |
| 350 | 2 /* cr */, 9 /* mr */); |
| 351 | } |
| 352 | |
| 353 | static void f32_dwconv_up2x9__scalar_acc2(benchmark::State& state, models::ExecutionPlanFactory model) { |
| 354 | DWConvEnd2EndBenchmark(state, model, |
| 355 | xnn_f32_dwconv_ukernel_up2x9__scalar_acc2, |
| 356 | 2 /* cr */, 9 /* mr */); |
| 357 | } |
| 358 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 359 | BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 360 | BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 361 | |
Marat Dukhan | 5098c3e | 2019-11-07 12:01:19 -0800 | [diff] [blame] | 362 | BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 363 | BENCHMARK_CAPTURE(f32_dwconv_up1x9__scalar_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 364 | |
| 365 | BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 366 | BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 367 | |
| 368 | BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar_acc2, mobilenet_v1, models::MobileNetV1)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 369 | BENCHMARK_CAPTURE(f32_dwconv_up2x9__scalar_acc2, mobilenet_v2, models::MobileNetV2)->Unit(benchmark::kMicrosecond)->UseRealTime(); |
| 370 | |
Marat Dukhan | ef4416e | 2019-10-31 13:44:40 -0700 | [diff] [blame] | 371 | #ifndef XNNPACK_BENCHMARK_NO_MAIN |
| 372 | BENCHMARK_MAIN(); |
| 373 | #endif |