blob: 7c997bd23f3be6b0cf78b767131dc55739d885d6 [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
#include <algorithm>
#include <array>
#include <cmath>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE
XNNPACK Teamb455b122019-09-27 18:10:33 -070029
Marat Dukhan401d97b2020-12-02 12:32:09 -080030
Chao Meic6640272020-07-23 09:35:11 -070031#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -070032static void xnnpack_sigmoid_qu8(benchmark::State& state) {
Marat Dukhanc3b9e862019-11-17 13:18:54 -080033 const size_t batch_size = state.range(0);
34 const size_t channels = state.range(1);
XNNPACK Teamb455b122019-09-27 18:10:33 -070035
36 std::random_device random_device;
37 auto rng = std::mt19937(random_device());
Marat Dukhan401d97b2020-12-02 12:32:09 -080038 auto u8rng = std::bind(
39 std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -070040
41 std::vector<uint8_t> input(batch_size * channels);
42 std::vector<uint8_t> output(batch_size * channels);
43 std::generate(input.begin(), input.end(), std::ref(u8rng));
44 std::fill(output.begin(), output.end(), 0xA5);
45
Marat Dukhan04f03be2019-11-19 12:36:47 -080046 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070047 if (status != xnn_status_success) {
48 state.SkipWithError("failed to initialize XNNPACK");
49 return;
50 }
51
52 xnn_operator_t sigmoid_op = nullptr;
Marat Dukhan08b7a972020-07-14 18:17:29 -070053 status = xnn_create_sigmoid_nc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -070054 channels, channels /* input stride */, channels /* output stride */,
55 127 /* input zero point */, 1.0f /* input scale */,
56 0 /* output zero point */, 1.0f / 256.0f /* output scale */,
57 0 /* output min */, 255 /* output max */,
58 0 /* flags */, &sigmoid_op);
59 if (status != xnn_status_success || sigmoid_op == nullptr) {
60 state.SkipWithError("failed to create Sigmoid operator");
61 return;
62 }
63
Marat Dukhan08b7a972020-07-14 18:17:29 -070064 status = xnn_setup_sigmoid_nc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 sigmoid_op,
66 batch_size,
67 input.data(), output.data(),
68 nullptr /* thread pool */);
69 if (status != xnn_status_success) {
70 state.SkipWithError("failed to setup Sigmoid operator");
71 return;
72 }
73
74 for (auto _ : state) {
75 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
76 if (status != xnn_status_success) {
77 state.SkipWithError("failed to run Sigmoid operator");
78 return;
79 }
80 }
81
82 status = xnn_delete_operator(sigmoid_op);
83 if (status != xnn_status_success) {
84 state.SkipWithError("failed to delete Sigmoid operator");
85 return;
86 }
87
Marat Dukhan401d97b2020-12-02 12:32:09 -080088 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
89 if (cpu_frequency != 0) {
90 state.counters["cpufreq"] = cpu_frequency;
91 }
Frank Barchardbb4c18b2019-09-30 11:05:52 -070092
XNNPACK Teamb455b122019-09-27 18:10:33 -070093 const size_t elements_per_iteration = batch_size * channels;
94 state.counters["elements"] =
95 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
96
97 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
98 state.counters["bytes"] =
99 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
100}
Chao Meic6640272020-07-23 09:35:11 -0700101#endif // XNN_NO_QU8_OPERATORS
XNNPACK Teamb455b122019-09-27 18:10:33 -0700102
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800103static void xnnpack_sigmoid_f32(benchmark::State& state) {
104 const size_t batch_size = state.range(0);
105 const size_t channels = state.range(1);
Marat Dukhan346a9e52019-11-15 09:06:30 -0800106
107 std::random_device random_device;
108 auto rng = std::mt19937(random_device());
Marat Dukhan44f0ca72020-08-02 21:46:58 -0700109 auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));
Marat Dukhan346a9e52019-11-15 09:06:30 -0800110
111 std::vector<float> input(batch_size * channels);
112 std::vector<float> output(batch_size * channels);
113 std::generate(input.begin(), input.end(), std::ref(f32rng));
114 std::fill(output.begin(), output.end(), std::nanf(""));
115
Marat Dukhan04f03be2019-11-19 12:36:47 -0800116 xnn_status status = xnn_initialize(nullptr /* allocator */);
Marat Dukhan346a9e52019-11-15 09:06:30 -0800117 if (status != xnn_status_success) {
118 state.SkipWithError("failed to initialize XNNPACK");
119 return;
120 }
121
122 xnn_operator_t sigmoid_op = nullptr;
123 status = xnn_create_sigmoid_nc_f32(
124 channels, channels /* input stride */, channels /* output stride */,
125 0 /* flags */, &sigmoid_op);
126 if (status != xnn_status_success || sigmoid_op == nullptr) {
127 state.SkipWithError("failed to create Sigmoid operator");
128 return;
129 }
130
131 status = xnn_setup_sigmoid_nc_f32(
132 sigmoid_op,
133 batch_size,
134 input.data(), output.data(),
135 nullptr /* thread pool */);
136 if (status != xnn_status_success) {
137 state.SkipWithError("failed to setup Sigmoid operator");
138 return;
139 }
140
141 for (auto _ : state) {
142 status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
143 if (status != xnn_status_success) {
144 state.SkipWithError("failed to run Sigmoid operator");
145 return;
146 }
147 }
148
149 status = xnn_delete_operator(sigmoid_op);
150 if (status != xnn_status_success) {
151 state.SkipWithError("failed to delete Sigmoid operator");
152 return;
153 }
154
Marat Dukhan401d97b2020-12-02 12:32:09 -0800155 const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
156 if (cpu_frequency != 0) {
157 state.counters["cpufreq"] = cpu_frequency;
158 }
Marat Dukhan346a9e52019-11-15 09:06:30 -0800159
160 const size_t elements_per_iteration = batch_size * channels;
161 state.counters["elements"] =
162 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
163
164 const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
165 state.counters["bytes"] =
166 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
167}
168
#ifdef BENCHMARK_TENSORFLOW_LITE
// Benchmarks the TensorFlow Lite built-in LOGISTIC operator on float data.
//
// state.range(0) is the batch size (N) and state.range(1) is the number of
// channels (C). A flatbuffer model with a single LOGISTIC operator mapping a
// [N, 1, 1, C] FLOAT32 tensor to a tensor of the same shape is assembled in
// memory, loaded into an interpreter limited to one thread, and only
// Interpreter::Invoke() is executed inside the timed loop.
//
// Reported counters: "cpufreq" (only when a non-zero frequency is available),
// "elements" and "bytes" (both as rates).
static void tflite_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);
  const size_t num_elements = batch_size * channels;

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), std::ref(rng));

  // ---- Assemble the model flatbuffer -------------------------------------
  flatbuffers::FlatBufferBuilder fbb;
  const flatbuffers::Offset<tflite::OperatorCode> logistic_opcode =
    tflite::CreateOperatorCode(fbb, tflite::BuiltinOperator_LOGISTIC);

  // The model carries a single buffer with no data.
  const flatbuffers::Offset<tflite::Buffer> empty_buffer =
    tflite::CreateBuffer(fbb, fbb.CreateVector({}));

  // Input and output share the same [batch, height, width, channels] shape.
  const std::array<int32_t, 4> io_shape{{
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(1 /* height */),
    static_cast<int32_t>(1 /* width */),
    static_cast<int32_t>(channels)
  }};

  const flatbuffers::Offset<tflite::Tensor> input_tensor = tflite::CreateTensor(
    fbb,
    fbb.CreateVector<int32_t>(io_shape.data(), io_shape.size()),
    tflite::TensorType_FLOAT32);
  const flatbuffers::Offset<tflite::Tensor> output_tensor = tflite::CreateTensor(
    fbb,
    fbb.CreateVector<int32_t>(io_shape.data(), io_shape.size()),
    tflite::TensorType_FLOAT32);
  const std::array<flatbuffers::Offset<tflite::Tensor>, 2> tensors{{
    input_tensor,
    output_tensor,
  }};

  // Tensor 0 is the operator/graph input, tensor 1 is the operator/graph output.
  const int32_t input_index = 0;
  const int32_t output_index = 1;

  const auto op_input_indices = fbb.CreateVector<int32_t>(&input_index, 1);
  const auto op_output_indices = fbb.CreateVector<int32_t>(&output_index, 1);
  const flatbuffers::Offset<tflite::Operator> logistic_op = tflite::CreateOperator(
    fbb,
    0 /* opcode_index */,
    op_input_indices,
    op_output_indices);

  const auto graph_tensors = fbb.CreateVector(tensors.data(), tensors.size());
  const auto graph_input_indices = fbb.CreateVector<int32_t>(&input_index, 1);
  const auto graph_output_indices = fbb.CreateVector<int32_t>(&output_index, 1);
  const auto graph_operators = fbb.CreateVector(&logistic_op, 1);
  const flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    fbb,
    graph_tensors,
    graph_input_indices,
    graph_output_indices,
    graph_operators);

  const auto model_opcodes = fbb.CreateVector(&logistic_opcode, 1);
  const auto model_subgraphs = fbb.CreateVector(&subgraph, 1);
  const auto model_description = fbb.CreateString("Sigmoid model");
  const auto model_buffers = fbb.CreateVector(&empty_buffer, 1);
  const flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(
    fbb,
    TFLITE_SCHEMA_VERSION,
    model_opcodes,
    model_subgraphs,
    model_description,
    model_buffers);

  fbb.Finish(model_buffer);

  // ---- Instantiate the interpreter ---------------------------------------
  const tflite::Model* model = tflite::GetModel(fbb.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (!interpreter) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  // Fill the input tensor (index 0) with random values.
  float* const input_data = interpreter->typed_tensor<float>(0);
  std::generate(input_data, input_data + num_elements, std::ref(f32rng));

  // ---- Timed region -------------------------------------------------------
  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  // ---- Counters -----------------------------------------------------------
  const uint64_t cpu_frequency = benchmark::utils::GetCurrentCpuFrequency();
  if (cpu_frequency != 0) {
    state.counters["cpufreq"] = cpu_frequency;
  }

  const uint64_t iterations = uint64_t(state.iterations());
  state.counters["elements"] =
    benchmark::Counter(iterations * num_elements, benchmark::Counter::kIsRate);

  // Factor of 2: the input and the output tensor each hold one float per element.
  const size_t bytes_per_iteration = 2 * num_elements * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(iterations * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE
281
XNNPACK Teamb455b122019-09-27 18:10:33 -0700282static void CharacteristicArguments(benchmark::internal::Benchmark* b)
283{
284 b->ArgNames({"N", "C"});
285
286 int32_t c = 16;
287 for (int32_t n = 224; n >= 7; n /= 2) {
288 b->Args({n * n, c});
289 c *= 2;
290 }
291}
292
Chao Meic6640272020-07-23 09:35:11 -0700293#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan08b7a972020-07-14 18:17:29 -0700294BENCHMARK(xnnpack_sigmoid_qu8)->Apply(CharacteristicArguments)->UseRealTime();
Chao Meic6640272020-07-23 09:35:11 -0700295#endif // XNN_NO_QU8_OPERATORS
Marat Dukhanc3b9e862019-11-17 13:18:54 -0800296BENCHMARK(xnnpack_sigmoid_f32)->Apply(CharacteristicArguments)->UseRealTime();
297
298#ifdef BENCHMARK_TENSORFLOW_LITE
299 BENCHMARK(tflite_sigmoid_f32)->Apply(CharacteristicArguments)->UseRealTime();
300#endif // BENCHMARK_TENSORFLOW_LITE
XNNPACK Teamb455b122019-09-27 18:10:33 -0700301
302#ifndef XNNPACK_BENCHMARK_NO_MAIN
303BENCHMARK_MAIN();
304#endif