// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNN_NO_QU8_OPERATORS
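// Benchmarks XNNPACK's unsigned 8-bit quantized (QU8) Sigmoid operator:
// the operator is created and set up once, and only xnn_run_operator is
// timed inside the benchmark loop.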
static void xnnpack_sigmoid_qu8(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

  std::vector<uint8_t> input(batch_size * channels);
  std::vector<uint8_t> output(batch_size * channels);
  std::generate(input.begin(), input.end(), std::ref(u8rng));
  std::fill(output.begin(), output.end(), 0xA5);

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t sigmoid_op = nullptr;
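  // Quantization parameters: with an output scale of 1/256 and zero point 0,
  // the representable uint8 outputs span [0, 255/256], matching the sigmoid's
  // (0, 1) output range.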
  status = xnn_create_sigmoid_nc_qu8(
    channels, channels /* input stride */, channels /* output stride */,
    127 /* input zero point */, 1.0f /* input scale */,
    0 /* output zero point */, 1.0f / 256.0f /* output scale */,
    0 /* output min */, 255 /* output max */,
    0 /* flags */, &sigmoid_op);
  if (status != xnn_status_success || sigmoid_op == nullptr) {
    state.SkipWithError("failed to create Sigmoid operator");
    return;
  }

  status = xnn_setup_sigmoid_nc_qu8(
    sigmoid_op,
    batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Sigmoid operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Sigmoid operator");
      return;
    }
  }

  status = xnn_delete_operator(sigmoid_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Sigmoid operator");
    return;
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

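  // Each element is read once and written once per run, hence the factor of 2
  // in the memory-traffic counter below.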
  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(uint8_t);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}
#endif  // XNN_NO_QU8_OPERATORS

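// Benchmarks the single-precision (F32) XNNPACK Sigmoid operator on the same
// batch-size/channel shapes as the QU8 variant above.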
static void xnnpack_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng);

  std::vector<float> input(batch_size * channels);
  std::vector<float> output(batch_size * channels);
  std::generate(input.begin(), input.end(), std::ref(f32rng));
  std::fill(output.begin(), output.end(), std::nanf(""));

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t sigmoid_op = nullptr;
  status = xnn_create_sigmoid_nc_f32(
    channels, channels /* input stride */, channels /* output stride */,
    0 /* flags */, &sigmoid_op);
  if (status != xnn_status_success || sigmoid_op == nullptr) {
    state.SkipWithError("failed to create Sigmoid operator");
    return;
  }

  status = xnn_setup_sigmoid_nc_f32(
    sigmoid_op,
    batch_size,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup Sigmoid operator");
    return;
  }

  for (auto _ : state) {
    status = xnn_run_operator(sigmoid_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run Sigmoid operator");
      return;
    }
  }

  status = xnn_delete_operator(sigmoid_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete Sigmoid operator");
    return;
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

#ifdef BENCHMARK_TENSORFLOW_LITE
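// TensorFlow Lite baseline: builds an in-memory FlatBuffer model containing a
// single LOGISTIC operator and times Invoke() on a single-threaded interpreter.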
static void tflite_sigmoid_f32(benchmark::State& state) {
  const size_t batch_size = state.range(0);
  const size_t channels = state.range(1);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32rng = std::bind(std::uniform_real_distribution<float>(-10.0f, 10.0f), rng);

  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
      CreateOperatorCode(builder, tflite::BuiltinOperator_LOGISTIC);

  flatbuffers::Offset<tflite::Buffer> buffers[1] = {
    tflite::CreateBuffer(builder, builder.CreateVector({})),
  };

  const int32_t input_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(1 /* height */),
    static_cast<int32_t>(1 /* width */),
    static_cast<int32_t>(channels)
  };
  const int32_t output_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(1 /* height */),
    static_cast<int32_t>(1 /* width */),
    static_cast<int32_t>(channels)
  };

  flatbuffers::Offset<tflite::Tensor> tensors[2] = {
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(input_shape, 4),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(output_shape, 4),
                         tflite::TensorType_FLOAT32),
  };

  const int32_t op_inputs[1] = { 0 };
  const int32_t op_outputs[1] = { 1 };
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
      builder,
      0 /* opcode_index */,
      builder.CreateVector<int32_t>(op_inputs, 1),
      builder.CreateVector<int32_t>(op_outputs, 1));

  const int32_t graph_inputs[1] = { 0 };
  const int32_t graph_outputs[1] = { 1 };
  flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
      builder,
      builder.CreateVector(tensors, 2),
      builder.CreateVector<int32_t>(graph_inputs, 1),
      builder.CreateVector<int32_t>(graph_outputs, 1),
      builder.CreateVector(&op, 1));

  flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("Sigmoid model");

  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
      TFLITE_SCHEMA_VERSION,
      builder.CreateVector(&operator_code, 1),
      builder.CreateVector(&subgraph, 1),
      description,
      builder.CreateVector(buffers, 1));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * channels,
    std::ref(f32rng));

  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();

  const size_t elements_per_iteration = batch_size * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = 2 * elements_per_iteration * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

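// Argument sweep: the number of "pixels" N = n*n shrinks as n halves from 224
// down to 7 while the channel count C doubles from 16, roughly mirroring the
// shapes of feature maps in a typical convolutional network.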
static void CharacteristicArguments(benchmark::internal::Benchmark* b)
{
  b->ArgNames({"N", "C"});

  int32_t c = 16;
  for (int32_t n = 224; n >= 7; n /= 2) {
    b->Args({n * n, c});
    c *= 2;
  }
}

#ifndef XNN_NO_QU8_OPERATORS
BENCHMARK(xnnpack_sigmoid_qu8)->Apply(CharacteristicArguments)->UseRealTime();
#endif  // XNN_NO_QU8_OPERATORS
BENCHMARK(xnnpack_sigmoid_f32)->Apply(CharacteristicArguments)->UseRealTime();

#ifdef BENCHMARK_TENSORFLOW_LITE
  BENCHMARK(tflite_sigmoid_f32)->Apply(CharacteristicArguments)->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif