// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cfloat>
#include <cmath>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
#ifdef BENCHMARK_TENSORFLOW_LITE
#include "flatbuffers/include/flatbuffers/flatbuffers.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow/lite/schema/schema_generated.h"
#include "tensorflow/lite/version.h"
#endif  // BENCHMARK_TENSORFLOW_LITE


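// Benchmarks the XNNPACK F32 PReLU operator in NHWC layout. PReLU computes
// y = x for x >= 0 and y = slope[c] * x otherwise, with one slope per channel;
// here the slopes are random values in [0.25, 0.75].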
void xnnpack_prelu_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t height = state.range(1);
  const size_t width = state.range(2);
  const size_t channels = state.range(3);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32irng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
  auto f32wrng = std::bind(std::uniform_real_distribution<float>(0.25f, 0.75f), rng);

  std::vector<float> input(batch_size * height * width * channels + XNN_EXTRA_BYTES / sizeof(float));
  std::generate(input.begin(), input.end(), std::ref(f32irng));
  std::vector<float> slope(channels);
  std::generate(slope.begin(), slope.end(), std::ref(f32wrng));
  std::vector<float> output(batch_size * height * width * channels);

  xnn_status status = xnn_initialize(nullptr /* allocator */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to initialize XNNPACK");
    return;
  }

  xnn_operator_t prelu_op = nullptr;
  status = xnn_create_prelu_nc_f32(
    channels, channels /* input stride */, channels /* output stride */,
    slope.data(),
    -std::numeric_limits<float>::infinity(), +std::numeric_limits<float>::infinity(),
    0 /* flags */, &prelu_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to create FP32 PReLU operator");
    return;
  }

  status = xnn_setup_prelu_nc_f32(
    prelu_op,
    batch_size * height * width,
    input.data(), output.data(),
    nullptr /* thread pool */);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to setup FP32 PReLU operator");
    return;
  }

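  // Only the repeated xnn_run_operator() calls are timed; initialization,
  // operator creation, and setup above stay outside the benchmark loop.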
  for (auto _ : state) {
    status = xnn_run_operator(prelu_op, nullptr /* thread pool */);
    if (status != xnn_status_success) {
      state.SkipWithError("failed to run FP32 PReLU operator");
      return;
    }
  }

  status = xnn_delete_operator(prelu_op);
  if (status != xnn_status_success) {
    state.SkipWithError("failed to delete FP32 PReLU operator");
    return;
  }
  prelu_op = nullptr;

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();

  const size_t elements_per_iteration = batch_size * height * width * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

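  // Memory traffic per iteration: each element is read once and written once,
  // plus one read of the per-channel slope vector.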
  const size_t bytes_per_iteration = (2 * elements_per_iteration + channels) * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
}

#ifdef BENCHMARK_TENSORFLOW_LITE
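// Benchmarks the same PReLU computation through TensorFlow Lite: a one-operator
// PRELU model is assembled as a FlatBuffer in memory and run on a
// single-threaded interpreter.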
void tflite_prelu_f32(benchmark::State& state, const char* net) {
  const size_t batch_size = state.range(0);
  const size_t height = state.range(1);
  const size_t width = state.range(2);
  const size_t channels = state.range(3);

  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  auto f32irng = std::bind(std::uniform_real_distribution<float>(-1.0f, 1.0f), rng);
  auto f32wrng = std::bind(std::uniform_real_distribution<float>(0.25f, 0.75f), rng);

  std::vector<float> slope(channels);
  std::generate(slope.begin(), slope.end(), std::ref(f32wrng));

  flatbuffers::FlatBufferBuilder builder;
  flatbuffers::Offset<tflite::OperatorCode> operator_code =
    CreateOperatorCode(builder, tflite::BuiltinOperator_PRELU);

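  // Buffer 0 is the empty sentinel buffer the TFLite schema reserves;
  // buffer 1 holds the slope weights.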
  flatbuffers::Offset<tflite::Buffer> buffers[2] = {
    tflite::CreateBuffer(builder, builder.CreateVector({})),
    tflite::CreateBuffer(builder, builder.CreateVector(
      reinterpret_cast<const uint8_t*>(slope.data()),
      sizeof(float) * slope.size())),
  };

  const int32_t input_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(height),
    static_cast<int32_t>(width),
    static_cast<int32_t>(channels)
  };
  const int32_t output_shape[4] = {
    static_cast<int32_t>(batch_size),
    static_cast<int32_t>(height),
    static_cast<int32_t>(width),
    static_cast<int32_t>(channels)
  };
  const int32_t slope_shape[1] = {
    static_cast<int32_t>(channels)
  };

  flatbuffers::Offset<tflite::Tensor> tensors[3] = {
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(input_shape, 4),
                         tflite::TensorType_FLOAT32),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(slope_shape, 1),
                         tflite::TensorType_FLOAT32,
                         1 /* buffer id */),
    tflite::CreateTensor(builder,
                         builder.CreateVector<int32_t>(output_shape, 4),
                         tflite::TensorType_FLOAT32),
  };

  const int32_t op_inputs[2] = { 0, 1 };
  const int32_t op_outputs[1] = { 2 };
  flatbuffers::Offset<tflite::Operator> op = tflite::CreateOperator(
    builder,
    0 /* opcode_index */,
    builder.CreateVector<int32_t>(op_inputs, 2),
    builder.CreateVector<int32_t>(op_outputs, 1));

  const int32_t graph_inputs[1] = { 0 };
  const int32_t graph_outputs[1] = { 2 };
  flatbuffers::Offset<tflite::SubGraph> subgraph = tflite::CreateSubGraph(
    builder,
    builder.CreateVector(tensors, 3),
    builder.CreateVector<int32_t>(graph_inputs, 1),
    builder.CreateVector<int32_t>(graph_outputs, 1),
    builder.CreateVector(&op, 1));

  flatbuffers::Offset<flatbuffers::String> description = builder.CreateString("PReLU model");

  flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
    TFLITE_SCHEMA_VERSION,
    builder.CreateVector(&operator_code, 1),
    builder.CreateVector(&subgraph, 1),
    description,
    builder.CreateVector(buffers, 2));

  builder.Finish(model_buffer);

  const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::InterpreterBuilder interpreterBuilder(model, resolver);
  std::unique_ptr<tflite::Interpreter> interpreter;
  if (interpreterBuilder(&interpreter) != kTfLiteOk) {
    state.SkipWithError("failed to create TFLite interpreter");
    return;
  }
  if (interpreter == nullptr) {
    state.SkipWithError("TFLite interpreter is null");
    return;
  }
  interpreter->SetNumThreads(1);

  if (interpreter->AllocateTensors() != kTfLiteOk) {
    state.SkipWithError("failed to allocate tensors");
    return;
  }

  std::generate(
    interpreter->typed_tensor<float>(0),
    interpreter->typed_tensor<float>(0) + batch_size * height * width * channels,
    std::ref(f32irng));

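  // As in the XNNPACK path, only interpreter->Invoke() is timed.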
  for (auto _ : state) {
    if (interpreter->Invoke() != kTfLiteOk) {
      state.SkipWithError("failed to invoke TFLite interpreter");
      return;
    }
  }

  state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();

  const size_t elements_per_iteration = batch_size * height * width * channels;
  state.counters["elements"] =
    benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);

  const size_t bytes_per_iteration = (2 * elements_per_iteration + channels) * sizeof(float);
  state.counters["bytes"] =
    benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);

  interpreter.reset();
}
#endif  // BENCHMARK_TENSORFLOW_LITE

// Characteristic arguments for ImageNet classification models
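// Spatial resolution halves while the channel count doubles at each stage, as
// in typical CNN backbones: from 112x112x16 down to 7x7x512.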
static void ImageNet(benchmark::internal::Benchmark* b)
{
  b->ArgNames({"N", "H", "W", "C"});

  int32_t c = 16;
  for (int32_t hw = 224 / 2; hw >= 7; hw /= 2) {
    b->Args({1, hw, hw, c});
    b->Args({1, hw, hw, c * 2});
    c *= 2;
  }
}

BENCHMARK_CAPTURE(xnnpack_prelu_f32, imagenet, "ImageNet 224x224")->Apply(ImageNet)->UseRealTime();

#ifdef BENCHMARK_TENSORFLOW_LITE
  BENCHMARK_CAPTURE(tflite_prelu_f32, imagenet, "ImageNet 224x224")->Apply(ImageNet)->UseRealTime();
#endif  // BENCHMARK_TENSORFLOW_LITE

#ifndef XNNPACK_BENCHMARK_NO_MAIN
BENCHMARK_MAIN();
#endif