blob: 224b0c36fd05cfe847c00634e1e09a197acde61a [file] [log] [blame]
// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
8
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <xnnpack.h>

#include <benchmark/benchmark.h>
#include "bench/utils.h"
XNNPACK Teamb455b122019-09-27 18:10:33 -070019
20
21static void add_nc_q8(benchmark::State& state) {
22 const size_t batch_size = static_cast<size_t>(state.range(0));
23 const size_t channels = static_cast<size_t>(state.range(1));
24
25 std::random_device random_device;
26 auto rng = std::mt19937(random_device());
27 auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
28
29 std::vector<uint8_t> a(batch_size * channels);
30 std::vector<uint8_t> b(batch_size * channels);
31 std::vector<uint8_t> y(batch_size * channels);
32 std::generate(a.begin(), a.end(), std::ref(u8rng));
33 std::generate(b.begin(), b.end(), std::ref(u8rng));
34
Marat Dukhan04f03be2019-11-19 12:36:47 -080035 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -070036 if (status != xnn_status_success) {
37 state.SkipWithError("failed to initialize XNNPACK");
38 return;
39 }
40
41 xnn_operator_t add_op = nullptr;
42 status = xnn_create_add_nc_q8(
43 channels, channels /* a_stride */, channels /* b_stride */, channels /* sum_stride */,
44 127 /* a:zero point */, 1.0f /* a:scale */,
45 127 /* b:zero point */, 1.0f /* b:scale */,
46 127 /* y:zero point */, 1.0f /* y:scale */,
47 1 /* y:min */, 254 /* y:max */,
48 0 /* flags */, &add_op);
49 if (status != xnn_status_success || add_op == nullptr) {
50 state.SkipWithError("failed to create Q8 Add operator");
51 return;
52 }
53
54 status = xnn_setup_add_nc_q8(
55 add_op,
56 batch_size,
57 a.data(), b.data(), y.data(),
58 nullptr /* thread pool */);
59 if (status != xnn_status_success) {
60 state.SkipWithError("failed to setup Q8 Add operator");
61 return;
62 }
63
64 for (auto _ : state) {
65 status = xnn_run_operator(add_op, nullptr /* thread pool */);
66 if (status != xnn_status_success) {
67 state.SkipWithError("failed to run Q8 Add operator");
68 return;
69 }
70 }
71
72 status = xnn_delete_operator(add_op);
73 if (status != xnn_status_success) {
74 state.SkipWithError("failed to delete Q8 Add operator");
75 return;
76 }
77
Frank Barchardbb4c18b2019-09-30 11:05:52 -070078 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
79
XNNPACK Teamb455b122019-09-27 18:10:33 -070080 const size_t elements_per_iteration = batch_size * channels;
81 state.counters["elements"] =
82 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
83
84 const size_t bytes_per_iteration = 3 * elements_per_iteration * sizeof(uint8_t);
85 state.counters["bytes"] =
86 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
87}
88
89static void add_nc_q8_inplace(benchmark::State& state) {
90 const size_t batch_size = static_cast<size_t>(state.range(0));
91 const size_t channels = static_cast<size_t>(state.range(1));
92
93 std::random_device random_device;
94 auto rng = std::mt19937(random_device());
95 auto u8rng = std::bind(std::uniform_int_distribution<uint8_t>(), rng);
96
97 std::vector<uint8_t> a(batch_size * channels);
98 std::vector<uint8_t> y(batch_size * channels);
99 std::generate(a.begin(), a.end(), std::ref(u8rng));
100
Marat Dukhan04f03be2019-11-19 12:36:47 -0800101 xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700102 if (status != xnn_status_success) {
103 state.SkipWithError("failed to initialize XNNPACK");
104 return;
105 }
106
107 xnn_operator_t add_op = nullptr;
108 status = xnn_create_add_nc_q8(
109 channels, channels /* a_stride */, channels /* b_stride */, channels /* sum_stride */,
110 127 /* a:zero point */, 1.0f /* a:scale */,
111 127 /* b:zero point */, 1.0f /* b:scale */,
112 127 /* y:zero point */, 1.0f /* y:scale */,
113 1 /* y:min */, 254 /* y:max */,
114 0 /* flags */, &add_op);
115 if (status != xnn_status_success || add_op == nullptr) {
116 state.SkipWithError("failed to create Q8 Add operator");
117 return;
118 }
119
120 status = xnn_setup_add_nc_q8(
121 add_op,
122 batch_size,
123 a.data(), y.data(), y.data(),
124 nullptr /* thread pool */);
125 if (status != xnn_status_success) {
126 state.SkipWithError("failed to setup Q8 Add operator");
127 return;
128 }
129
130 for (auto _ : state) {
131 status = xnn_run_operator(add_op, nullptr /* thread pool */);
132 if (status != xnn_status_success) {
133 state.SkipWithError("failed to run Q8 Add operator");
134 return;
135 }
136 }
137
138 status = xnn_delete_operator(add_op);
139 if (status != xnn_status_success) {
140 state.SkipWithError("failed to delete Q8 Add operator");
141 return;
142 }
143
Frank Barchardbb4c18b2019-09-30 11:05:52 -0700144 state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
145
XNNPACK Teamb455b122019-09-27 18:10:33 -0700146 const size_t elements_per_iteration = batch_size * channels;
147 state.counters["elements"] =
148 benchmark::Counter(uint64_t(state.iterations()) * elements_per_iteration, benchmark::Counter::kIsRate);
149
150 const size_t bytes_per_iteration = 3 * elements_per_iteration * sizeof(uint8_t);
151 state.counters["bytes"] =
152 benchmark::Counter(uint64_t(state.iterations()) * bytes_per_iteration, benchmark::Counter::kIsRate);
153}
154
155static void CharacteristicArguments(benchmark::internal::Benchmark* b)
156{
157 b->ArgNames({"N", "C"});
158
159 int32_t c = 16;
160 for (int32_t n = 224; n >= 7; n /= 2) {
161 b->Args({n * n, c});
162 c *= 2;
163 }
164}
165
166BENCHMARK(add_nc_q8)->Apply(CharacteristicArguments)->UseRealTime();
167BENCHMARK(add_nc_q8_inplace)->Apply(CharacteristicArguments)->UseRealTime();
168
169#ifndef XNNPACK_BENCHMARK_NO_MAIN
170BENCHMARK_MAIN();
171#endif