blob: db8ab7deb169ac88c43c6a261c87c28f97885650 [file] [log] [blame]
// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
#include <unistd.h>
#ifdef __APPLE__
#include <sys/time.h>
#endif

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "multi_thread_transform.h"
#include "transform_kernels.h"

using namespace gemmlowp::meta;
33
// Returns the current reading of a monotonic clock, in seconds.
//
// The epoch is unspecified, so a single value means nothing on its own; only
// the difference between two readings is meaningful. A steady clock is used
// rather than the wall clock so that system time adjustments happening during
// a run cannot distort (or make negative) a measured interval, and so that no
// per-platform code path is required.
double time() {
  const std::chrono::duration<double> since_epoch =
      std::chrono::steady_clock::now().time_since_epoch();
  return since_epoch.count();
}
45
// Kernel size passed as the non-type template argument to Transform1D and
// MultiThreadTransform1D by every benchmark in this file. A typed constant is
// used instead of a macro so the name obeys normal scoping rules.
constexpr int kernel_size = 16;
47
48template <typename Context, typename Params>
49void run_benchmark(const std::string& name, int repetitions, int elements,
50 Context* context, const Params& params) {
51 std::cout << "Benchmark: " << name << std::endl;
52 std::cout << "Warmup single." << std::endl;
53
54 for (int i = 0; i < 10; ++i) {
55 Transform1D<Params, kernel_size>(params);
56 }
57
58 std::cout << "Benchmark single." << std::endl;
59
60 double start = time();
61
62 for (int i = 0; i < repetitions; ++i) {
63 Transform1D<Params, kernel_size>(params);
64 }
65
66 double wall_time = time() - start;
67 double ops = static_cast<double>(elements) * repetitions;
68 std::cout << "Avg: " << (wall_time / repetitions) << std::endl;
69 std::cout << "Perf: " << static_cast<std::int64_t>(ops / wall_time) << "/s."
70 << std::endl;
71
72 std::cout << "Warmup single." << std::endl;
73
74 for (int i = 0; i < 10; ++i) {
75 MultiThreadTransform1D<Context, Params, kernel_size>(context, params);
76 }
77
78 std::cout << "Benchmark multi." << std::endl;
79
80 start = time();
81
82 for (int i = 0; i < repetitions; ++i) {
83 MultiThreadTransform1D<Context, Params, kernel_size>(context, params);
84 }
85
86 wall_time = time() - start;
87 ops = static_cast<double>(elements) * repetitions;
88 std::cout << "Avg: " << (wall_time / repetitions) << std::endl;
89 std::cout << "Perf: " << static_cast<std::int64_t>(ops / wall_time) << "/s."
90 << std::endl;
91}
92
93int main() {
94 const int repetitions = 500;
95 const int elements = 4 * 1024 * 1024;
96
97 std::unique_ptr<std::int32_t[]> int32_array(new std::int32_t[elements]);
98 std::unique_ptr<std::uint8_t[]> uint8_array(new std::uint8_t[elements]);
99 std::unique_ptr<float[]> float_array(new float[elements]);
100
101 typedef SimpleContext<gemmlowp::WorkersPool> Context;
102 Context context(4, new gemmlowp::WorkersPool());
103
104 typedef Transform1DParams<std::int32_t, std::uint8_t, Requantize> RequantizeParams;
105 RequantizeParams requantize_params;
106 requantize_params.input = int32_array.get();
107 requantize_params.output = uint8_array.get();
108 requantize_params.kernel.count = elements;
109 requantize_params.kernel.input_range_min = -100.0f;
110 requantize_params.kernel.input_range_scale =
111 200.0f / ((static_cast<std::int64_t>(1) << 32) - 1);
112 requantize_params.kernel.input_range_offset =
113 static_cast<float>(std::numeric_limits<std::int32_t>::lowest());
114 requantize_params.kernel.output_range_min = -200.0f;
115 requantize_params.kernel.one_over_output_range_scale =
116 static_cast<float>((static_cast<std::int64_t>(1) << 8) - 1) / 500.0f;
117 requantize_params.kernel.output_range_offset =
118 static_cast<float>(std::numeric_limits<std::uint8_t>::lowest());
119
120 run_benchmark("Requantize", repetitions, elements, &context,
121 requantize_params);
122
123 typedef Transform1DParams<std::uint8_t, float, Dequantize> DequantizeParams;
124 DequantizeParams dequantize_params;
125 dequantize_params.input = uint8_array.get();
126 dequantize_params.output = float_array.get();
127 dequantize_params.kernel.count = elements;
128 dequantize_params.kernel.range_min = -100.0f;
129 dequantize_params.kernel.range_scale =
130 static_cast<float>((static_cast<std::int64_t>(1) << 8) - 1) / 200.0f;
131 dequantize_params.kernel.range_offset =
132 static_cast<float>(std::numeric_limits<std::uint8_t>::lowest());
133
134 run_benchmark("Dequantize", repetitions, elements, &context,
135 dequantize_params);
136
137 typedef Transform1DParams<float, std::uint8_t, Quantize> QuantizeParams;
138 QuantizeParams quantize_params;
139 quantize_params.input = float_array.get();
140 quantize_params.output = uint8_array.get();
141 quantize_params.kernel.count = elements;
142 quantize_params.kernel.range_min = -100.0f;
143 quantize_params.kernel.range_scale =
144 200.0f / ((static_cast<std::int64_t>(1) << 8) - 1);
145 quantize_params.kernel.range_offset =
146 static_cast<float>(std::numeric_limits<std::uint8_t>::lowest());
147
148 run_benchmark("Quantize", repetitions, elements, &context, quantize_params);
149
150 return 0;
151}