// Copyright 2016 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
| 14 | |
#include <unistd.h>
#ifdef __APPLE__
#include <sys/time.h>
#endif

#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "multi_thread_transform.h"
#include "transform_kernels.h"

using namespace gemmlowp::meta;
| 33 | |
// Returns the current reading of a monotonic clock, in seconds.
//
// The value is measured from the clock's own unspecified epoch, so only the
// difference between two readings is meaningful. A steady clock is used
// instead of the wall clock so that an interval cannot be distorted by
// system time adjustments made while a benchmark is running.
double time() {
  using Seconds = std::chrono::duration<double>;
  const auto since_epoch = std::chrono::steady_clock::now().time_since_epoch();
  return Seconds(since_epoch).count();
}
| 45 | |
// Kernel size passed as the integer template argument to Transform1D and
// MultiThreadTransform1D below. A typed constant is used rather than a macro
// so the name obeys normal scoping and cannot rewrite unrelated identifiers.
constexpr int kernel_size = 16;
| 47 | |
| 48 | template <typename Context, typename Params> |
| 49 | void run_benchmark(const std::string& name, int repetitions, int elements, |
| 50 | Context* context, const Params& params) { |
| 51 | std::cout << "Benchmark: " << name << std::endl; |
| 52 | std::cout << "Warmup single." << std::endl; |
| 53 | |
| 54 | for (int i = 0; i < 10; ++i) { |
| 55 | Transform1D<Params, kernel_size>(params); |
| 56 | } |
| 57 | |
| 58 | std::cout << "Benchmark single." << std::endl; |
| 59 | |
| 60 | double start = time(); |
| 61 | |
| 62 | for (int i = 0; i < repetitions; ++i) { |
| 63 | Transform1D<Params, kernel_size>(params); |
| 64 | } |
| 65 | |
| 66 | double wall_time = time() - start; |
| 67 | double ops = static_cast<double>(elements) * repetitions; |
| 68 | std::cout << "Avg: " << (wall_time / repetitions) << std::endl; |
| 69 | std::cout << "Perf: " << static_cast<std::int64_t>(ops / wall_time) << "/s." |
| 70 | << std::endl; |
| 71 | |
| 72 | std::cout << "Warmup single." << std::endl; |
| 73 | |
| 74 | for (int i = 0; i < 10; ++i) { |
| 75 | MultiThreadTransform1D<Context, Params, kernel_size>(context, params); |
| 76 | } |
| 77 | |
| 78 | std::cout << "Benchmark multi." << std::endl; |
| 79 | |
| 80 | start = time(); |
| 81 | |
| 82 | for (int i = 0; i < repetitions; ++i) { |
| 83 | MultiThreadTransform1D<Context, Params, kernel_size>(context, params); |
| 84 | } |
| 85 | |
| 86 | wall_time = time() - start; |
| 87 | ops = static_cast<double>(elements) * repetitions; |
| 88 | std::cout << "Avg: " << (wall_time / repetitions) << std::endl; |
| 89 | std::cout << "Perf: " << static_cast<std::int64_t>(ops / wall_time) << "/s." |
| 90 | << std::endl; |
| 91 | } |
| 92 | |
| 93 | int main() { |
| 94 | const int repetitions = 500; |
| 95 | const int elements = 4 * 1024 * 1024; |
| 96 | |
| 97 | std::unique_ptr<std::int32_t[]> int32_array(new std::int32_t[elements]); |
| 98 | std::unique_ptr<std::uint8_t[]> uint8_array(new std::uint8_t[elements]); |
| 99 | std::unique_ptr<float[]> float_array(new float[elements]); |
| 100 | |
| 101 | typedef SimpleContext<gemmlowp::WorkersPool> Context; |
| 102 | Context context(4, new gemmlowp::WorkersPool()); |
| 103 | |
| 104 | typedef Transform1DParams<std::int32_t, std::uint8_t, Requantize> RequantizeParams; |
| 105 | RequantizeParams requantize_params; |
| 106 | requantize_params.input = int32_array.get(); |
| 107 | requantize_params.output = uint8_array.get(); |
| 108 | requantize_params.kernel.count = elements; |
| 109 | requantize_params.kernel.input_range_min = -100.0f; |
| 110 | requantize_params.kernel.input_range_scale = |
| 111 | 200.0f / ((static_cast<std::int64_t>(1) << 32) - 1); |
| 112 | requantize_params.kernel.input_range_offset = |
| 113 | static_cast<float>(std::numeric_limits<std::int32_t>::lowest()); |
| 114 | requantize_params.kernel.output_range_min = -200.0f; |
| 115 | requantize_params.kernel.one_over_output_range_scale = |
| 116 | static_cast<float>((static_cast<std::int64_t>(1) << 8) - 1) / 500.0f; |
| 117 | requantize_params.kernel.output_range_offset = |
| 118 | static_cast<float>(std::numeric_limits<std::uint8_t>::lowest()); |
| 119 | |
| 120 | run_benchmark("Requantize", repetitions, elements, &context, |
| 121 | requantize_params); |
| 122 | |
| 123 | typedef Transform1DParams<std::uint8_t, float, Dequantize> DequantizeParams; |
| 124 | DequantizeParams dequantize_params; |
| 125 | dequantize_params.input = uint8_array.get(); |
| 126 | dequantize_params.output = float_array.get(); |
| 127 | dequantize_params.kernel.count = elements; |
| 128 | dequantize_params.kernel.range_min = -100.0f; |
| 129 | dequantize_params.kernel.range_scale = |
| 130 | static_cast<float>((static_cast<std::int64_t>(1) << 8) - 1) / 200.0f; |
| 131 | dequantize_params.kernel.range_offset = |
| 132 | static_cast<float>(std::numeric_limits<std::uint8_t>::lowest()); |
| 133 | |
| 134 | run_benchmark("Dequantize", repetitions, elements, &context, |
| 135 | dequantize_params); |
| 136 | |
| 137 | typedef Transform1DParams<float, std::uint8_t, Quantize> QuantizeParams; |
| 138 | QuantizeParams quantize_params; |
| 139 | quantize_params.input = float_array.get(); |
| 140 | quantize_params.output = uint8_array.get(); |
| 141 | quantize_params.kernel.count = elements; |
| 142 | quantize_params.kernel.range_min = -100.0f; |
| 143 | quantize_params.kernel.range_scale = |
| 144 | 200.0f / ((static_cast<std::int64_t>(1) << 8) - 1); |
| 145 | quantize_params.kernel.range_offset = |
| 146 | static_cast<float>(std::numeric_limits<std::uint8_t>::lowest()); |
| 147 | |
| 148 | run_benchmark("Quantize", repetitions, elements, &context, quantize_params); |
| 149 | |
| 150 | return 0; |
| 151 | } |