blob: 2b179fd5ffe6a92155a7151689ed2e8d5ad63d60 [file] [log] [blame]
Anthony Barbier871448e2017-03-24 14:54:29 +00001/*
Anthony Barbier06ea0482018-02-22 15:45:35 +00002 * Copyright (c) 2016-2018 ARM Limited.
Anthony Barbier871448e2017-03-24 14:54:29 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPP/CPPScheduler.h"
25
26#include "arm_compute/core/CPP/ICPPKernel.h"
27#include "arm_compute/core/Error.h"
28#include "arm_compute/core/Helpers.h"
29#include "arm_compute/core/Utils.h"
Jenkinsb3a371b2018-05-23 11:36:53 +010030#include "arm_compute/runtime/CPUUtils.h"
Anthony Barbier871448e2017-03-24 14:54:29 +000031
Jenkins52ba29e2018-08-29 15:32:11 +000032#include <atomic>
Kaizen8938bd32017-09-28 14:38:23 +010033#include <condition_variable>
Anthony Barbier871448e2017-03-24 14:54:29 +000034#include <iostream>
Kaizen8938bd32017-09-28 14:38:23 +010035#include <mutex>
Anthony Barbier871448e2017-03-24 14:54:29 +000036#include <system_error>
37#include <thread>
38
Kaizen8938bd32017-09-28 14:38:23 +010039namespace arm_compute
40{
Jenkins52ba29e2018-08-29 15:32:11 +000041namespace
42{
/** Thread-safe dispenser of consecutive indices in the half-open range [start, end).
 *
 * Every call to get_next() atomically claims one index, so several threads can share a
 * single feeder and never receive the same value twice.
 */
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (The last value returned by get_next() will be end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }
    /** Return the next element in the range if there is one.
     *
     * @param[out] next Will contain the next element if there is one. Left untouched otherwise.
     *
     * @return False if the end of the range has been reached and next wasn't set.
     */
    bool get_next(unsigned int &next)
    {
        // Relaxed ordering: only the atomicity of the increment is needed here.
        const unsigned int claimed = _atomic_counter.fetch_add(1u, std::memory_order_relaxed);
        if(claimed >= _end)
        {
            // Honour the documented contract: do not clobber the caller's value once exhausted.
            return false;
        }
        next = claimed;
        return true;
    }

private:
    std::atomic_uint   _atomic_counter; /**< Next index to hand out. */
    const unsigned int _end;            /**< One past the last index that may be handed out. */
};
71
72/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
73 *
74 * Will run workloads until the feeder reaches the end of its range.
75 *
76 * @param[in] workloads The array of workloads
77 * @param[in,out] feeder The feeder indicating which workload to execute next.
78 * @param[in] info Threading and CPU info.
79 */
80void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
81{
82 unsigned int workload_index = info.thread_id;
83 do
84 {
85 ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
86 workloads[workload_index](info);
87 }
88 while(feeder.get_next(workload_index));
89}
90
91} //namespace
92
93class CPPScheduler::Thread
Anthony Barbier871448e2017-03-24 14:54:29 +000094{
95public:
Kaizen8938bd32017-09-28 14:38:23 +010096 /** Start a new thread. */
Anthony Barbier871448e2017-03-24 14:54:29 +000097 Thread();
Kaizen8938bd32017-09-28 14:38:23 +010098
Anthony Barbier871448e2017-03-24 14:54:29 +000099 Thread(const Thread &) = delete;
100 Thread &operator=(const Thread &) = delete;
101 Thread(Thread &&) = delete;
102 Thread &operator=(Thread &&) = delete;
Kaizen8938bd32017-09-28 14:38:23 +0100103
104 /** Destructor. Make the thread join. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000105 ~Thread();
Kaizen8938bd32017-09-28 14:38:23 +0100106
Jenkins52ba29e2018-08-29 15:32:11 +0000107 /** Request the worker thread to start executing workloads.
108 *
109 * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
110 * get the index of the following workload to run.
111 *
112 * @note This function will return as soon as the workloads have been sent to the worker thread.
Anthony Barbier871448e2017-03-24 14:54:29 +0000113 * wait() needs to be called to ensure the execution is complete.
114 */
Jenkins52ba29e2018-08-29 15:32:11 +0000115 void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);
Kaizen8938bd32017-09-28 14:38:23 +0100116
117 /** Wait for the current kernel execution to complete. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000118 void wait();
Kaizen8938bd32017-09-28 14:38:23 +0100119
120 /** Function ran by the worker thread. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000121 void worker_thread();
122
123private:
Jenkins52ba29e2018-08-29 15:32:11 +0000124 std::thread _thread{};
125 ThreadInfo _info{};
126 std::vector<IScheduler::Workload> *_workloads{ nullptr };
127 ThreadFeeder *_feeder{ nullptr };
128 std::mutex _m{};
129 std::condition_variable _cv{};
130 bool _wait_for_work{ false };
131 bool _job_complete{ true };
132 std::exception_ptr _current_exception{ nullptr };
Anthony Barbier871448e2017-03-24 14:54:29 +0000133};
134
Jenkins52ba29e2018-08-29 15:32:11 +0000135CPPScheduler::Thread::Thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000136{
Anthony Barbier871448e2017-03-24 14:54:29 +0000137 _thread = std::thread(&Thread::worker_thread, this);
138}
139
Jenkins52ba29e2018-08-29 15:32:11 +0000140CPPScheduler::Thread::~Thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000141{
Kaizen8938bd32017-09-28 14:38:23 +0100142 // Make sure worker thread has ended
143 if(_thread.joinable())
144 {
Jenkins52ba29e2018-08-29 15:32:11 +0000145 ThreadFeeder feeder;
146 start(nullptr, feeder, ThreadInfo());
Kaizen8938bd32017-09-28 14:38:23 +0100147 _thread.join();
148 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000149}
150
Jenkins52ba29e2018-08-29 15:32:11 +0000151void CPPScheduler::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
Anthony Barbier871448e2017-03-24 14:54:29 +0000152{
Jenkins52ba29e2018-08-29 15:32:11 +0000153 _workloads = workloads;
154 _feeder = &feeder;
155 _info = info;
Kaizen8938bd32017-09-28 14:38:23 +0100156 {
157 std::lock_guard<std::mutex> lock(_m);
158 _wait_for_work = true;
159 _job_complete = false;
160 }
161 _cv.notify_one();
Anthony Barbier871448e2017-03-24 14:54:29 +0000162}
163
Jenkins52ba29e2018-08-29 15:32:11 +0000164void CPPScheduler::Thread::wait()
Anthony Barbier871448e2017-03-24 14:54:29 +0000165{
Kaizen8938bd32017-09-28 14:38:23 +0100166 {
167 std::unique_lock<std::mutex> lock(_m);
168 _cv.wait(lock, [&] { return _job_complete; });
169 }
170
Anthony Barbier871448e2017-03-24 14:54:29 +0000171 if(_current_exception)
172 {
173 std::rethrow_exception(_current_exception);
174 }
175}
176
Jenkins52ba29e2018-08-29 15:32:11 +0000177void CPPScheduler::Thread::worker_thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000178{
Kaizen8938bd32017-09-28 14:38:23 +0100179 while(true)
Anthony Barbier871448e2017-03-24 14:54:29 +0000180 {
Kaizen8938bd32017-09-28 14:38:23 +0100181 std::unique_lock<std::mutex> lock(_m);
182 _cv.wait(lock, [&] { return _wait_for_work; });
183 _wait_for_work = false;
184
Anthony Barbier871448e2017-03-24 14:54:29 +0000185 _current_exception = nullptr;
Kaizen8938bd32017-09-28 14:38:23 +0100186
Anthony Barbier871448e2017-03-24 14:54:29 +0000187 // Time to exit
Jenkins52ba29e2018-08-29 15:32:11 +0000188 if(_workloads == nullptr)
Anthony Barbier871448e2017-03-24 14:54:29 +0000189 {
190 return;
191 }
192
193 try
194 {
Jenkins52ba29e2018-08-29 15:32:11 +0000195 process_workloads(*_workloads, *_feeder, _info);
Anthony Barbier871448e2017-03-24 14:54:29 +0000196 }
197 catch(...)
198 {
199 _current_exception = std::current_exception();
200 }
Kaizen8938bd32017-09-28 14:38:23 +0100201
202 _job_complete = true;
203 lock.unlock();
204 _cv.notify_one();
Anthony Barbier871448e2017-03-24 14:54:29 +0000205 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000206}
207
Anthony Barbier871448e2017-03-24 14:54:29 +0000208CPPScheduler &CPPScheduler::get()
209{
210 static CPPScheduler scheduler;
211 return scheduler;
212}
213
214CPPScheduler::CPPScheduler()
Anthony Barbier06ea0482018-02-22 15:45:35 +0000215 : _num_threads(num_threads_hint()),
Kaizen8938bd32017-09-28 14:38:23 +0100216 _threads(_num_threads - 1)
Anthony Barbier871448e2017-03-24 14:54:29 +0000217{
Anthony Barbier871448e2017-03-24 14:54:29 +0000218}
219
Anthony Barbierdbdab852017-06-23 15:42:00 +0100220void CPPScheduler::set_num_threads(unsigned int num_threads)
Anthony Barbier871448e2017-03-24 14:54:29 +0000221{
Jenkinsc3f34a42018-03-02 12:38:09 +0000222 _num_threads = num_threads == 0 ? num_threads_hint() : num_threads;
Kaizen8938bd32017-09-28 14:38:23 +0100223 _threads.resize(_num_threads - 1);
224}
225
226unsigned int CPPScheduler::num_threads() const
227{
228 return _num_threads;
Anthony Barbier871448e2017-03-24 14:54:29 +0000229}
230
Jenkinsb9abeae2018-11-22 11:58:08 +0000231#ifndef DOXYGEN_SKIP_THIS
Jenkins52ba29e2018-08-29 15:32:11 +0000232void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
233{
234 const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
235 if(num_threads < 1)
236 {
237 return;
238 }
239 ThreadFeeder feeder(num_threads, workloads.size());
240 ThreadInfo info;
241 info.cpu_info = &_cpu_info;
242 info.num_threads = num_threads;
243 unsigned int t = 0;
244 auto thread_it = _threads.begin();
245 for(; t < num_threads - 1; ++t, ++thread_it)
246 {
247 info.thread_id = t;
248 thread_it->start(&workloads, feeder, info);
249 }
250
251 info.thread_id = t;
252 process_workloads(workloads, feeder, info);
253
254 try
255 {
256 for(auto &thread : _threads)
257 {
258 thread.wait();
259 }
260 }
261 catch(const std::system_error &e)
262 {
263 std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
264 }
265}
Jenkinsb9abeae2018-11-22 11:58:08 +0000266#endif /* DOXYGEN_SKIP_THIS */
Jenkins52ba29e2018-08-29 15:32:11 +0000267
268void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
Anthony Barbier871448e2017-03-24 14:54:29 +0000269{
270 ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
271
Anthony Barbierdbdab852017-06-23 15:42:00 +0100272 const Window &max_window = kernel->window();
Jenkins52ba29e2018-08-29 15:32:11 +0000273 const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
274 const unsigned int num_threads = std::min(num_iterations, _num_threads);
Anthony Barbier871448e2017-03-24 14:54:29 +0000275
Kaizen8938bd32017-09-28 14:38:23 +0100276 if(num_iterations == 0)
Anthony Barbier871448e2017-03-24 14:54:29 +0000277 {
Kaizen8938bd32017-09-28 14:38:23 +0100278 return;
279 }
280
Jenkins52ba29e2018-08-29 15:32:11 +0000281 if(!kernel->is_parallelisable() || num_threads == 1)
Kaizen8938bd32017-09-28 14:38:23 +0100282 {
Jenkins52ba29e2018-08-29 15:32:11 +0000283 ThreadInfo info;
284 info.cpu_info = &_cpu_info;
Kaizen8938bd32017-09-28 14:38:23 +0100285 kernel->run(max_window, info);
Anthony Barbier871448e2017-03-24 14:54:29 +0000286 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000287 else
288 {
Jenkins52ba29e2018-08-29 15:32:11 +0000289 unsigned int num_windows = 0;
290 switch(hints.strategy())
Kaizen8938bd32017-09-28 14:38:23 +0100291 {
Jenkins52ba29e2018-08-29 15:32:11 +0000292 case StrategyHint::STATIC:
293 num_windows = num_threads;
294 break;
295 case StrategyHint::DYNAMIC:
Anthony Barbier871448e2017-03-24 14:54:29 +0000296 {
Jenkins52ba29e2018-08-29 15:32:11 +0000297 // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
298 const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
299 num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
300 break;
Anthony Barbier871448e2017-03-24 14:54:29 +0000301 }
Jenkins52ba29e2018-08-29 15:32:11 +0000302 default:
303 ARM_COMPUTE_ERROR("Unknown strategy");
Anthony Barbier871448e2017-03-24 14:54:29 +0000304 }
Jenkins52ba29e2018-08-29 15:32:11 +0000305 std::vector<IScheduler::Workload> workloads(num_windows);
306 for(unsigned int t = 0; t < num_windows; t++)
Anthony Barbier871448e2017-03-24 14:54:29 +0000307 {
Jenkins52ba29e2018-08-29 15:32:11 +0000308 //Capture 't' by copy, all the other variables by reference:
309 workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
310 {
311 Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
312 win.validate();
313 kernel->run(win, info);
314 };
Anthony Barbier871448e2017-03-24 14:54:29 +0000315 }
Jenkins52ba29e2018-08-29 15:32:11 +0000316 run_workloads(workloads);
Anthony Barbier871448e2017-03-24 14:54:29 +0000317 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000318}
Kaizen8938bd32017-09-28 14:38:23 +0100319} // namespace arm_compute