blob: 5916bb46fde1ffee189abc8fdb3df437c5a31f7f [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CPP/CPPScheduler.h"

#include "arm_compute/core/CPP/ICPPKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CPUUtils.h"

#include <atomic>
#include <condition_variable>
#include <exception>
#include <iostream>
#include <mutex>
#include <system_error>
#include <thread>
38
Kaizen8938bd32017-09-28 14:38:23 +010039namespace arm_compute
40{
Jenkins52ba29e2018-08-29 15:32:11 +000041namespace
42{
/** Hands out consecutive indices from the half-open range [start, end).
 *
 * The counter is a std::atomic, so several threads can call get_next() on the same
 * feeder and each fetched value is handed out exactly once.
 */
class ThreadFeeder
{
public:
    /** Constructor
     *
     * @param[in] start First value that will be returned by the feeder
     * @param[in] end   End condition (The last value returned by get_next() will be end - 1)
     */
    explicit ThreadFeeder(unsigned int start = 0, unsigned int end = 0)
        : _atomic_counter(start), _end(end)
    {
    }
    /** Fetch the next element of the range and advance the counter.
     *
     * @param[out] next Receives the value fetched from the counter. It is overwritten on
     *                  every call, but is only a valid element of the range when the
     *                  function returns true.
     *
     * @return True if @p next lies inside the range, false once the end has been reached.
     */
    bool get_next(unsigned int &next)
    {
        // The increment is atomic, so concurrent callers never receive the same value;
        // relaxed ordering is requested because no other memory is published through it.
        next = _atomic_counter.fetch_add(1u, std::memory_order_relaxed);
        return next < _end;
    }

private:
    std::atomic_uint   _atomic_counter; /**< Next value to hand out */
    const unsigned int _end;            /**< One past the last valid value */
};
71
72/** Execute workloads[info.thread_id] first, then call the feeder to get the index of the next workload to run.
73 *
74 * Will run workloads until the feeder reaches the end of its range.
75 *
76 * @param[in] workloads The array of workloads
77 * @param[in,out] feeder The feeder indicating which workload to execute next.
78 * @param[in] info Threading and CPU info.
79 */
80void process_workloads(std::vector<IScheduler::Workload> &workloads, ThreadFeeder &feeder, const ThreadInfo &info)
81{
82 unsigned int workload_index = info.thread_id;
83 do
84 {
85 ARM_COMPUTE_ERROR_ON(workload_index >= workloads.size());
86 workloads[workload_index](info);
87 }
88 while(feeder.get_next(workload_index));
89}
90
91} //namespace
92
93class CPPScheduler::Thread
Anthony Barbier871448e2017-03-24 14:54:29 +000094{
95public:
Kaizen8938bd32017-09-28 14:38:23 +010096 /** Start a new thread. */
Anthony Barbier871448e2017-03-24 14:54:29 +000097 Thread();
Kaizen8938bd32017-09-28 14:38:23 +010098
Anthony Barbier871448e2017-03-24 14:54:29 +000099 Thread(const Thread &) = delete;
100 Thread &operator=(const Thread &) = delete;
101 Thread(Thread &&) = delete;
102 Thread &operator=(Thread &&) = delete;
Kaizen8938bd32017-09-28 14:38:23 +0100103
104 /** Destructor. Make the thread join. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000105 ~Thread();
Kaizen8938bd32017-09-28 14:38:23 +0100106
Jenkins52ba29e2018-08-29 15:32:11 +0000107 /** Request the worker thread to start executing workloads.
108 *
109 * The thread will start by executing workloads[info.thread_id] and will then call the feeder to
110 * get the index of the following workload to run.
111 *
112 * @note This function will return as soon as the workloads have been sent to the worker thread.
Anthony Barbier871448e2017-03-24 14:54:29 +0000113 * wait() needs to be called to ensure the execution is complete.
114 */
Jenkins52ba29e2018-08-29 15:32:11 +0000115 void start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info);
Kaizen8938bd32017-09-28 14:38:23 +0100116
117 /** Wait for the current kernel execution to complete. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000118 void wait();
Kaizen8938bd32017-09-28 14:38:23 +0100119
120 /** Function ran by the worker thread. */
Anthony Barbier871448e2017-03-24 14:54:29 +0000121 void worker_thread();
122
123private:
Jenkins52ba29e2018-08-29 15:32:11 +0000124 std::thread _thread{};
125 ThreadInfo _info{};
126 std::vector<IScheduler::Workload> *_workloads{ nullptr };
127 ThreadFeeder *_feeder{ nullptr };
128 std::mutex _m{};
129 std::condition_variable _cv{};
130 bool _wait_for_work{ false };
131 bool _job_complete{ true };
132 std::exception_ptr _current_exception{ nullptr };
Anthony Barbier871448e2017-03-24 14:54:29 +0000133};
134
Jenkins52ba29e2018-08-29 15:32:11 +0000135CPPScheduler::Thread::Thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000136{
Anthony Barbier871448e2017-03-24 14:54:29 +0000137 _thread = std::thread(&Thread::worker_thread, this);
138}
139
Jenkins52ba29e2018-08-29 15:32:11 +0000140CPPScheduler::Thread::~Thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000141{
Kaizen8938bd32017-09-28 14:38:23 +0100142 // Make sure worker thread has ended
143 if(_thread.joinable())
144 {
Jenkins52ba29e2018-08-29 15:32:11 +0000145 ThreadFeeder feeder;
146 start(nullptr, feeder, ThreadInfo());
Kaizen8938bd32017-09-28 14:38:23 +0100147 _thread.join();
148 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000149}
150
Jenkins52ba29e2018-08-29 15:32:11 +0000151void CPPScheduler::Thread::start(std::vector<IScheduler::Workload> *workloads, ThreadFeeder &feeder, const ThreadInfo &info)
Anthony Barbier871448e2017-03-24 14:54:29 +0000152{
Jenkins52ba29e2018-08-29 15:32:11 +0000153 _workloads = workloads;
154 _feeder = &feeder;
155 _info = info;
Kaizen8938bd32017-09-28 14:38:23 +0100156 {
157 std::lock_guard<std::mutex> lock(_m);
158 _wait_for_work = true;
159 _job_complete = false;
160 }
161 _cv.notify_one();
Anthony Barbier871448e2017-03-24 14:54:29 +0000162}
163
Jenkins52ba29e2018-08-29 15:32:11 +0000164void CPPScheduler::Thread::wait()
Anthony Barbier871448e2017-03-24 14:54:29 +0000165{
Kaizen8938bd32017-09-28 14:38:23 +0100166 {
167 std::unique_lock<std::mutex> lock(_m);
168 _cv.wait(lock, [&] { return _job_complete; });
169 }
170
Anthony Barbier871448e2017-03-24 14:54:29 +0000171 if(_current_exception)
172 {
173 std::rethrow_exception(_current_exception);
174 }
175}
176
Jenkins52ba29e2018-08-29 15:32:11 +0000177void CPPScheduler::Thread::worker_thread()
Anthony Barbier871448e2017-03-24 14:54:29 +0000178{
Kaizen8938bd32017-09-28 14:38:23 +0100179 while(true)
Anthony Barbier871448e2017-03-24 14:54:29 +0000180 {
Kaizen8938bd32017-09-28 14:38:23 +0100181 std::unique_lock<std::mutex> lock(_m);
182 _cv.wait(lock, [&] { return _wait_for_work; });
183 _wait_for_work = false;
184
Anthony Barbier871448e2017-03-24 14:54:29 +0000185 _current_exception = nullptr;
Kaizen8938bd32017-09-28 14:38:23 +0100186
Anthony Barbier871448e2017-03-24 14:54:29 +0000187 // Time to exit
Jenkins52ba29e2018-08-29 15:32:11 +0000188 if(_workloads == nullptr)
Anthony Barbier871448e2017-03-24 14:54:29 +0000189 {
190 return;
191 }
192
Jenkins514be652019-02-28 12:25:18 +0000193#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier871448e2017-03-24 14:54:29 +0000194 try
195 {
Jenkins514be652019-02-28 12:25:18 +0000196#endif /* ARM_COMPUTE_EXCEPTIONS_ENABLED */
Jenkins52ba29e2018-08-29 15:32:11 +0000197 process_workloads(*_workloads, *_feeder, _info);
Jenkins514be652019-02-28 12:25:18 +0000198
199#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Anthony Barbier871448e2017-03-24 14:54:29 +0000200 }
201 catch(...)
202 {
203 _current_exception = std::current_exception();
204 }
Jenkins514be652019-02-28 12:25:18 +0000205#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Kaizen8938bd32017-09-28 14:38:23 +0100206 _job_complete = true;
207 lock.unlock();
208 _cv.notify_one();
Anthony Barbier871448e2017-03-24 14:54:29 +0000209 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000210}
211
Anthony Barbier871448e2017-03-24 14:54:29 +0000212CPPScheduler &CPPScheduler::get()
213{
214 static CPPScheduler scheduler;
215 return scheduler;
216}
217
218CPPScheduler::CPPScheduler()
Anthony Barbier06ea0482018-02-22 15:45:35 +0000219 : _num_threads(num_threads_hint()),
Kaizen8938bd32017-09-28 14:38:23 +0100220 _threads(_num_threads - 1)
Anthony Barbier871448e2017-03-24 14:54:29 +0000221{
Anthony Barbier871448e2017-03-24 14:54:29 +0000222}
223
Anthony Barbierdbdab852017-06-23 15:42:00 +0100224void CPPScheduler::set_num_threads(unsigned int num_threads)
Anthony Barbier871448e2017-03-24 14:54:29 +0000225{
Jenkinsc3f34a42018-03-02 12:38:09 +0000226 _num_threads = num_threads == 0 ? num_threads_hint() : num_threads;
Kaizen8938bd32017-09-28 14:38:23 +0100227 _threads.resize(_num_threads - 1);
228}
229
230unsigned int CPPScheduler::num_threads() const
231{
232 return _num_threads;
Anthony Barbier871448e2017-03-24 14:54:29 +0000233}
234
Jenkinsb9abeae2018-11-22 11:58:08 +0000235#ifndef DOXYGEN_SKIP_THIS
Jenkins52ba29e2018-08-29 15:32:11 +0000236void CPPScheduler::run_workloads(std::vector<IScheduler::Workload> &workloads)
237{
238 const unsigned int num_threads = std::min(_num_threads, static_cast<unsigned int>(workloads.size()));
239 if(num_threads < 1)
240 {
241 return;
242 }
243 ThreadFeeder feeder(num_threads, workloads.size());
244 ThreadInfo info;
245 info.cpu_info = &_cpu_info;
246 info.num_threads = num_threads;
247 unsigned int t = 0;
248 auto thread_it = _threads.begin();
249 for(; t < num_threads - 1; ++t, ++thread_it)
250 {
251 info.thread_id = t;
252 thread_it->start(&workloads, feeder, info);
253 }
254
255 info.thread_id = t;
256 process_workloads(workloads, feeder, info);
Jenkins514be652019-02-28 12:25:18 +0000257#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Jenkins52ba29e2018-08-29 15:32:11 +0000258 try
259 {
Jenkins514be652019-02-28 12:25:18 +0000260#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Jenkins52ba29e2018-08-29 15:32:11 +0000261 for(auto &thread : _threads)
262 {
263 thread.wait();
264 }
Jenkins514be652019-02-28 12:25:18 +0000265#ifndef ARM_COMPUTE_EXCEPTIONS_DISABLED
Jenkins52ba29e2018-08-29 15:32:11 +0000266 }
267 catch(const std::system_error &e)
268 {
269 std::cerr << "Caught system_error with code " << e.code() << " meaning " << e.what() << '\n';
270 }
Jenkins514be652019-02-28 12:25:18 +0000271#endif /* ARM_COMPUTE_EXCEPTIONS_DISABLED */
Jenkins52ba29e2018-08-29 15:32:11 +0000272}
Jenkinsb9abeae2018-11-22 11:58:08 +0000273#endif /* DOXYGEN_SKIP_THIS */
Jenkins52ba29e2018-08-29 15:32:11 +0000274
275void CPPScheduler::schedule(ICPPKernel *kernel, const Hints &hints)
Anthony Barbier871448e2017-03-24 14:54:29 +0000276{
277 ARM_COMPUTE_ERROR_ON_MSG(!kernel, "The child class didn't set the kernel");
278
Anthony Barbierdbdab852017-06-23 15:42:00 +0100279 const Window &max_window = kernel->window();
Jenkins52ba29e2018-08-29 15:32:11 +0000280 const unsigned int num_iterations = max_window.num_iterations(hints.split_dimension());
281 const unsigned int num_threads = std::min(num_iterations, _num_threads);
Anthony Barbier871448e2017-03-24 14:54:29 +0000282
Kaizen8938bd32017-09-28 14:38:23 +0100283 if(num_iterations == 0)
Anthony Barbier871448e2017-03-24 14:54:29 +0000284 {
Kaizen8938bd32017-09-28 14:38:23 +0100285 return;
286 }
287
Jenkins52ba29e2018-08-29 15:32:11 +0000288 if(!kernel->is_parallelisable() || num_threads == 1)
Kaizen8938bd32017-09-28 14:38:23 +0100289 {
Jenkins52ba29e2018-08-29 15:32:11 +0000290 ThreadInfo info;
291 info.cpu_info = &_cpu_info;
Kaizen8938bd32017-09-28 14:38:23 +0100292 kernel->run(max_window, info);
Anthony Barbier871448e2017-03-24 14:54:29 +0000293 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000294 else
295 {
Jenkins52ba29e2018-08-29 15:32:11 +0000296 unsigned int num_windows = 0;
297 switch(hints.strategy())
Kaizen8938bd32017-09-28 14:38:23 +0100298 {
Jenkins52ba29e2018-08-29 15:32:11 +0000299 case StrategyHint::STATIC:
300 num_windows = num_threads;
301 break;
302 case StrategyHint::DYNAMIC:
Anthony Barbier871448e2017-03-24 14:54:29 +0000303 {
Jenkins52ba29e2018-08-29 15:32:11 +0000304 // Make sure we don't use some windows which are too small as this might create some contention on the ThreadFeeder
305 const unsigned int max_iterations = static_cast<unsigned int>(_num_threads) * 3;
306 num_windows = num_iterations > max_iterations ? max_iterations : num_iterations;
307 break;
Anthony Barbier871448e2017-03-24 14:54:29 +0000308 }
Jenkins52ba29e2018-08-29 15:32:11 +0000309 default:
310 ARM_COMPUTE_ERROR("Unknown strategy");
Anthony Barbier871448e2017-03-24 14:54:29 +0000311 }
Jenkins52ba29e2018-08-29 15:32:11 +0000312 std::vector<IScheduler::Workload> workloads(num_windows);
313 for(unsigned int t = 0; t < num_windows; t++)
Anthony Barbier871448e2017-03-24 14:54:29 +0000314 {
Jenkins52ba29e2018-08-29 15:32:11 +0000315 //Capture 't' by copy, all the other variables by reference:
316 workloads[t] = [t, &hints, &max_window, &num_windows, &kernel](const ThreadInfo & info)
317 {
318 Window win = max_window.split_window(hints.split_dimension(), t, num_windows);
319 win.validate();
320 kernel->run(win, info);
321 };
Anthony Barbier871448e2017-03-24 14:54:29 +0000322 }
Jenkins52ba29e2018-08-29 15:32:11 +0000323 run_workloads(workloads);
Anthony Barbier871448e2017-03-24 14:54:29 +0000324 }
Anthony Barbier871448e2017-03-24 14:54:29 +0000325}
Kaizen8938bd32017-09-28 14:38:23 +0100326} // namespace arm_compute