/*
 * kmp_wait_release.h -- Wait/Release implementation
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and waking it. They are used
to build higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
    flag32,        /**< 32 bit flags */
    flag64,        /**< 64 bit flags */
    flag_oncore    /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P>
class kmp_flag {
    volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
    flag_type t;       /**< "Type" of the flag in loc */
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    /*!
     * @result the pointer to the actual flag
     */
    volatile P * get() { return loc; }
    /*!
     * @result the flag_type
     */
    flag_type get_type() { return t; }
    // Derived classes must provide the following:
    /*
    kmp_info_t * get_waiter(kmp_uint32 i);
    kmp_uint32 get_num_waiters();
    bool done_check();
    bool done_check_val(P old_loc);
    bool notdone_check();
    P internal_release();
    P set_sleeping();
    P unset_sleeping();
    bool is_sleeping();
    bool is_sleeping_val(P old_loc);
    */
};
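
/* A sketch of how the wait/release templates below drive a class that provides
   the operations listed above (illustrative only; the concrete flag classes
   appear further down in this header):
   @code
   // waiter side                            // releaser side
   if (!flag->done_check())                  old = flag->internal_release();
       // spin on notdone_check(), then      if (flag->is_sleeping_val(old))
       // set_sleeping() and suspend()           flag->resume(wait_gtid);
   @endcode */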

/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release to wake it back up; otherwise the
   waiting thread can sleep forever and a deadlock results! */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                       USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));

    // Setup for waiting
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // The worker threads cannot rely on the team struct existing at this point.
        // Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force immediate suspend if not set by user and more threads than available procs
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more. This increment makes
           certain that this thread doesn't go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
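    /* Worked example (illustrative values): if the global time counter is now 10
       and th_team_bt_intervals is 5, hibernate becomes 10 + 5 + 1 = 16, so this
       thread will not attempt to suspend before the counter reaches 16. */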
    KMP_MB();

    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of these things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion, this thread has decremented the task
              team's ref ct and possibly deallocated it, and should no longer reference it.
           3) Tasking is off for this region. This could be because we are in a serialized region
              (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                } else if (KMP_TASKING_ENABLED(task_team)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            } // if
        } // if

        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        // TODO: Should it be number of cores instead of thread contexts? Like:
        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
        // Need performance improvement data to make the change...
        KMP_YIELD_SPIN(spins);

        // Check if this thread was transferred from a team
        // to the thread pool (or vice-versa) while spinning.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* Here, we cannot assert that:
                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
                   by the workers. The two can get out of sync for brief periods of time. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

        // If we have waited a bit more, fall asleep
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // TODO: If thread is done with work and times out, disband/free
    }
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}

/* Release any threads specified as waiting on the flag by releasing the flag, and resume the
   waiting thread(s) if the sleep bit(s) indicate they are sleeping. A thread that calls
   __kmp_wait_template must have this function called on its flag to wake up the potentially
   sleeping thread; otherwise a deadlock can result! */
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    // FIX ME
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep stuff if infinite block time not set
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up thread if needed
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        } else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}

template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};

template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};
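
/* A minimal sketch of how the traits select a width-correct atomic at compile
   time (bump_flag is an illustrative helper, not part of this header):
   @code
   template <typename FlagType>
   FlagType bump_flag(volatile FlagType *f) {
       typedef flag_traits<FlagType> traits_type;
       return traits_type::test_then_add4(f);  // atomic fetch-then-add of 4, 32- or 64-bit
   }
   @endcode */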

template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;  /**< Value the flag is compared against to check whether it has been released. */
    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this flag. */
    kmp_uint32 num_waiting_threads;   /**< Number of threads sleeping on this flag. */
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    /*!
     * @param i in index into waiting_threads
     * @result the thread that is waiting at index i
     */
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    /*!
     * @result num_waiting_threads
     */
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    /*!
     * @param thr in the thread which is now waiting
     *
     * Insert a waiting thread at index 0.
     */
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    /*!
     * @result true if the flag object has been released.
     */
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    /*!
     * @param old_loc in old value of flag
     * @result true if the flag's old value indicates it was released.
     */
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    /*!
     * @result true if the flag object is not yet released.
     * Used in __kmp_wait_template like:
     * @code
     * while (flag.notdone_check()) { pause(); }
     * @endcode
     */
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    /*!
     * @result Actual flag value before release was applied.
     * Trigger all waiting threads to run by modifying flag to release state.
     */
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    /*!
     * @result Actual flag value before sleep bit(s) set.
     * Records that there is at least one thread sleeping on the flag by setting the sleep bit(s).
     */
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    /*!
     * @result Actual flag value before sleep bit(s) cleared.
     * Records that there are no longer threads sleeping on the flag by clearing the sleep bit(s).
     */
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    /*!
     * @param old_loc in old value of flag
     * Test whether the sleep bit(s) were set in the flag's old value old_loc.
     */
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    /*!
     * Test whether there are threads sleeping on the flag.
     */
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};
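
/* Sketch of the sleep-bit protocol implemented above (illustrative; assumes the
   bit layout from kmp.h, where KMP_BARRIER_SLEEP_STATE occupies the low bit(s)
   and the release bump is 4, so checker values never collide with it):
   @code
   // waiter, before suspending:            // releaser:
   old = flag.set_sleeping();               old = flag.internal_release();
   if (flag.done_check_val(old))            if (flag.is_sleeping_val(old))
       flag.unset_sleeping();  // no sleep      flag.resume(wait_gtid);
   @endcode */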

class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};

class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};
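
/* A minimal usage sketch pairing the two sides (illustrative; in the runtime
   the flag storage normally lives in a barrier struct such as
   this_thr->th.th_bar[bt].bb.b_go, and the waiter is registered so that
   release() can find it):
   @code
   volatile kmp_uint64 go = 0;
   // Waiter: block until go reaches 4 (one release bump)
   kmp_flag_64 wflag(&go, (kmp_uint64)4);
   wflag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(NULL));
   // Releaser: bump the flag; wake the waiter if its sleep bit was set
   kmp_flag_64 rflag(&go, waiter_thr);
   rflag.release();
   @endcode */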

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
    kmp_info_t * waiting_threads[1];
    kmp_uint32 num_waiting_threads;
    kmp_uint32 offset;      /**< Portion of flag that is of interest for an operation. */
    bool flag_switch;       /**< Indicates a switch in flag location. */
    enum barrier_type bt;   /**< Barrier type. */
    kmp_info_t * this_thr;  /**< Thread that may be redirected to a different flag location. */
#if USE_ITT_BUILD
    void *itt_sync_obj;     /**< ITT object that must be passed to the new flag location. */
#endif
    char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), offset(idx), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), offset(idx), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        , num_waiting_threads(0), flag_switch(false) {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            old_val = *get();
            byteref(get(),offset) = 1;
        }
        else {
            kmp_uint64 mask=0;
            byteref(&mask,offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};
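
/* The on-core flag packs one byte per thread into a single 64-bit word, so a
   single store (or one atomic OR) can release several threads at once. A
   sketch of the byte arithmetic used by internal_release() (illustrative
   values; the mask constant shown assumes a little-endian layout):
   @code
   // offset = 2, non-infinite blocktime:
   kmp_uint64 mask = 0;
   byteref(&mask, 2) = 1;           // mask == 0x0000000000010000
   KMP_TEST_THEN_OR64(loc, mask);   // atomically sets only byte 2 of *loc
   // done_check() then compares byteref(*loc, offset) with the checker byte
   @endcode */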

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H