/*
 * kmp_wait_release.h -- Wait/Release implementation
 * $Revision: 43417 $
 * $Date: 2014-08-26 14:06:38 -0500 (Tue, 26 Aug 2014) $
 */


//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronization operations of suspending a thread and waking it. They are
used to build higher level operations such as barriers and fork/join.
*/

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
    flag32,        /**< 32 bit flags */
    flag64,        /**< 64 bit flags */
    flag_oncore    /**< special 64-bit flag for on-core barrier (hierarchical) */
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P>
class kmp_flag {
    volatile P * loc;  /**< Pointer to the flag storage that is modified by another thread */
    flag_type t;       /**< "Type" of the flag in loc */
 public:
    typedef P flag_t;
    kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
    /*!
     * @result the pointer to the actual flag
     */
    volatile P * get() { return loc; }
    /*!
     * @result the flag_type
     */
    flag_type get_type() { return t; }
    // Derived classes must provide the following:
    /*
    kmp_info_t * get_waiter(kmp_uint32 i);
    kmp_uint32 get_num_waiters();
    bool done_check();
    bool done_check_val(P old_loc);
    bool notdone_check();
    P internal_release();
    P set_sleeping();
    P unset_sleeping();
    bool is_sleeping();
    bool is_sleeping_val(P old_loc);
    */
};

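/* A minimal sketch (illustrative only, not part of the runtime) of the shape
   a derived flag type takes. The name my_flag and its checker member are
   hypothetical; the real derived classes appear further down in this file.
   @code
   class my_flag : public kmp_flag<kmp_uint32> {
       kmp_uint32 checker;  // value that signals "released"
   public:
       my_flag(volatile kmp_uint32 *p, kmp_uint32 c)
           : kmp_flag<kmp_uint32>(p, flag32), checker(c) {}
       bool done_check()    { return *(this->get()) == checker; }
       bool notdone_check() { return *(this->get()) != checker; }
       // ... plus the waiter bookkeeping and sleep-bit members listed above
   };
   @endcode */
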
/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_*
   must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */
template <class C>
static inline void __kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin
                                       USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
    // NOTE: We may not belong to a team at this point.
    volatile typename C::flag_t *spin = flag->get();
    kmp_uint32 spins;
    kmp_uint32 hibernate;
    int th_gtid;
    int tasks_completed = FALSE;

    KMP_FSYNC_SPIN_INIT(spin, NULL);
    if (flag->done_check()) {
        KMP_FSYNC_SPIN_ACQUIRED(spin);
        return;
    }
    th_gtid = this_thr->th.th_info.ds.ds_gtid;
    KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));

    // Setup for waiting
    KMP_INIT_YIELD(spins);

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // The worker threads cannot rely on the team struct existing at this point.
        // Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
        if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
            // Force immediate suspend if not set by user and more threads than available procs
            hibernate = 0;
        else
            hibernate = this_thr->th.th_team_bt_intervals;
#else
        hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

        /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety
           of the specified #intervals, plus up to one interval more. This increment makes
           certain that this thread doesn't go to sleep too soon. */
        if (hibernate != 0)
            hibernate++;

        // Add in the current time value.
        hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
        KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                      th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                      hibernate - __kmp_global.g.g_time.dt.t_value));
    }
    KMP_MB();

    // Main wait spin loop
    while (flag->notdone_check()) {
        int in_pool;

        /* If the task team is NULL, it means one of three things:
           1) A newly-created thread is first being released by __kmp_fork_barrier(), and
              its task team has not been set up yet.
           2) All tasks have been executed to completion, this thread has decremented the task
              team's ref ct and possibly deallocated it, and should no longer reference it.
           3) Tasking is off for this region. This could be because we are in a serialized region
              (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). */
        kmp_task_team_t * task_team = NULL;
        if (__kmp_tasking_mode != tskm_immediate_exec) {
            task_team = this_thr->th.th_task_team;
            if (task_team != NULL) {
                if (!TCR_SYNC_4(task_team->tt.tt_active)) {
                    KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
                    __kmp_unref_task_team(task_team, this_thr);
                } else if (KMP_TASKING_ENABLED(task_team, this_thr->th.th_task_state)) {
                    flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed
                                        USE_ITT_BUILD_ARG(itt_sync_obj), 0);
                }
            } // if
        } // if

        KMP_FSYNC_SPIN_PREPARE(spin);
        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }

        // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield
        KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc);
        // TODO: Should it be number of cores instead of thread contexts? Like:
        // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores);
        // Need performance improvement data to make the change...
        KMP_YIELD_SPIN(spins);

        // Check if this thread was transferred from a team
        // to the thread pool (or vice-versa) while spinning.
        in_pool = !!TCR_4(this_thr->th.th_in_pool);
        if (in_pool != !!this_thr->th.th_active_in_pool) {
            if (in_pool) { // Recently transferred from team to pool
                KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth);
                this_thr->th.th_active_in_pool = TRUE;
                /* Here, we cannot assert that:
                   KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth);
                   __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join
                   lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously
                   by the workers. The two can get out of sync for brief periods of time. */
            }
            else { // Recently transferred from pool to team
                KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth);
                KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
                this_thr->th.th_active_in_pool = FALSE;
            }
        }

        // Don't suspend if KMP_BLOCKTIME is set to "infinite"
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
            continue;

        // Don't suspend if there is a likelihood of new tasks being spawned.
        if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
            continue;

        // If we have waited a bit more, fall asleep
        if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
            continue;

        KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));

        flag->suspend(th_gtid);

        if (TCR_4(__kmp_global.g.g_done)) {
            if (__kmp_global.g.g_abort)
                __kmp_abort_thread();
            break;
        }
        // TODO: If thread is done with work and times out, disband/free
    }
    KMP_FSYNC_SPIN_ACQUIRED(spin);
}

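/* Typical pairing (a hedged sketch, not a verbatim excerpt from the runtime):
   one thread waits on a flag location while a peer thread releases it. The
   names loc, c, this_thr, waiter_thr and itt_sync_obj stand in for whatever
   the caller has in scope; kmp_flag_64 is defined later in this file.
   @code
   // Waiting side: blocks until *loc reaches the checker value c.
   kmp_flag_64 flag(loc, c);
   flag.wait(this_thr, TRUE
             USE_ITT_BUILD_ARG(itt_sync_obj));

   // Releasing side: a peer builds a flag on the same location, registering
   // the waiting thread so it can be resumed, then releases it.
   kmp_flag_64 flag(loc, waiter_thr);
   flag.release();
   @endcode */
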
/* Release any threads specified as waiting on the flag by releasing the flag and resuming the
   waiting thread(s) if indicated by the sleep bit(s). A thread blocked in __kmp_wait_template
   relies on some other thread calling this function to wake it up and prevent deadlocks! */
template <class C>
static inline void __kmp_release_template(C *flag)
{
#ifdef KMP_DEBUG
    // FIX ME
    kmp_info_t * wait_thr = flag->get_waiter(0);
    int target_gtid = wait_thr->th.th_info.ds.ds_gtid;
    int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
    KF_TRACE(20, ("__kmp_release: T#%d releasing T#%d spin(%p)\n", gtid, target_gtid, flag->get()));
    KMP_DEBUG_ASSERT(flag->get());
    KMP_FSYNC_RELEASING(flag->get());

    typename C::flag_t old_spin = flag->internal_release();

    KF_TRACE(100, ("__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
                   gtid, flag->get(), old_spin, *(flag->get())));

    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
        // Only need to check sleep stuff if infinite block time not set
        if (flag->is_sleeping_val(old_spin)) {
            for (unsigned int i=0; i<flag->get_num_waiters(); ++i) {
                kmp_info_t * waiter = flag->get_waiter(i);
                int wait_gtid = waiter->th.th_info.ds.ds_gtid;
                // Wake up thread if needed
                KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
                              gtid, wait_gtid, flag->get()));
                flag->resume(wait_gtid);
            }
        } else {
            KF_TRACE(50, ("__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
                          gtid, target_gtid, flag->get()));
        }
    }
}

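/* Why the release path tests the *old* flag value: a waiter publishes its
   intent to sleep by atomically setting the sleep bit before it suspends, and
   internal_release() returns the value the flag held just before its atomic
   update. Testing that old value closes the race in which the waiter sets the
   sleep bit after the releaser has already modified the flag. A sketch of the
   two sides (illustrative only):
   @code
   // Waiter, inside the suspend path:
   flag->set_sleeping();                 // atomic OR of KMP_BARRIER_SLEEP_STATE
   // ... re-check done_check(), then sleep via flag->suspend(th_gtid)

   // Releaser, in __kmp_release_template above:
   old_spin = flag->internal_release();  // atomic RMW, returns prior value
   if (flag->is_sleeping_val(old_spin))  // sleep bit was set => must wake
       flag->resume(wait_gtid);
   @endcode */
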
template <typename FlagType>
struct flag_traits {};

template <>
struct flag_traits<kmp_uint32> {
    typedef kmp_uint32 flag_t;
    static const flag_type t = flag32;
    static inline flag_t tcr(flag_t f) { return TCR_4(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); }
};

template <>
struct flag_traits<kmp_uint64> {
    typedef kmp_uint64 flag_t;
    static const flag_type t = flag64;
    static inline flag_t tcr(flag_t f) { return TCR_8(f); }
    static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); }
    static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); }
    static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); }
};

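/* The traits select the right atomic primitive for each flag width at compile
   time, so kmp_basic_flag below stays width-agnostic. A small illustration
   (not from the runtime):
   @code
   kmp_uint64 f = 0;
   // Dispatches to KMP_TEST_THEN_ADD4_64 on &f and returns the old value (0).
   kmp_uint64 old_val = flag_traits<kmp_uint64>::test_then_add4(&f);
   // f is now 4
   @endcode */
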
template <typename FlagType>
class kmp_basic_flag : public kmp_flag<FlagType> {
    typedef flag_traits<FlagType> traits_type;
    FlagType checker;  /**< Value the flag is compared against to check whether it has been released. */
    kmp_info_t * waiting_threads[1];  /**< Array of threads sleeping on this flag. */
    kmp_uint32 num_waiting_threads;   /**< Number of threads sleeping on this flag. */
public:
    kmp_basic_flag(volatile FlagType *p) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
    kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
        waiting_threads[0] = thr;
    }
    kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag<FlagType>(p, traits_type::t), checker(c), num_waiting_threads(0) {}
    /*!
     * @param i in   index into waiting_threads
     * @result the thread that is waiting at index i
     */
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    /*!
     * @result num_waiting_threads
     */
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    /*!
     * @param thr in   the thread which is now waiting
     *
     * Insert a waiting thread at index 0.
     */
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    /*!
     * @result true if the flag object has been released.
     */
    bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
    /*!
     * @param old_loc in   old value of flag
     * @result true if the flag's old value indicates it was released.
     */
    bool done_check_val(FlagType old_loc) { return old_loc == checker; }
    /*!
     * @result true if the flag object is not yet released.
     * Used in __kmp_wait_template like:
     * @code
     * while (flag.notdone_check()) { pause(); }
     * @endcode
     */
    bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
    /*!
     * @result Actual flag value before release was applied.
     * Trigger all waiting threads to run by modifying flag to release state.
     */
    FlagType internal_release() {
        return traits_type::test_then_add4((volatile FlagType *)this->get());
    }
    /*!
     * @result Actual flag value before sleep bit(s) set.
     * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s).
     */
    FlagType set_sleeping() {
        return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE);
    }
    /*!
     * @result Actual flag value before sleep bit(s) cleared.
     * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s).
     */
    FlagType unset_sleeping() {
        return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    /*!
     * @param old_loc in   old value of flag
     * Test whether threads were sleeping on the flag's old value in old_loc.
     */
    bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    /*!
     * Test whether there are threads sleeping on the flag.
     */
    bool is_sleeping() { return is_sleeping_val(*(this->get())); }
};

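/* kmp_basic_flag in action (a hedged sketch, not from the runtime): a "go"
   flag is released by bumping it with test_then_add4. Adding 4
   (KMP_BARRIER_STATE_BUMP in kmp.h) leaves the low bits of the flag free for
   the sleep-state bookkeeping above, which is why done_check() compares
   against a checker value rather than testing a single bit.
   @code
   volatile kmp_uint32 go = 0;
   kmp_basic_flag<kmp_uint32> flag(&go, 4);  // released once go reaches 4
   flag.internal_release();                  // go: 0 -> 4, returns old value 0
   // flag.done_check() is now true
   @endcode */
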
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
    kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint32>(p, thr) {}
    kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag<kmp_uint32>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};

class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
    kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag<kmp_uint64>(p, thr) {}
    kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag<kmp_uint64>(p, c) {}
    void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished
                                      USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
};

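/* How the barrier code typically uses these classes (a sketch modeled on the
   fork/join barrier; details vary across the barrier algorithms, and
   other_thr is hypothetical here): each worker waits on its own b_go field
   until it is bumped.
   @code
   // Worker: wait for the "go" signal on its own barrier flag.
   kmp_flag_64 flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                    (kmp_uint64)KMP_BARRIER_STATE_BUMP);
   flag.wait(this_thr, TRUE
             USE_ITT_BUILD_ARG(itt_sync_obj));

   // Master: release one worker, registering it as the waiter so that
   // __kmp_release_template can resume it if it went to sleep.
   kmp_flag_64 flag(&other_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go, other_thr);
   flag.release();
   @endcode */
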
// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
    kmp_uint64 checker;
    kmp_info_t * waiting_threads[1];
    kmp_uint32 num_waiting_threads;
    kmp_uint32 offset;      /**< Portion of flag that is of interest for an operation. */
    bool flag_switch;       /**< Indicates a switch in flag location. */
    enum barrier_type bt;   /**< Barrier type. */
    kmp_info_t * this_thr;  /**< Thread that may be redirected to different flag location. */
#if USE_ITT_BUILD
    void *itt_sync_obj;     /**< ITT object that must be passed to new flag location. */
#endif
    char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((char *)loc)[offset]; }
public:
    kmp_flag_oncore(volatile kmp_uint64 *p)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
        : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {}
    kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t,
                    kmp_info_t * thr
#if USE_ITT_BUILD
                    , void *itt
#endif
                    )
        : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx),
          flag_switch(false), bt(bar_t), this_thr(thr)
#if USE_ITT_BUILD
        , itt_sync_obj(itt)
#endif
        {}
    kmp_info_t * get_waiter(kmp_uint32 i) {
        KMP_DEBUG_ASSERT(i<num_waiting_threads);
        return waiting_threads[i];
    }
    kmp_uint32 get_num_waiters() { return num_waiting_threads; }
    void set_waiter(kmp_info_t *thr) {
        waiting_threads[0] = thr;
        num_waiting_threads = 1;
    }
    bool done_check_val(kmp_uint64 old_loc) { return byteref(&old_loc,offset) == checker; }
    bool done_check() { return done_check_val(*get()); }
    bool notdone_check() {
        // Calculate flag_switch
        if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
            flag_switch = true;
        if (byteref(get(),offset) != 1 && !flag_switch)
            return true;
        else if (flag_switch) {
            this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
            kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
            __kmp_wait_64(this_thr, &flag, TRUE
#if USE_ITT_BUILD
                          , itt_sync_obj
#endif
                          );
        }
        return false;
    }
    kmp_uint64 internal_release() {
        kmp_uint64 old_val;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
            old_val = *get();
            byteref(get(),offset) = 1;
        }
        else {
            kmp_uint64 mask=0;
            byteref(&mask,offset) = 1;
            old_val = KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask);
        }
        return old_val;
    }
    kmp_uint64 set_sleeping() {
        return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE);
    }
    kmp_uint64 unset_sleeping() {
        return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE);
    }
    bool is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; }
    bool is_sleeping() { return is_sleeping_val(*get()); }
    void wait(kmp_info_t *this_thr, int final_spin
              USE_ITT_BUILD_ARG(void * itt_sync_obj)) {
        __kmp_wait_template(this_thr, this, final_spin
                            USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    void release() { __kmp_release_template(this); }
    void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
    void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
    int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished
                      USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) {
        return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished
                                          USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
    }
};

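/* The on-core flag packs one byte per thread of a core's group into a single
   64-bit word, so the whole group can be signalled with one store or one
   atomic OR. A small illustration of what byteref()/offset mean (not from
   the runtime):
   @code
   kmp_uint64 f = 0;
   ((char *)&f)[3] = 1;  // same effect as byteref(&f, 3) = 1: the thread
                         // whose offset is 3 now sees its byte set to 1
   // With a finite blocktime, internal_release() builds this byte into a
   // mask and applies it with KMP_TEST_THEN_OR64, as shown above.
   @endcode */
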
/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H