/*
 * kmp_barrier.cpp
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */
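// Note: on KMP_MIC the ngo_* macros use 512-bit non-temporal,
// non-globally-ordered ("storenrngo") stores so a full cache line of ICVs
// (with b_go in its last 8 bytes) can be pushed to a child in one store;
// ngo_sync() then serves as a full store fence. Elsewhere they reduce to
// plain copies and no-ops.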

void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------

// Linear Barrier
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the master thread at any
       time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                       new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        ANNOTATE_BARRIER_BEGIN(other_threads[i]);
        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                         other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;
// The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Tree barrier
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                                       USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
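  // In the gather tree a parent with thread id tid owns children
  // tid * branch_factor + 1 .. tid * branch_factor + branch_factor (clipped
  // to nproc), which is what the shift below computes.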
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the master thread at any
       time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}

static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// Hyper Barrier
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                           int tid, void (*reduce)(void *, void *)
                                        USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

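    // At each level, threads form groups of branch_factor members spaced
    // (1 << level) apart. A thread whose digit at this level is nonzero
    // reports to the group member whose lower digits are all zero and then
    // leaves the loop; the remaining member gathers its group's children at
    // the next level.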
    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing for
         loop), a worker thread may not assume that the team is valid any more
         - it could be deallocated by the master thread at any time. */
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
     are released in the reverse order of the corresponding gather, otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
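    // child starts as an upper bound on how many children can exist at this
    // level and is clamped to branch_factor - 1 below; the loop then walks
    // this thread's children from the highest tid down to tid + (1 << level).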
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
         child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Hierarchical Barrier

// Initialize thread barrier data
/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
   Performs the minimum amount of initialization required based on how the team
   has changed. Returns true if leaf children will require both on-core and
   traditional wake-up mechanisms. For example, if the team size increases,
   threads already in the team will respond to on-core wakeup on their parent
   thread, but threads newly added to the team will only be listening on
   their local b_go. */
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(
            tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in
        // hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) !=
                   0) { // TODO: can we make this op faster?
          // thread is not a subtree root at next level, so this is max
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}

static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state;

  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) {
        // First, wait for leaf children to check-in on my b_arrived flag
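        // Each leaf child sets its assigned byte in this thread's b_arrived
        // word (see the oncore release path below), so waiting for
        // b_arrived | leaf_state is equivalent to waiting for every leaf
        // child to have checked in.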
Jonathan Peyton30419822017-05-12 18:01:32 +0000860 kmp_uint64 leaf_state =
861 KMP_MASTER_TID(tid)
862 ? thr_bar->b_arrived | thr_bar->leaf_state
863 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
864 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
865 "for leaf kids\n",
866 gtid, team->t.t_id, tid));
867 kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
868 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
869 if (reduce) {
870 ANNOTATE_REDUCE_AFTER(reduce);
871 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
872 ++child_tid) {
873 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
874 "T#%d(%d:%d)\n",
875 gtid, team->t.t_id, tid,
876 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
877 child_tid));
878 ANNOTATE_BARRIER_END(other_threads[child_tid]);
879 (*reduce)(this_thr->th.th_local.reduce_data,
880 other_threads[child_tid]->th.th_local.reduce_data);
881 }
882 ANNOTATE_REDUCE_BEFORE(reduce);
883 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000884 }
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000885 // clear leaf_state bits
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000886 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
Jonathan Peyton30419822017-05-12 18:01:32 +0000887 }
888 // Next, wait for higher level children on each child's b_arrived flag
889 for (kmp_uint32 d = 1; d < thr_bar->my_level;
890 ++d) { // gather lowest level threads first, but skip 0
891 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
892 skip = thr_bar->skip_per_level[d];
893 if (last > nproc)
894 last = nproc;
895 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +0000896 kmp_info_t *child_thr = other_threads[child_tid];
897 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +0000898 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
899 "T#%d(%d:%d) "
900 "arrived(%p) == %llu\n",
901 gtid, team->t.t_id, tid,
902 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
903 child_tid, &child_bar->b_arrived, new_state));
904 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
905 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
906 ANNOTATE_BARRIER_END(child_thr);
907 if (reduce) {
908 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
909 "T#%d(%d:%d)\n",
910 gtid, team->t.t_id, tid,
911 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
912 child_tid));
913 ANNOTATE_REDUCE_AFTER(reduce);
914 (*reduce)(this_thr->th.th_local.reduce_data,
915 child_thr->th.th_local.reduce_data);
916 ANNOTATE_REDUCE_BEFORE(reduce);
917 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
918 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000919 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000920 }
921 } else { // Blocktime is not infinite
922 for (kmp_uint32 d = 0; d < thr_bar->my_level;
923 ++d) { // Gather lowest level threads first
924 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
925 skip = thr_bar->skip_per_level[d];
926 if (last > nproc)
927 last = nproc;
928 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +0000929 kmp_info_t *child_thr = other_threads[child_tid];
930 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +0000931 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
932 "T#%d(%d:%d) "
933 "arrived(%p) == %llu\n",
934 gtid, team->t.t_id, tid,
935 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
936 child_tid, &child_bar->b_arrived, new_state));
937 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
938 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
939 ANNOTATE_BARRIER_END(child_thr);
940 if (reduce) {
941 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
942 "T#%d(%d:%d)\n",
943 gtid, team->t.t_id, tid,
944 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
945 child_tid));
946 ANNOTATE_REDUCE_AFTER(reduce);
947 (*reduce)(this_thr->th.th_local.reduce_data,
948 child_thr->th.th_local.reduce_data);
949 ANNOTATE_REDUCE_BEFORE(reduce);
950 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
951 }
952 }
953 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000954 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000955 }
956 // All subordinates are gathered; now release parent if not master thread
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000957
Jonathan Peyton30419822017-05-12 18:01:32 +0000958 if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
959 KA_TRACE(
960 20,
961 ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
962 "arrived(%p): %llu => %llu\n",
963 gtid, team->t.t_id, tid,
964 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
965 thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
966 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
967 /* Mark arrival to parent: After performing this write, a worker thread may
968 not assume that the team is valid any more - it could be deallocated by
969 the master thread at any time. */
970 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
971 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
972 // flag; release it
973 ANNOTATE_BARRIER_BEGIN(this_thr);
974 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
975 flag.release();
976 } else { // Leaf does special release on the "offset" bits of parent's
977 // b_arrived flag
978 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
979 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
980 flag.set_waiter(other_threads[thr_bar->parent_tid]);
981 flag.release();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000982 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000983 } else { // Master thread needs to update the team's b_arrived value
984 team->t.t_bar[bt].b_arrived = new_state;
985 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
986 "arrived(%p) = %llu\n",
987 gtid, team->t.t_id, tid, team->t.t_id,
988 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
989 }
990 // Is the team access below unsafe or just technically invalid?
991 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
992 "barrier type %d\n",
993 gtid, team->t.t_id, tid, bt));
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000994}
995
Jonathan Peyton30419822017-05-12 18:01:32 +0000996static void __kmp_hierarchical_barrier_release(
997 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
998 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
999 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
Ed Maste414544c2017-07-07 21:06:05 +00001000 kmp_team_t *team;
1001 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1002 kmp_uint32 nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001003 bool team_change = false; // indicates on-core barrier shouldn't be used
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001004
Jonathan Peyton30419822017-05-12 18:01:32 +00001005 if (KMP_MASTER_TID(tid)) {
1006 team = __kmp_threads[gtid]->th.th_team;
1007 KMP_DEBUG_ASSERT(team != NULL);
1008 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
1009 "entered barrier type %d\n",
1010 gtid, team->t.t_id, tid, bt));
1011 } else { // Worker threads
1012 // Wait for parent thread to release me
1013 if (!thr_bar->use_oncore_barrier ||
1014 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
1015 thr_bar->team == NULL) {
1016 // Use traditional method of waiting on my own b_go flag
1017 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
1018 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1019 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1020 ANNOTATE_BARRIER_END(this_thr);
1021 TCW_8(thr_bar->b_go,
1022 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1023 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1024 // infinite, not nested
1025 // Wait on my "offset" bits on parent's b_go flag
1026 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
1027 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1028 thr_bar->offset, bt,
1029 this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
1030 flag.wait(this_thr, TRUE);
1031 if (thr_bar->wait_flag ==
1032 KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
1033 TCW_8(thr_bar->b_go,
1034 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1035 } else { // Reset my bits on parent's b_go flag
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001036 (RCAST(volatile char *,
1037 &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00001038 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001039 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001040 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1041 // Early exit for reaping threads releasing forkjoin barrier
1042 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1043 return;
1044 // The worker thread may now assume that the team is valid.
1045 team = __kmp_threads[gtid]->th.th_team;
1046 KMP_DEBUG_ASSERT(team != NULL);
1047 tid = __kmp_tid_from_gtid(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001048
Jonathan Peyton30419822017-05-12 18:01:32 +00001049 KA_TRACE(
1050 20,
1051 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1052 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1053 KMP_MB(); // Flush all pending memory write invalidates.
1054 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001055
Jonathan Peyton30419822017-05-12 18:01:32 +00001056 nproc = this_thr->th.th_team_nproc;
1057 int level = team->t.t_level;
Jonathan Peyton441f3372015-09-21 17:24:46 +00001058#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001059 if (team->t.t_threads[0]
1060 ->th.th_teams_microtask) { // are we inside the teams construct?
1061 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1062 this_thr->th.th_teams_level == level)
1063 ++level; // level was not increased in teams construct for team_of_workers
1064 if (this_thr->th.th_teams_size.nteams > 1)
1065 ++level; // level was not increased in teams construct for team_of_masters
1066 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00001067#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001068 if (level == 1)
1069 thr_bar->use_oncore_barrier = 1;
1070 else
1071 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001072
Jonathan Peyton30419822017-05-12 18:01:32 +00001073 // If the team size has increased, we still communicate with old leaves via
1074 // oncore barrier.
1075 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1076 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1077 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1078 tid, team);
1079 // But if the entire team changes, we won't use oncore barrier at all
1080 if (team_change)
1081 old_leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001082
1083#if KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00001084 if (propagate_icvs) {
1085 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1086 FALSE);
1087 if (KMP_MASTER_TID(
1088 tid)) { // master already has copy in final destination; copy
1089 copy_icvs(&thr_bar->th_fixed_icvs,
1090 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1091 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1092 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1093 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1094 // leaves (on-core children) pull parent's fixed ICVs directly to local
1095 // ICV store
1096 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1097 &thr_bar->parent_bar->th_fixed_icvs);
1098 // non-leaves will get ICVs piggybacked with b_go via NGO store
1099 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1100 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1101 // access
1102 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1103 else // leaves copy parent's fixed ICVs directly to local ICV store
1104 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1105 &thr_bar->parent_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001106 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001107 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001108#endif // KMP_BARRIER_ICV_PUSH
1109
Jonathan Peyton30419822017-05-12 18:01:32 +00001110 // Now, release my children
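  // Two release paths follow: with infinite blocktime and the on-core barrier,
  // the master does a flat release using NGO stores of the ICV/b_go cache
  // line; otherwise children are released level by level through their own
  // b_go flags.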
1111 if (thr_bar->my_level) { // not a leaf
Ed Maste414544c2017-07-07 21:06:05 +00001112 kmp_int32 child_tid;
Jonathan Peyton30419822017-05-12 18:01:32 +00001113 kmp_uint32 last;
1114 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1115 thr_bar->use_oncore_barrier) {
1116 if (KMP_MASTER_TID(tid)) { // do a flat release
1117 // Set local b_go to bump children via NGO store of the cache line
1118        // containing ICVs and b_go.
1119 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
1120 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1121 // the cache line
1122 ngo_load(&thr_bar->th_fixed_icvs);
1123        // This loop visits all the threads, skipping only the leaf nodes in
1124        // the hierarchy
1125 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1126 child_tid += thr_bar->skip_per_level[1]) {
Ed Maste414544c2017-07-07 21:06:05 +00001127 kmp_bstate_t *child_bar =
Jonathan Peyton30419822017-05-12 18:01:32 +00001128 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1129 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1130 "releasing T#%d(%d:%d)"
1131 " go(%p): %u => %u\n",
1132 gtid, team->t.t_id, tid,
1133 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1134 child_tid, &child_bar->b_go, child_bar->b_go,
1135 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1136 // Use ngo store (if available) to both store ICVs and release child
1137 // via child's b_go
1138 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001139 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001140 ngo_sync();
1141 }
1142 TCW_8(thr_bar->b_go,
1143 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1144 // Now, release leaf children
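      // Leaf kids spinning in the on-core barrier wait on dedicated bits
      // (leaf_state) of this thread's b_go, so ORing those bits in releases
      // them without touching their individual flags.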
1145 if (thr_bar->leaf_kids) { // if there are any
1146 // We test team_change on the off-chance that the level 1 team changed.
1147 if (team_change ||
1148 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1149 if (old_leaf_kids) { // release old leaf kids
1150 thr_bar->b_go |= old_leaf_state;
1151 }
1152 // Release new leaf kids
1153 last = tid + thr_bar->skip_per_level[1];
1154 if (last > nproc)
1155 last = nproc;
1156 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1157 ++child_tid) { // skip_per_level[0]=1
Ed Maste414544c2017-07-07 21:06:05 +00001158 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1159 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +00001160 KA_TRACE(
1161 20,
1162 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1163 " T#%d(%d:%d) go(%p): %u => %u\n",
1164 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1165 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1166 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1167 // Release child using child's b_go flag
1168 ANNOTATE_BARRIER_BEGIN(child_thr);
1169 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1170 flag.release();
1171 }
1172 } else { // Release all children at once with leaf_state bits on my own
1173 // b_go flag
1174 thr_bar->b_go |= thr_bar->leaf_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001175 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001176 }
1177 } else { // Blocktime is not infinite; do a simple hierarchical release
1178 for (int d = thr_bar->my_level - 1; d >= 0;
1179 --d) { // Release highest level threads first
1180 last = tid + thr_bar->skip_per_level[d + 1];
1181 kmp_uint32 skip = thr_bar->skip_per_level[d];
1182 if (last > nproc)
1183 last = nproc;
1184 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +00001185 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1186 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +00001187 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1188 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1189 gtid, team->t.t_id, tid,
1190 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1191 child_tid, &child_bar->b_go, child_bar->b_go,
1192 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1193 // Release child using child's b_go flag
1194 ANNOTATE_BARRIER_BEGIN(child_thr);
1195 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1196 flag.release();
1197 }
1198 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001199 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001200#if KMP_BARRIER_ICV_PUSH
1201 if (propagate_icvs && !KMP_MASTER_TID(tid))
1202 // non-leaves copy ICVs from fixed ICVs to local dest
1203 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1204 &thr_bar->th_fixed_icvs);
1205#endif // KMP_BARRIER_ICV_PUSH
1206 }
1207 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1208 "barrier type %d\n",
1209 gtid, team->t.t_id, tid, bt));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001210}
1211
Jonathan Peyton30419822017-05-12 18:01:32 +00001212// End of Barrier Algorithms
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001213
1214// Internal function to do a barrier.
1215/* If is_split is true, do a split barrier; otherwise, do a plain barrier.
Jonathan Peyton30419822017-05-12 18:01:32 +00001216   If reduce is non-NULL, do a split reduction barrier; otherwise, do a
1217   regular barrier.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001218 Returns 0 if master thread, 1 if worker thread. */
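// Illustrative sketch (not a call site in this file): how a caller might use
// the split form for a reduction; the names data and reduce_func below are
// hypothetical.
//
//   if (__kmp_barrier(bs_plain_barrier, gtid, TRUE /* is_split */,
//                     sizeof(data), &data, reduce_func) == 0) {
//     // Master: the gather phase has combined the workers' reduce_data via
//     // reduce_func; finish the reduction, then release the waiting workers.
//     __kmp_end_split_barrier(bs_plain_barrier, gtid);
//   }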
Jonathan Peyton30419822017-05-12 18:01:32 +00001219int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
1220 size_t reduce_size, void *reduce_data,
1221 void (*reduce)(void *, void *)) {
1222 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1223 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Ed Maste414544c2017-07-07 21:06:05 +00001224 int tid = __kmp_tid_from_gtid(gtid);
1225 kmp_info_t *this_thr = __kmp_threads[gtid];
1226 kmp_team_t *team = this_thr->th.th_team;
1227 int status = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00001228 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001229#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001230 ompt_data_t *my_task_data;
1231 ompt_data_t *my_parallel_data;
1232 void *return_address;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001234
Jonathan Peyton30419822017-05-12 18:01:32 +00001235 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1236 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001237
Jonathan Peyton30419822017-05-12 18:01:32 +00001238 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001239#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001240 if (ompt_enabled.enabled) {
1241#if OMPT_OPTIONAL
1242 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
1243 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
1244 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
1245 if (ompt_enabled.ompt_callback_sync_region) {
1246 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1247 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1248 my_task_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001249 }
Joachim Protze82e94a52017-11-01 10:08:30 +00001250 if (ompt_enabled.ompt_callback_sync_region_wait) {
1251 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1252 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1253 my_task_data, return_address);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001254 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001255#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001256 // It is OK to report the barrier state after the barrier begin callback.
1257 // According to the OMPT specification, a compliant implementation may
1258 // even delay reporting this state until the barrier begins to wait.
Joachim Protze82e94a52017-11-01 10:08:30 +00001259 this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
Jonathan Peyton30419822017-05-12 18:01:32 +00001260 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001261#endif
1262
Jonathan Peyton30419822017-05-12 18:01:32 +00001263 if (!team->t.t_serialized) {
1264#if USE_ITT_BUILD
1265 // This value will be used in itt notify events below.
1266 void *itt_sync_obj = NULL;
1267#if USE_ITT_NOTIFY
1268 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1269 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1270#endif
1271#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001272 if (__kmp_tasking_mode == tskm_extra_barrier) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001273 __kmp_tasking_barrier(team, this_thr, gtid);
1274 KA_TRACE(15,
1275 ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1276 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001277 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001278
Jonathan Peyton30419822017-05-12 18:01:32 +00001279 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1280 access it when the team struct is not guaranteed to exist. */
1281 // See note about the corresponding code in __kmp_join_barrier() being
1282 // performance-critical.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001283 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001284#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00001285 this_thr->th.th_team_bt_intervals =
1286 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1287 this_thr->th.th_team_bt_set =
1288 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001289#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001290 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001291#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001292 }
1293
1294#if USE_ITT_BUILD
1295 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001296 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001297#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001298#if USE_DEBUGGER
1299    // Let the debugger know: the thread has arrived at the barrier and is waiting.
1300 if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
1301 team->t.t_bar[bt].b_master_arrived += 1;
1302 } else {
1303 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1304 } // if
1305#endif /* USE_DEBUGGER */
1306 if (reduce != NULL) {
1307 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1308 this_thr->th.th_local.reduce_data = reduce_data;
1309 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001310
Jonathan Peyton30419822017-05-12 18:01:32 +00001311 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1312 __kmp_task_team_setup(
1313 this_thr, team,
1314 0); // use 0 to only setup the current team if nthreads > 1
1315
1316 switch (__kmp_barrier_gather_pattern[bt]) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001317 case bp_hyper_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001318 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1319 // to 0; use linear
1320 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1321 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1322 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001323 }
1324 case bp_hierarchical_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001325 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
1326 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1327 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001328 }
1329 case bp_tree_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001330 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1331 // to 0; use linear
1332 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1333 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1334 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001335 }
1336 default: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001337 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1338 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001339 }
1340 }
1341
Jonathan Peyton30419822017-05-12 18:01:32 +00001342 KMP_MB();
1343
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001344 if (KMP_MASTER_TID(tid)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001345 status = 0;
1346 if (__kmp_tasking_mode != tskm_immediate_exec) {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001347 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 }
1349#if USE_DEBUGGER
1350      // Let the debugger know: all threads have arrived and are starting to
1351      // leave the barrier.
1352 team->t.t_bar[bt].b_team_arrived += 1;
1353#endif
1354
1355#if OMP_40_ENABLED
1356 // Reset cancellation flag for worksharing constructs
1357 if (team->t.t_cancel_request == cancel_loop ||
1358 team->t.t_cancel_request == cancel_sections) {
1359 team->t.t_cancel_request = cancel_noreq;
1360 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001361#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001362#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001363 /* TODO: In case of split reduction barrier, master thread may send
1364 acquired event early, before the final summation into the shared
1365 variable is done (final summation can be a long operation for array
1366 reductions). */
1367 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1368 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1369#endif /* USE_ITT_BUILD */
1370#if USE_ITT_BUILD && USE_ITT_NOTIFY
1371 // Barrier - report frame end (only if active_level == 1)
1372 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1373 __kmp_forkjoin_frames_mode &&
1374#if OMP_40_ENABLED
1375 this_thr->th.th_teams_microtask == NULL &&
1376#endif
1377 team->t.t_active_level == 1) {
1378 kmp_uint64 cur_time = __itt_get_timestamp();
1379 kmp_info_t **other_threads = team->t.t_threads;
1380 int nproc = this_thr->th.th_team_nproc;
1381 int i;
1382 switch (__kmp_forkjoin_frames_mode) {
1383 case 1:
1384 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1385 loc, nproc);
1386 this_thr->th.th_frame_time = cur_time;
1387 break;
1388 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1389 // be fixed)
1390 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1391 1, loc, nproc);
1392 break;
1393 case 3:
1394 if (__itt_metadata_add_ptr) {
1395 // Initialize with master's wait time
1396 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1397 // Set arrive time to zero to be able to check it in
1398 // __kmp_invoke_task(); the same is done inside the loop below
1399 this_thr->th.th_bar_arrive_time = 0;
1400 for (i = 1; i < nproc; ++i) {
1401 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1402 other_threads[i]->th.th_bar_arrive_time = 0;
1403 }
1404 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1405 cur_time, delta,
1406 (kmp_uint64)(reduce != NULL));
1407 }
1408 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1409 loc, nproc);
1410 this_thr->th.th_frame_time = cur_time;
1411 break;
1412 }
1413 }
1414#endif /* USE_ITT_BUILD */
1415 } else {
1416 status = 1;
1417#if USE_ITT_BUILD
1418 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1419 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1420#endif /* USE_ITT_BUILD */
1421 }
1422 if (status == 1 || !is_split) {
1423 switch (__kmp_barrier_release_pattern[bt]) {
1424 case bp_hyper_bar: {
1425 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1426 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1427 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1428 break;
1429 }
1430 case bp_hierarchical_bar: {
1431 __kmp_hierarchical_barrier_release(
1432 bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1433 break;
1434 }
1435 case bp_tree_bar: {
1436 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1437 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1438 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1439 break;
1440 }
1441 default: {
1442 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1443 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1444 }
1445 }
1446 if (__kmp_tasking_mode != tskm_immediate_exec) {
1447 __kmp_task_team_sync(this_thr, team);
1448 }
1449 }
1450
1451#if USE_ITT_BUILD
1452 /* GEH: TODO: Move this under if-condition above and also include in
1453 __kmp_end_split_barrier(). This will more accurately represent the actual
1454 release time of the threads for split barriers. */
1455 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1456 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1457#endif /* USE_ITT_BUILD */
1458 } else { // Team is serialized.
1459 status = 0;
1460 if (__kmp_tasking_mode != tskm_immediate_exec) {
1461#if OMP_45_ENABLED
1462 if (this_thr->th.th_task_team != NULL) {
1463 void *itt_sync_obj = NULL;
1464#if USE_ITT_NOTIFY
1465 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1466 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1467 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1468 }
1469#endif
1470
1471 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
1472 TRUE);
1473 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1474 __kmp_task_team_setup(this_thr, team, 0);
1475
1476#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001477 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001478 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1479#endif /* USE_ITT_BUILD */
1480 }
1481#else
1482 // The task team should be NULL for serialized code (tasks will be
1483 // executed immediately)
1484 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
1485 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
1486#endif
1487 }
1488 }
1489 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1490 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
1491 __kmp_tid_from_gtid(gtid), status));
1492
1493#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001494 if (ompt_enabled.enabled) {
1495#if OMPT_OPTIONAL
1496 if (ompt_enabled.ompt_callback_sync_region_wait) {
1497 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1498 ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
1499 my_task_data, return_address);
1500 }
1501 if (ompt_enabled.ompt_callback_sync_region) {
1502 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1503 ompt_sync_region_barrier, ompt_scope_end, my_parallel_data,
1504 my_task_data, return_address);
Jonathan Peyton30419822017-05-12 18:01:32 +00001505 }
1506#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001507 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
Jonathan Peyton30419822017-05-12 18:01:32 +00001508 }
1509#endif
1510 ANNOTATE_BARRIER_END(&team->t.t_bar);
1511
1512 return status;
1513}
1514
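// Completes a split barrier: the master thread runs the release phase, waking
// the workers still blocked in __kmp_barrier(), and resyncs its task team
// pointer. Worker callers and serialized teams do nothing here.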
1515void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
1516 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1517 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1518 int tid = __kmp_tid_from_gtid(gtid);
1519 kmp_info_t *this_thr = __kmp_threads[gtid];
1520 kmp_team_t *team = this_thr->th.th_team;
1521
1522 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1523 if (!team->t.t_serialized) {
1524 if (KMP_MASTER_GTID(gtid)) {
1525 switch (__kmp_barrier_release_pattern[bt]) {
1526 case bp_hyper_bar: {
1527 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1528 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1529 FALSE USE_ITT_BUILD_ARG(NULL));
1530 break;
1531 }
1532 case bp_hierarchical_bar: {
1533 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
1534 FALSE USE_ITT_BUILD_ARG(NULL));
1535 break;
1536 }
1537 case bp_tree_bar: {
1538 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1539 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1540 FALSE USE_ITT_BUILD_ARG(NULL));
1541 break;
1542 }
1543 default: {
1544 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1545 FALSE USE_ITT_BUILD_ARG(NULL));
1546 }
1547 }
1548 if (__kmp_tasking_mode != tskm_immediate_exec) {
1549 __kmp_task_team_sync(this_thr, team);
1550 } // if
1551 }
1552 }
1553 ANNOTATE_BARRIER_END(&team->t.t_bar);
1554}
1555
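// Barrier executed by every thread at the end of a parallel region. Only the
// gather phase runs here; workers are released later, in the fork barrier of
// the next parallel region (or when the runtime shuts down).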
1556void __kmp_join_barrier(int gtid) {
1557 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
1558 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Ed Maste414544c2017-07-07 21:06:05 +00001559 kmp_info_t *this_thr = __kmp_threads[gtid];
1560 kmp_team_t *team;
1561 kmp_uint nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001562 kmp_info_t *master_thread;
1563 int tid;
1564#ifdef KMP_DEBUG
1565 int team_id;
1566#endif /* KMP_DEBUG */
1567#if USE_ITT_BUILD
1568 void *itt_sync_obj = NULL;
1569#if USE_ITT_NOTIFY
1570  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine unless needed
1571 // Get object created at fork_barrier
1572 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1573#endif
1574#endif /* USE_ITT_BUILD */
1575 KMP_MB();
1576
1577 // Get current info
1578 team = this_thr->th.th_team;
1579 nproc = this_thr->th.th_team_nproc;
1580 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1581 tid = __kmp_tid_from_gtid(gtid);
1582#ifdef KMP_DEBUG
1583 team_id = team->t.t_id;
1584#endif /* KMP_DEBUG */
1585 master_thread = this_thr->th.th_team_master;
1586#ifdef KMP_DEBUG
1587 if (master_thread != team->t.t_threads[0]) {
1588 __kmp_print_structure();
1589 }
1590#endif /* KMP_DEBUG */
1591 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1592 KMP_MB();
1593
1594 // Verify state
1595 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1596 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1597 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1598 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1599 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
1600 gtid, team_id, tid));
1601
1602 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1603#if OMPT_SUPPORT
Joachim Protze82e94a52017-11-01 10:08:30 +00001604 ompt_data_t *my_task_data;
1605 ompt_data_t *my_parallel_data;
1606 if (ompt_enabled.enabled) {
1607#if OMPT_OPTIONAL
1608 void *codeptr = NULL;
1609 int ds_tid = this_thr->th.th_info.ds.ds_tid;
1610 if (KMP_MASTER_TID(ds_tid) &&
1611 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
1612 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
1613 codeptr = team->t.ompt_team_info.master_return_address;
1614 my_task_data = OMPT_CUR_TASK_DATA(this_thr);
1615 my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr);
1616 if (ompt_enabled.ompt_callback_sync_region) {
1617 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1618 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1619 my_task_data, codeptr);
1620 }
1621 if (ompt_enabled.ompt_callback_sync_region_wait) {
1622 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1623 ompt_sync_region_barrier, ompt_scope_begin, my_parallel_data,
1624 my_task_data, codeptr);
1625 }
Joachim Protze14b512e2018-01-10 12:51:27 +00001626 if (!KMP_MASTER_TID(ds_tid))
1627 this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
Jonathan Peyton30419822017-05-12 18:01:32 +00001628#endif
Joachim Protze82e94a52017-11-01 10:08:30 +00001629 this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
1630 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001631#endif
1632
1633 if (__kmp_tasking_mode == tskm_extra_barrier) {
1634 __kmp_tasking_barrier(team, this_thr, gtid);
1635    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1636 team_id, tid));
1637 }
1638#ifdef KMP_DEBUG
1639 if (__kmp_tasking_mode != tskm_immediate_exec) {
1640 KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
1641 "%p, th_task_team = %p\n",
1642 __kmp_gtid_from_thread(this_thr), team_id,
1643 team->t.t_task_team[this_thr->th.th_task_state],
1644 this_thr->th.th_task_team));
1645 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
1646 team->t.t_task_team[this_thr->th.th_task_state]);
1647 }
1648#endif /* KMP_DEBUG */
1649
1650 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1651 access it when the team struct is not guaranteed to exist. Doing these
1652     loads causes a cache miss that slows down EPCC parallel by 2x. As a workaround,
1653 we do not perform the copy if blocktime=infinite, since the values are not
1654 used by __kmp_wait_template() in that case. */
1655 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1656#if KMP_USE_MONITOR
1657 this_thr->th.th_team_bt_intervals =
1658 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1659 this_thr->th.th_team_bt_set =
1660 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1661#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001662 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001663#endif
1664 }
1665
1666#if USE_ITT_BUILD
1667 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1668 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001669#endif /* USE_ITT_BUILD */
1670
Jonathan Peyton30419822017-05-12 18:01:32 +00001671 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1672 case bp_hyper_bar: {
1673 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1674 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1675 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1676 break;
1677 }
1678 case bp_hierarchical_bar: {
1679 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1680 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1681 break;
1682 }
1683 case bp_tree_bar: {
1684 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1685 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1686 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1687 break;
1688 }
1689 default: {
1690 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1691 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1692 }
1693 }
1694
1695 /* From this point on, the team data structure may be deallocated at any time
1696 by the master thread - it is unsafe to reference it in any of the worker
1697 threads. Any per-team data items that need to be referenced before the
1698 end of the barrier should be moved to the kmp_task_team_t structs. */
1699 if (KMP_MASTER_TID(tid)) {
1700 if (__kmp_tasking_mode != tskm_immediate_exec) {
1701 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1702 }
1703#if KMP_STATS_ENABLED
1704 // Have master thread flag the workers to indicate they are now waiting for
1705    // next parallel region. Also wake them up so they switch their timers to
1706 // idle.
1707 for (int i = 0; i < team->t.t_nproc; ++i) {
1708 kmp_info_t *team_thread = team->t.t_threads[i];
1709 if (team_thread == this_thr)
1710 continue;
1711 team_thread->th.th_stats->setIdleFlag();
1712 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
1713 team_thread->th.th_sleep_loc != NULL)
1714 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
1715 team_thread->th.th_sleep_loc);
1716 }
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001717#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001718#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001719 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1720 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1721#endif /* USE_ITT_BUILD */
1722
1723#if USE_ITT_BUILD && USE_ITT_NOTIFY
1724 // Join barrier - report frame end
1725 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1726 __kmp_forkjoin_frames_mode &&
1727#if OMP_40_ENABLED
1728 this_thr->th.th_teams_microtask == NULL &&
1729#endif
1730 team->t.t_active_level == 1) {
1731 kmp_uint64 cur_time = __itt_get_timestamp();
1732 ident_t *loc = team->t.t_ident;
1733 kmp_info_t **other_threads = team->t.t_threads;
1734 int nproc = this_thr->th.th_team_nproc;
1735 int i;
1736 switch (__kmp_forkjoin_frames_mode) {
1737 case 1:
1738 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1739 loc, nproc);
1740 break;
1741 case 2:
1742 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
1743 loc, nproc);
1744 break;
1745 case 3:
1746 if (__itt_metadata_add_ptr) {
1747 // Initialize with master's wait time
1748 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1749 // Set arrive time to zero to be able to check it in
1750 // __kmp_invoke_task(); the same is done inside the loop below
1751 this_thr->th.th_bar_arrive_time = 0;
1752 for (i = 1; i < nproc; ++i) {
1753 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1754 other_threads[i]->th.th_bar_arrive_time = 0;
1755 }
1756 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1757 cur_time, delta, 0);
1758 }
1759 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1760 loc, nproc);
1761 this_thr->th.th_frame_time = cur_time;
1762 break;
1763 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001764 }
1765#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001766 }
1767#if USE_ITT_BUILD
1768 else {
1769 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1770 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1771 }
1772#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001773
1774#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001775 if (KMP_MASTER_TID(tid)) {
1776 KA_TRACE(
1777 15,
1778 ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1779 gtid, team_id, tid, nproc));
1780 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001781#endif /* KMP_DEBUG */
1782
Jonathan Peyton30419822017-05-12 18:01:32 +00001783 // TODO now, mark worker threads as done so they may be disbanded
1784 KMP_MB(); // Flush all pending memory write invalidates.
1785 KA_TRACE(10,
1786 ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001787
Jonathan Peyton30419822017-05-12 18:01:32 +00001788 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001789}
1790
Jonathan Peyton30419822017-05-12 18:01:32 +00001791// TODO release worker threads' fork barriers as we are ready instead of all at
1792// once
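// Barrier executed at the start of a parallel region (and by workers waiting
// between regions). The master sets up the task team and blocktime info, all
// threads run the release phase, and workers then pick up the new team, their
// ICVs (under KMP_BARRIER_ICV_PULL), and their affinity places.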
1793void __kmp_fork_barrier(int gtid, int tid) {
1794 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
1795 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1796 kmp_info_t *this_thr = __kmp_threads[gtid];
1797 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001799 void *itt_sync_obj = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 if (team)
1802 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001803
Jonathan Peyton30419822017-05-12 18:01:32 +00001804 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
1805 (team != NULL) ? team->t.t_id : -1, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806
Jonathan Peyton30419822017-05-12 18:01:32 +00001807 // th_team pointer only valid for master thread here
1808 if (KMP_MASTER_TID(tid)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001809#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1811 // Create itt barrier object
1812 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1813 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1814 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1816
1817#ifdef KMP_DEBUG
Ed Maste414544c2017-07-07 21:06:05 +00001818 kmp_info_t **other_threads = team->t.t_threads;
1819 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001820
Jonathan Peyton30419822017-05-12 18:01:32 +00001821 // Verify state
1822 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001823
Jonathan Peyton30419822017-05-12 18:01:32 +00001824 for (i = 1; i < team->t.t_nproc; ++i) {
1825 KA_TRACE(500,
1826 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
1827 "== %u.\n",
1828 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1829 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1830 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1831 KMP_DEBUG_ASSERT(
1832 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
1833 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
1834 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1835 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001836#endif
1837
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001838 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001839 // 0 indicates setup current task team if nthreads > 1
1840 __kmp_task_team_setup(this_thr, team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001841 }
1842
Jonathan Peyton30419822017-05-12 18:01:32 +00001843 /* The master thread may have changed its blocktime between the join barrier
1844 and the fork barrier. Copy the blocktime info to the thread, where
1845 __kmp_wait_template() can access it when the team struct is not
1846 guaranteed to exist. */
1847 // See note about the corresponding code in __kmp_join_barrier() being
1848 // performance-critical
1849 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1850#if KMP_USE_MONITOR
1851 this_thr->th.th_team_bt_intervals =
1852 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1853 this_thr->th.th_team_bt_set =
1854 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1855#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001856 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001857#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001858 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001859 } // master
1860
1861 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1862 case bp_hyper_bar: {
1863 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1864 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1865 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1866 break;
1867 }
1868 case bp_hierarchical_bar: {
1869 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1870 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1871 break;
1872 }
1873 case bp_tree_bar: {
1874 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1875 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1876 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1877 break;
1878 }
1879 default: {
1880 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1881 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1882 }
1883 }
1884
Joachim Protze82e94a52017-11-01 10:08:30 +00001885#if OMPT_SUPPORT
1886 if (ompt_enabled.enabled) {
1887 if (this_thr->th.ompt_thread_info.state ==
1888 omp_state_wait_barrier_implicit) {
1889 int ds_tid = this_thr->th.th_info.ds.ds_tid;
1890 ompt_data_t *tId = (team) ? OMPT_CUR_TASK_DATA(this_thr)
1891 : &(this_thr->th.ompt_thread_info.task_data);
1892 this_thr->th.ompt_thread_info.state = omp_state_overhead;
1893#if OMPT_OPTIONAL
1894 void *codeptr = NULL;
1895 if (KMP_MASTER_TID(ds_tid) &&
1896 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
1897 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
1898 codeptr = team->t.ompt_team_info.master_return_address;
1899 if (ompt_enabled.ompt_callback_sync_region_wait) {
1900 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
1901 ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
1902 }
1903 if (ompt_enabled.ompt_callback_sync_region) {
1904 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
1905 ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
1906 }
1907#endif
1908 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
1909 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1910 ompt_scope_end, NULL, tId, 0, ds_tid);
1911 }
1912 // return to idle state
1913 this_thr->th.ompt_thread_info.state = omp_state_overhead;
1914 }
1915 }
1916#endif
1917
Jonathan Peyton30419822017-05-12 18:01:32 +00001918 // Early exit for reaping threads releasing forkjoin barrier
1919 if (TCR_4(__kmp_global.g.g_done)) {
1920 this_thr->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001921
1922#if USE_ITT_BUILD && USE_ITT_NOTIFY
1923 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001924 if (!KMP_MASTER_TID(tid)) {
1925 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1926 if (itt_sync_obj)
1927 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1928 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001929 }
1930#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00001931 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1932 return;
1933 }
1934
1935 /* We can now assume that a valid team structure has been allocated by the
1936 master and propagated to all worker threads. The current thread, however,
1937 may not be part of the team, so we can't blindly assume that the team
1938 pointer is non-null. */
1939 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1940 KMP_DEBUG_ASSERT(team != NULL);
1941 tid = __kmp_tid_from_gtid(gtid);
1942
1943#if KMP_BARRIER_ICV_PULL
1944 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1945 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1946 implicit task has this data before this function is called. We cannot
1947 modify __kmp_fork_call() to look at the fixed ICVs in the master's thread
1948 struct, because it is not always the case that the threads arrays have
1949 been allocated when __kmp_fork_call() is executed. */
1950 {
1951 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
1952 if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
1953 // Copy the initial ICVs from the master's thread struct to the implicit
1954 // task for this tid.
1955 KA_TRACE(10,
1956 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1957 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
1958 tid, FALSE);
1959 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1960 &team->t.t_threads[0]
1961 ->th.th_bar[bs_forkjoin_barrier]
1962 .bb.th_fixed_icvs);
1963 }
1964 }
1965#endif // KMP_BARRIER_ICV_PULL
1966
1967 if (__kmp_tasking_mode != tskm_immediate_exec) {
1968 __kmp_task_team_sync(this_thr, team);
1969 }
1970
1971#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1972 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1973 if (proc_bind == proc_bind_intel) {
1974#endif
1975#if KMP_AFFINITY_SUPPORTED
1976 // Call dynamic affinity settings
1977 if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1978 __kmp_balanced_affinity(tid, team->t.t_nproc);
1979 }
1980#endif // KMP_AFFINITY_SUPPORTED
1981#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1982 } else if (proc_bind != proc_bind_false) {
1983 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1984 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
1985 __kmp_gtid_from_thread(this_thr),
1986 this_thr->th.th_current_place));
1987 } else {
1988 __kmp_affinity_set_place(gtid);
1989 }
1990 }
1991#endif
1992
1993#if USE_ITT_BUILD && USE_ITT_NOTIFY
1994 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1995 if (!KMP_MASTER_TID(tid)) {
1996 // Get correct barrier object
1997 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1998 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1999 } // (prepare called inside barrier_release)
2000 }
2001#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
2002 ANNOTATE_BARRIER_END(&team->t.t_bar);
2003 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
2004 team->t.t_id, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002005}
2006
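// Prepares ICV propagation to the workers of a (re)formed team. Three
// strategies: KMP_BARRIER_ICV_PULL stages the ICVs in the master's
// th_fixed_icvs for workers to pull in the fork barrier, KMP_BARRIER_ICV_PUSH
// defers everything to the fork-barrier release, and the default copies them
// into every worker's implicit task right here, in O(nthreads) time.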
Jonathan Peyton30419822017-05-12 18:01:32 +00002007void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
2008 kmp_internal_control_t *new_icvs, ident_t *loc) {
2009 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002010
Jonathan Peyton30419822017-05-12 18:01:32 +00002011 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
2012 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002013
Jonathan Peyton30419822017-05-12 18:01:32 +00002014/* Master thread's copy of the ICVs was set up on the implicit taskdata in
2015 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
2016 implicit task has this data before this function is called. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002017#if KMP_BARRIER_ICV_PULL
Jonathan Peyton30419822017-05-12 18:01:32 +00002018 /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains
2019 untouched), where all of the worker threads can access them and make their
2020 own copies after the barrier. */
2021 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
2022 // allocated at this point
2023 copy_icvs(
2024 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
2025 new_icvs);
2026 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
2027 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002028#elif KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00002029 // The ICVs will be propagated in the fork barrier, so nothing needs to be
2030 // done here.
2031 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
2032 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002033#else
Jonathan Peyton30419822017-05-12 18:01:32 +00002034 // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
2035 // time.
2036 ngo_load(new_icvs);
2037 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
2038 // allocated at this point
2039 for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
2040 // TODO: GEH - pass in better source location info since usually NULL here
2041 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2042 f, team->t.t_threads[f], team));
2043 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
2044 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
2045 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2046 f, team->t.t_threads[f], team));
2047 }
2048 ngo_sync();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002049#endif // KMP_BARRIER_ICV_PULL
2050}