/*
 * kmp_barrier.cpp
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"


#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */
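
// Note on the ngo_* helpers above (illustrative sketch, not an extra
// specification; src_icvs/dst are placeholder names): on KMP_MIC with
// USE_NGO_STORES a sequence such as
//   ngo_load(&src_icvs);             // __m512d Vt = _mm512_load_pd(&src_icvs)
//   ngo_store_icvs(&dst, &src_icvs); // _mm512_storenrngo_pd(&dst, Vt)
//   ngo_sync();                      // locked add used as a store fence
// copies a full cache line of ICVs with one wide non-globally-ordered store.
// On every other target the macros fall back to copy_icvs()/KMP_MEMCPY() and
// no-ops, so the barrier code below can use them unconditionally.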

void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------

// Linear Barrier
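// A rough sketch of the scheme implemented below (for orientation only): in
// the gather phase each worker bumps its own thr_bar->b_arrived flag by
// KMP_BARRIER_STATE_BUMP and the master spins on every worker's flag in turn,
// folding in reduce_data when a reduction is requested; in the release phase
// the master bumps each worker's b_go flag while workers spin on their own
// b_go. The master does O(nproc) serialized work, which is why the tree,
// hyper and hierarchical variants further down scale better for large teams.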
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the master thread at any
       time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    int nproc = this_thr->th.th_team_nproc;
    int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    kmp_uint64 new_state =
        team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                       new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    unsigned int i;
    kmp_uint32 nproc = this_thr->th.th_team_nproc;
    kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        ANNOTATE_BARRIER_BEGIN(other_threads[i]);
        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                         other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;
// The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Tree barrier
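// Sketch of the tree indexing used below (illustrative example only): with
// branch_bits == 2 (branch_factor == 4) and nproc == 16, thread tid == 2
// first waits for its children (2 << 2) + 1 .. (2 << 2) + 4, i.e. tids 9-12,
// and then signals its parent (2 - 1) >> 2 == 0. Each worker therefore
// gathers its own subtree before reporting upward, and the release phase
// walks the same tree from the master back down.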
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                              USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint32 nproc = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint64 new_state;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    /* After performing this write, a worker thread may not assume that the team
       is valid any more - it could be deallocated by the master thread at any
       time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}

static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_uint32 nproc;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// Hyper Barrier
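// Sketch of the hypercube pairing used below (illustrative example only):
// with branch_bits == 2 (branch_factor == 4), at level 0 any thread whose two
// low tid bits are nonzero signals tid & ~3 and stops (tid 5 signals tid 4),
// while threads with zero low bits gather tid+1..tid+3 and move up a level,
// so tid 4 in turn signals 4 & ~15 == 0. Each round works on disjoint groups
// of branch_factor threads, giving roughly log_{branch_factor}(nproc) rounds.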
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                           int tid, void (*reduce)(void *, void *)
                               USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  kmp_team_t *team = this_thr->th.th_team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads = team->t.t_threads;
  kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 offset;
  kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    kmp_uint32 child;
    kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing for
         loop), a worker thread may not assume that the team is valid any more
         - it could be deallocated by the master thread at any time. */
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      kmp_info_t *child_thr = other_threads[child_tid];
      kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  kmp_team_t *team;
  kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  kmp_info_t **other_threads;
  kmp_uint32 num_threads;
  kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  kmp_uint32 branch_factor = 1 << branch_bits;
  kmp_uint32 child;
  kmp_uint32 child_tid;
  kmp_uint32 offset;
  kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
     are released in the reverse order of the corresponding gather, otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
      return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
        child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        kmp_info_t *child_thr = other_threads[child_tid];
        kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)"
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Hierarchical Barrier
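// Summary of the scheme below, not an additional specification: the
// hierarchical barrier maps the team onto the machine hierarchy computed by
// __kmp_get_hierarchy. Each thread records a parent_tid and, at the lowest
// level, up to eight on-core "leaf kids". When blocktime is infinite and the
// barrier is not nested, leaves check in by writing their byte
// (thr_bar->offset) of the parent's 64-bit b_arrived word and wait on the
// matching byte of the parent's b_go flag; in all other cases the code falls
// back to the ordinary per-thread b_arrived/b_go flags used by the flat
// algorithms above.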

// Initialize thread barrier data
/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
   Performs the minimum amount of initialization required based on how the team
   has changed. Returns true if leaf children will require both on-core and
   traditional wake-up mechanisms. For example, if the team size increases,
   threads already in the team will respond to on-core wakeup on their parent
   thread, but threads newly added to the team will only be listening on
   their local b_go. */
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(
            tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in
        // hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) !=
                   0) { // TODO: can we make this op faster?
          // thread is not a subtree root at next level, so this is max
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
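
// Worked example of the fields set above (illustrative only): a thread with
// tid == 3 whose parent is tid == 0 gets offset == 7 - (3 - 0 - 1) == 5, so
// it checks in through byte 5 of the parent's 64-bit flag words; a parent
// with leaf_kids == 3 gets bytes 7, 6 and 5 of leaf_state set to 1, which is
// exactly the pattern it waits for (and later clears) in
// __kmp_hierarchical_barrier_gather.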
815
Jonathan Peyton30419822017-05-12 18:01:32 +0000816static void __kmp_hierarchical_barrier_gather(
817 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
818 void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
819 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
Ed Maste414544c2017-07-07 21:06:05 +0000820 kmp_team_t *team = this_thr->th.th_team;
821 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
822 kmp_uint32 nproc = this_thr->th.th_team_nproc;
823 kmp_info_t **other_threads = team->t.t_threads;
824 kmp_uint64 new_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000825
Jonathan Peyton30419822017-05-12 18:01:32 +0000826 int level = team->t.t_level;
Jonathan Peyton441f3372015-09-21 17:24:46 +0000827#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +0000828 if (other_threads[0]
829 ->th.th_teams_microtask) // are we inside the teams construct?
830 if (this_thr->th.th_teams_size.nteams > 1)
831 ++level; // level was not increased in teams construct for team_of_masters
Jonathan Peyton441f3372015-09-21 17:24:46 +0000832#endif
Jonathan Peyton30419822017-05-12 18:01:32 +0000833 if (level == 1)
834 thr_bar->use_oncore_barrier = 1;
835 else
836 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000837
Jonathan Peyton30419822017-05-12 18:01:32 +0000838 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
839 "barrier type %d\n",
840 gtid, team->t.t_id, tid, bt));
841 KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000842
Andrey Churbanove6bfb732015-05-06 18:34:15 +0000843#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +0000844 // Barrier imbalance - save arrive time to the thread
845 if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
846 this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
847 }
Andrey Churbanove6bfb732015-05-06 18:34:15 +0000848#endif
849
Jonathan Peyton30419822017-05-12 18:01:32 +0000850 (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
851 team);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000852
Jonathan Peyton30419822017-05-12 18:01:32 +0000853 if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
Ed Maste414544c2017-07-07 21:06:05 +0000854 kmp_int32 child_tid;
Jonathan Peyton30419822017-05-12 18:01:32 +0000855 new_state =
856 (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
857 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
858 thr_bar->use_oncore_barrier) {
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000859 if (thr_bar->leaf_kids) {
860 // First, wait for leaf children to check-in on my b_arrived flag
Jonathan Peyton30419822017-05-12 18:01:32 +0000861 kmp_uint64 leaf_state =
862 KMP_MASTER_TID(tid)
863 ? thr_bar->b_arrived | thr_bar->leaf_state
864 : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
865 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
866 "for leaf kids\n",
867 gtid, team->t.t_id, tid));
868 kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
869 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
870 if (reduce) {
871 ANNOTATE_REDUCE_AFTER(reduce);
872 for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
873 ++child_tid) {
874 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
875 "T#%d(%d:%d)\n",
876 gtid, team->t.t_id, tid,
877 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
878 child_tid));
879 ANNOTATE_BARRIER_END(other_threads[child_tid]);
880 (*reduce)(this_thr->th.th_local.reduce_data,
881 other_threads[child_tid]->th.th_local.reduce_data);
882 }
883 ANNOTATE_REDUCE_BEFORE(reduce);
884 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000885 }
Andrey Churbanovc47afcd2017-07-03 11:24:08 +0000886 // clear leaf_state bits
Andrey Churbanov5ba90c72017-07-17 09:03:14 +0000887 KMP_TEST_THEN_AND64(&thr_bar->b_arrived, ~(thr_bar->leaf_state));
Jonathan Peyton30419822017-05-12 18:01:32 +0000888 }
889 // Next, wait for higher level children on each child's b_arrived flag
890 for (kmp_uint32 d = 1; d < thr_bar->my_level;
891 ++d) { // gather lowest level threads first, but skip 0
892 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
893 skip = thr_bar->skip_per_level[d];
894 if (last > nproc)
895 last = nproc;
896 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +0000897 kmp_info_t *child_thr = other_threads[child_tid];
898 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +0000899 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
900 "T#%d(%d:%d) "
901 "arrived(%p) == %llu\n",
902 gtid, team->t.t_id, tid,
903 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
904 child_tid, &child_bar->b_arrived, new_state));
905 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
906 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
907 ANNOTATE_BARRIER_END(child_thr);
908 if (reduce) {
909 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
910 "T#%d(%d:%d)\n",
911 gtid, team->t.t_id, tid,
912 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
913 child_tid));
914 ANNOTATE_REDUCE_AFTER(reduce);
915 (*reduce)(this_thr->th.th_local.reduce_data,
916 child_thr->th.th_local.reduce_data);
917 ANNOTATE_REDUCE_BEFORE(reduce);
918 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
919 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000920 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000921 }
922 } else { // Blocktime is not infinite
923 for (kmp_uint32 d = 0; d < thr_bar->my_level;
924 ++d) { // Gather lowest level threads first
925 kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
926 skip = thr_bar->skip_per_level[d];
927 if (last > nproc)
928 last = nproc;
929 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +0000930 kmp_info_t *child_thr = other_threads[child_tid];
931 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +0000932 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
933 "T#%d(%d:%d) "
934 "arrived(%p) == %llu\n",
935 gtid, team->t.t_id, tid,
936 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
937 child_tid, &child_bar->b_arrived, new_state));
938 kmp_flag_64 flag(&child_bar->b_arrived, new_state);
939 flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
940 ANNOTATE_BARRIER_END(child_thr);
941 if (reduce) {
942 KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
943 "T#%d(%d:%d)\n",
944 gtid, team->t.t_id, tid,
945 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
946 child_tid));
947 ANNOTATE_REDUCE_AFTER(reduce);
948 (*reduce)(this_thr->th.th_local.reduce_data,
949 child_thr->th.th_local.reduce_data);
950 ANNOTATE_REDUCE_BEFORE(reduce);
951 ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
952 }
953 }
954 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000955 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000956 }
957 // All subordinates are gathered; now release parent if not master thread
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000958
Jonathan Peyton30419822017-05-12 18:01:32 +0000959 if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
960 KA_TRACE(
961 20,
962 ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
963 "arrived(%p): %llu => %llu\n",
964 gtid, team->t.t_id, tid,
965 __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
966 thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
967 thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
968 /* Mark arrival to parent: After performing this write, a worker thread may
969 not assume that the team is valid any more - it could be deallocated by
970 the master thread at any time. */
971 if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
972 !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
973 // flag; release it
974 ANNOTATE_BARRIER_BEGIN(this_thr);
975 kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
976 flag.release();
977 } else { // Leaf does special release on the "offset" bits of parent's
978 // b_arrived flag
979 thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
980 kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
981 flag.set_waiter(other_threads[thr_bar->parent_tid]);
982 flag.release();
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000983 }
Jonathan Peyton30419822017-05-12 18:01:32 +0000984 } else { // Master thread needs to update the team's b_arrived value
985 team->t.t_bar[bt].b_arrived = new_state;
986 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
987 "arrived(%p) = %llu\n",
988 gtid, team->t.t_id, tid, team->t.t_id,
989 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
990 }
991 // Is the team access below unsafe or just technically invalid?
992 KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
993 "barrier type %d\n",
994 gtid, team->t.t_id, tid, bt));
Jim Cownie4cc4bb42014-10-07 16:25:50 +0000995}
996
Jonathan Peyton30419822017-05-12 18:01:32 +0000997static void __kmp_hierarchical_barrier_release(
998 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
999 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1000 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
Ed Maste414544c2017-07-07 21:06:05 +00001001 kmp_team_t *team;
1002 kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1003 kmp_uint32 nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001004 bool team_change = false; // indicates on-core barrier shouldn't be used
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001005
Jonathan Peyton30419822017-05-12 18:01:32 +00001006 if (KMP_MASTER_TID(tid)) {
1007 team = __kmp_threads[gtid]->th.th_team;
1008 KMP_DEBUG_ASSERT(team != NULL);
1009 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
1010 "entered barrier type %d\n",
1011 gtid, team->t.t_id, tid, bt));
1012 } else { // Worker threads
1013 // Wait for parent thread to release me
1014 if (!thr_bar->use_oncore_barrier ||
1015 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
1016 thr_bar->team == NULL) {
1017 // Use traditional method of waiting on my own b_go flag
1018 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
1019 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1020 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1021 ANNOTATE_BARRIER_END(this_thr);
1022 TCW_8(thr_bar->b_go,
1023 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1024 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1025 // infinite, not nested
1026 // Wait on my "offset" bits on parent's b_go flag
1027 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
1028 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1029 thr_bar->offset, bt,
1030 this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
1031 flag.wait(this_thr, TRUE);
1032 if (thr_bar->wait_flag ==
1033 KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
1034 TCW_8(thr_bar->b_go,
1035 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1036 } else { // Reset my bits on parent's b_go flag
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001037 (RCAST(volatile char *,
1038 &(thr_bar->parent_bar->b_go)))[thr_bar->offset] = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00001039 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001040 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001041 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1042 // Early exit for reaping threads releasing forkjoin barrier
1043 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1044 return;
1045 // The worker thread may now assume that the team is valid.
1046 team = __kmp_threads[gtid]->th.th_team;
1047 KMP_DEBUG_ASSERT(team != NULL);
1048 tid = __kmp_tid_from_gtid(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001049
Jonathan Peyton30419822017-05-12 18:01:32 +00001050 KA_TRACE(
1051 20,
1052 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1053 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1054 KMP_MB(); // Flush all pending memory write invalidates.
1055 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001056
Jonathan Peyton30419822017-05-12 18:01:32 +00001057 nproc = this_thr->th.th_team_nproc;
1058 int level = team->t.t_level;
Jonathan Peyton441f3372015-09-21 17:24:46 +00001059#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001060 if (team->t.t_threads[0]
1061 ->th.th_teams_microtask) { // are we inside the teams construct?
1062 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1063 this_thr->th.th_teams_level == level)
1064 ++level; // level was not increased in teams construct for team_of_workers
1065 if (this_thr->th.th_teams_size.nteams > 1)
1066 ++level; // level was not increased in teams construct for team_of_masters
1067 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00001068#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001069 if (level == 1)
1070 thr_bar->use_oncore_barrier = 1;
1071 else
1072 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001073
Jonathan Peyton30419822017-05-12 18:01:32 +00001074 // If the team size has increased, we still communicate with old leaves via
1075 // oncore barrier.
1076 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1077 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1078 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1079 tid, team);
1080 // But if the entire team changes, we won't use oncore barrier at all
1081 if (team_change)
1082 old_leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001083
1084#if KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00001085 if (propagate_icvs) {
1086 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1087 FALSE);
1088 if (KMP_MASTER_TID(
1089 tid)) { // master already has copy in final destination; copy
1090 copy_icvs(&thr_bar->th_fixed_icvs,
1091 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1092 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1093 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1094 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1095 // leaves (on-core children) pull parent's fixed ICVs directly to local
1096 // ICV store
1097 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1098 &thr_bar->parent_bar->th_fixed_icvs);
1099 // non-leaves will get ICVs piggybacked with b_go via NGO store
1100 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1101 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1102 // access
1103 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1104 else // leaves copy parent's fixed ICVs directly to local ICV store
1105 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1106 &thr_bar->parent_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001107 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001108 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001109#endif // KMP_BARRIER_ICV_PUSH
1110
Jonathan Peyton30419822017-05-12 18:01:32 +00001111 // Now, release my children
1112 if (thr_bar->my_level) { // not a leaf
Ed Maste414544c2017-07-07 21:06:05 +00001113 kmp_int32 child_tid;
Jonathan Peyton30419822017-05-12 18:01:32 +00001114 kmp_uint32 last;
1115 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1116 thr_bar->use_oncore_barrier) {
1117 if (KMP_MASTER_TID(tid)) { // do a flat release
1118 // Set local b_go to bump children via NGO store of the cache line
1119        // containing ICVs and b_go.
1120 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
1121 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1122 // the cache line
1123 ngo_load(&thr_bar->th_fixed_icvs);
1124 // This loops over all the threads skipping only the leaf nodes in the
1125 // hierarchy
1126 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1127 child_tid += thr_bar->skip_per_level[1]) {
Ed Maste414544c2017-07-07 21:06:05 +00001128 kmp_bstate_t *child_bar =
Jonathan Peyton30419822017-05-12 18:01:32 +00001129 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1130 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1131 "releasing T#%d(%d:%d)"
1132 " go(%p): %u => %u\n",
1133 gtid, team->t.t_id, tid,
1134 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1135 child_tid, &child_bar->b_go, child_bar->b_go,
1136 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1137 // Use ngo store (if available) to both store ICVs and release child
1138 // via child's b_go
1139 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001140 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001141 ngo_sync();
1142 }
1143 TCW_8(thr_bar->b_go,
1144 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1145 // Now, release leaf children
1146 if (thr_bar->leaf_kids) { // if there are any
1147 // We test team_change on the off-chance that the level 1 team changed.
1148 if (team_change ||
1149 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1150 if (old_leaf_kids) { // release old leaf kids
1151 thr_bar->b_go |= old_leaf_state;
1152 }
1153 // Release new leaf kids
1154 last = tid + thr_bar->skip_per_level[1];
1155 if (last > nproc)
1156 last = nproc;
1157 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1158 ++child_tid) { // skip_per_level[0]=1
Ed Maste414544c2017-07-07 21:06:05 +00001159 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1160 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +00001161 KA_TRACE(
1162 20,
1163 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1164 " T#%d(%d:%d) go(%p): %u => %u\n",
1165 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1166 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1167 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1168 // Release child using child's b_go flag
1169 ANNOTATE_BARRIER_BEGIN(child_thr);
1170 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1171 flag.release();
1172 }
1173 } else { // Release all children at once with leaf_state bits on my own
1174 // b_go flag
1175 thr_bar->b_go |= thr_bar->leaf_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001176 }
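        // Illustrative sketch (numbers hypothetical): with a level-1 branch
        // factor of 4, the thread with tid 0 has leaf children 1-3; setting
        // their leaf_state bits in b_go above releases all of them with a
        // single update instead of three separate flag releases.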
Jonathan Peyton30419822017-05-12 18:01:32 +00001177 }
1178 } else { // Blocktime is not infinite; do a simple hierarchical release
1179 for (int d = thr_bar->my_level - 1; d >= 0;
1180 --d) { // Release highest level threads first
1181 last = tid + thr_bar->skip_per_level[d + 1];
1182 kmp_uint32 skip = thr_bar->skip_per_level[d];
1183 if (last > nproc)
1184 last = nproc;
1185 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
Ed Maste414544c2017-07-07 21:06:05 +00001186 kmp_info_t *child_thr = team->t.t_threads[child_tid];
1187 kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
Jonathan Peyton30419822017-05-12 18:01:32 +00001188 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1189 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1190 gtid, team->t.t_id, tid,
1191 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1192 child_tid, &child_bar->b_go, child_bar->b_go,
1193 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1194 // Release child using child's b_go flag
1195 ANNOTATE_BARRIER_BEGIN(child_thr);
1196 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1197 flag.release();
1198 }
1199 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001200 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001201#if KMP_BARRIER_ICV_PUSH
1202 if (propagate_icvs && !KMP_MASTER_TID(tid))
1203 // non-leaves copy ICVs from fixed ICVs to local dest
1204 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1205 &thr_bar->th_fixed_icvs);
1206#endif // KMP_BARRIER_ICV_PUSH
1207 }
1208 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1209 "barrier type %d\n",
1210 gtid, team->t.t_id, tid, bt));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001211}
1212
Jonathan Peyton30419822017-05-12 18:01:32 +00001213// End of Barrier Algorithms
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001214
1215// Internal function to do a barrier.
1216/* If is_split is true, do a split barrier; otherwise, do a plain barrier.
Jonathan Peyton30419822017-05-12 18:01:32 +00001217 If reduce is non-NULL, do a split reduction barrier; otherwise, do a
1218 barrier without a reduction.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001219 Returns 0 if master thread, 1 if worker thread. */
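// Usage sketch (assumed from typical callers elsewhere in the runtime, not
// taken from this file): a plain explicit barrier with no reduction would be
// invoked roughly as
//   __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
// while a reduction passes is_split=TRUE plus reduce_data and a combiner
// callback so the gather phase can fold partial results before workers are
// released.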
Jonathan Peyton30419822017-05-12 18:01:32 +00001220int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
1221 size_t reduce_size, void *reduce_data,
1222 void (*reduce)(void *, void *)) {
1223 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1224 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Ed Maste414544c2017-07-07 21:06:05 +00001225 int tid = __kmp_tid_from_gtid(gtid);
1226 kmp_info_t *this_thr = __kmp_threads[gtid];
1227 kmp_team_t *team = this_thr->th.th_team;
1228 int status = 0;
Jonathan Peyton30419822017-05-12 18:01:32 +00001229 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001230#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001231 ompt_task_id_t my_task_id;
1232 ompt_parallel_id_t my_parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001233#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001234
Jonathan Peyton30419822017-05-12 18:01:32 +00001235 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1236 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001237
Jonathan Peyton30419822017-05-12 18:01:32 +00001238 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001239#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001240 if (ompt_enabled) {
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001241#if OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00001242 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
1243 my_parallel_id = team->t.ompt_team_info.parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001244
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001245#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001246 if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
1247 if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
1248 ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
1249 my_parallel_id, my_task_id);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001250 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001251 }
1252#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001253 if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1254 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id,
1255 my_task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001256 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001257#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001258 // It is OK to report the barrier state after the barrier begin callback.
1259 // According to the OMPT specification, a compliant implementation may
1260 // even delay reporting this state until the barrier begins to wait.
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001261 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
Jonathan Peyton30419822017-05-12 18:01:32 +00001262 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001263#endif
1264
Jonathan Peyton30419822017-05-12 18:01:32 +00001265 if (!team->t.t_serialized) {
1266#if USE_ITT_BUILD
1267 // This value will be used in itt notify events below.
1268 void *itt_sync_obj = NULL;
1269#if USE_ITT_NOTIFY
1270 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1271 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1272#endif
1273#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001274 if (__kmp_tasking_mode == tskm_extra_barrier) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001275 __kmp_tasking_barrier(team, this_thr, gtid);
1276 KA_TRACE(15,
1277 ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1278 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001279 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280
Jonathan Peyton30419822017-05-12 18:01:32 +00001281 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1282 access it when the team struct is not guaranteed to exist. */
1283 // See note about the corresponding code in __kmp_join_barrier() being
1284 // performance-critical.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001285 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001286#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00001287 this_thr->th.th_team_bt_intervals =
1288 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1289 this_thr->th.th_team_bt_set =
1290 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001291#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001292 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001293#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001294 }
1295
1296#if USE_ITT_BUILD
1297 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001298 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001299#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001300#if USE_DEBUGGER
1301    // Let the debugger know: the thread arrived at the barrier and is waiting.
1302 if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
1303 team->t.t_bar[bt].b_master_arrived += 1;
1304 } else {
1305 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1306 } // if
1307#endif /* USE_DEBUGGER */
1308 if (reduce != NULL) {
1309 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1310 this_thr->th.th_local.reduce_data = reduce_data;
1311 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001312
Jonathan Peyton30419822017-05-12 18:01:32 +00001313 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1314 __kmp_task_team_setup(
1315 this_thr, team,
1316 0); // use 0 to only setup the current team if nthreads > 1
1317
1318 switch (__kmp_barrier_gather_pattern[bt]) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001319 case bp_hyper_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001320 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1321 // to 0; use linear
1322 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1323 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1324 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001325 }
1326 case bp_hierarchical_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001327 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
1328 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1329 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001330 }
1331 case bp_tree_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001332 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1333 // to 0; use linear
1334 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1335 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1336 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001337 }
1338 default: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001339 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1340 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001341 }
1342 }
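    // Note: which gather/release algorithm runs is chosen at startup elsewhere
    // in the runtime (typically via the KMP_*_BARRIER_PATTERN environment
    // variables); the hyper and tree variants assert nonzero branch bits,
    // with the linear gather in the default case as the fallback that needs
    // no branch-bit configuration.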
1343
Jonathan Peyton30419822017-05-12 18:01:32 +00001344 KMP_MB();
1345
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001346 if (KMP_MASTER_TID(tid)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001347 status = 0;
1348 if (__kmp_tasking_mode != tskm_immediate_exec) {
Andrey Churbanovc47afcd2017-07-03 11:24:08 +00001349 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton30419822017-05-12 18:01:32 +00001350 }
1351#if USE_DEBUGGER
1352      // Let the debugger know: all threads have arrived and are starting to
1353      // leave the barrier.
1354 team->t.t_bar[bt].b_team_arrived += 1;
1355#endif
1356
1357#if OMP_40_ENABLED
1358 // Reset cancellation flag for worksharing constructs
1359 if (team->t.t_cancel_request == cancel_loop ||
1360 team->t.t_cancel_request == cancel_sections) {
1361 team->t.t_cancel_request = cancel_noreq;
1362 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001363#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001364#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001365 /* TODO: In case of split reduction barrier, master thread may send
1366 acquired event early, before the final summation into the shared
1367 variable is done (final summation can be a long operation for array
1368 reductions). */
1369 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1370 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1371#endif /* USE_ITT_BUILD */
1372#if USE_ITT_BUILD && USE_ITT_NOTIFY
1373 // Barrier - report frame end (only if active_level == 1)
1374 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1375 __kmp_forkjoin_frames_mode &&
1376#if OMP_40_ENABLED
1377 this_thr->th.th_teams_microtask == NULL &&
1378#endif
1379 team->t.t_active_level == 1) {
1380 kmp_uint64 cur_time = __itt_get_timestamp();
1381 kmp_info_t **other_threads = team->t.t_threads;
1382 int nproc = this_thr->th.th_team_nproc;
1383 int i;
1384 switch (__kmp_forkjoin_frames_mode) {
1385 case 1:
1386 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1387 loc, nproc);
1388 this_thr->th.th_frame_time = cur_time;
1389 break;
1390 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1391 // be fixed)
1392 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1393 1, loc, nproc);
1394 break;
1395 case 3:
1396 if (__itt_metadata_add_ptr) {
1397 // Initialize with master's wait time
1398 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1399 // Set arrive time to zero to be able to check it in
1400 // __kmp_invoke_task(); the same is done inside the loop below
1401 this_thr->th.th_bar_arrive_time = 0;
1402 for (i = 1; i < nproc; ++i) {
1403 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1404 other_threads[i]->th.th_bar_arrive_time = 0;
1405 }
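            // Worked example (illustrative values): if cur_time is 20 and the
            // four threads recorded arrive times 10, 12, 14 and 16, delta
            // accumulates 10 + 8 + 6 + 4 = 28 timestamp units of imbalance.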
1406 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1407 cur_time, delta,
1408 (kmp_uint64)(reduce != NULL));
1409 }
1410 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1411 loc, nproc);
1412 this_thr->th.th_frame_time = cur_time;
1413 break;
1414 }
1415 }
1416#endif /* USE_ITT_BUILD */
1417 } else {
1418 status = 1;
1419#if USE_ITT_BUILD
1420 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1421 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1422#endif /* USE_ITT_BUILD */
1423 }
1424 if (status == 1 || !is_split) {
1425 switch (__kmp_barrier_release_pattern[bt]) {
1426 case bp_hyper_bar: {
1427 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1428 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1429 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1430 break;
1431 }
1432 case bp_hierarchical_bar: {
1433 __kmp_hierarchical_barrier_release(
1434 bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1435 break;
1436 }
1437 case bp_tree_bar: {
1438 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1439 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1440 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1441 break;
1442 }
1443 default: {
1444 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1445 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1446 }
1447 }
1448 if (__kmp_tasking_mode != tskm_immediate_exec) {
1449 __kmp_task_team_sync(this_thr, team);
1450 }
1451 }
1452
1453#if USE_ITT_BUILD
1454 /* GEH: TODO: Move this under if-condition above and also include in
1455 __kmp_end_split_barrier(). This will more accurately represent the actual
1456 release time of the threads for split barriers. */
1457 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1458 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1459#endif /* USE_ITT_BUILD */
1460 } else { // Team is serialized.
1461 status = 0;
1462 if (__kmp_tasking_mode != tskm_immediate_exec) {
1463#if OMP_45_ENABLED
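      // A serialized team only carries a task team when proxy tasks may
      // complete asynchronously (see the assertion on tt_found_proxy_tasks
      // below), so the barrier still has to wait for them before continuing.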
1464 if (this_thr->th.th_task_team != NULL) {
1465 void *itt_sync_obj = NULL;
1466#if USE_ITT_NOTIFY
1467 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1468 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1469 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1470 }
1471#endif
1472
1473 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
1474 TRUE);
1475 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1476 __kmp_task_team_setup(this_thr, team, 0);
1477
1478#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001479 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001480 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1481#endif /* USE_ITT_BUILD */
1482 }
1483#else
1484 // The task team should be NULL for serialized code (tasks will be
1485 // executed immediately)
1486 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
1487 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
1488#endif
1489 }
1490 }
1491 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1492 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
1493 __kmp_tid_from_gtid(gtid), status));
1494
1495#if OMPT_SUPPORT
1496 if (ompt_enabled) {
1497#if OMPT_BLAME
1498 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1499 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id,
1500 my_task_id);
1501 }
1502#endif
1503 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1504 }
1505#endif
1506 ANNOTATE_BARRIER_END(&team->t.t_bar);
1507
1508 return status;
1509}
1510
1511void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
1512 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1513 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1514 int tid = __kmp_tid_from_gtid(gtid);
1515 kmp_info_t *this_thr = __kmp_threads[gtid];
1516 kmp_team_t *team = this_thr->th.th_team;
1517
1518 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1519 if (!team->t.t_serialized) {
1520 if (KMP_MASTER_GTID(gtid)) {
1521 switch (__kmp_barrier_release_pattern[bt]) {
1522 case bp_hyper_bar: {
1523 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1524 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1525 FALSE USE_ITT_BUILD_ARG(NULL));
1526 break;
1527 }
1528 case bp_hierarchical_bar: {
1529 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
1530 FALSE USE_ITT_BUILD_ARG(NULL));
1531 break;
1532 }
1533 case bp_tree_bar: {
1534 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1535 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1536 FALSE USE_ITT_BUILD_ARG(NULL));
1537 break;
1538 }
1539 default: {
1540 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1541 FALSE USE_ITT_BUILD_ARG(NULL));
1542 }
1543 }
1544 if (__kmp_tasking_mode != tskm_immediate_exec) {
1545 __kmp_task_team_sync(this_thr, team);
1546 } // if
1547 }
1548 }
1549 ANNOTATE_BARRIER_END(&team->t.t_bar);
1550}
1551
1552void __kmp_join_barrier(int gtid) {
1553 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
1554 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Ed Maste414544c2017-07-07 21:06:05 +00001555 kmp_info_t *this_thr = __kmp_threads[gtid];
1556 kmp_team_t *team;
1557 kmp_uint nproc;
Jonathan Peyton30419822017-05-12 18:01:32 +00001558 kmp_info_t *master_thread;
1559 int tid;
1560#ifdef KMP_DEBUG
1561 int team_id;
1562#endif /* KMP_DEBUG */
1563#if USE_ITT_BUILD
1564 void *itt_sync_obj = NULL;
1565#if USE_ITT_NOTIFY
1566  if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine unless needed
1567 // Get object created at fork_barrier
1568 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1569#endif
1570#endif /* USE_ITT_BUILD */
1571 KMP_MB();
1572
1573 // Get current info
1574 team = this_thr->th.th_team;
1575 nproc = this_thr->th.th_team_nproc;
1576 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1577 tid = __kmp_tid_from_gtid(gtid);
1578#ifdef KMP_DEBUG
1579 team_id = team->t.t_id;
1580#endif /* KMP_DEBUG */
1581 master_thread = this_thr->th.th_team_master;
1582#ifdef KMP_DEBUG
1583 if (master_thread != team->t.t_threads[0]) {
1584 __kmp_print_structure();
1585 }
1586#endif /* KMP_DEBUG */
1587 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1588 KMP_MB();
1589
1590 // Verify state
1591 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1592 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1593 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1594 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1595 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
1596 gtid, team_id, tid));
1597
1598 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1599#if OMPT_SUPPORT
1600#if OMPT_TRACE
1601 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1602 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1603 team->t.ompt_team_info.parallel_id,
1604 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1605 }
1606#endif
1607 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1608#endif
1609
1610 if (__kmp_tasking_mode == tskm_extra_barrier) {
1611 __kmp_tasking_barrier(team, this_thr, gtid);
1612    KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1613 team_id, tid));
1614 }
1615#ifdef KMP_DEBUG
1616 if (__kmp_tasking_mode != tskm_immediate_exec) {
1617 KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
1618 "%p, th_task_team = %p\n",
1619 __kmp_gtid_from_thread(this_thr), team_id,
1620 team->t.t_task_team[this_thr->th.th_task_state],
1621 this_thr->th.th_task_team));
1622 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
1623 team->t.t_task_team[this_thr->th.th_task_state]);
1624 }
1625#endif /* KMP_DEBUG */
1626
1627 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1628 access it when the team struct is not guaranteed to exist. Doing these
1629     loads causes a cache miss that slows down EPCC parallel by 2x. As a
1630     workaround, we do not perform the copy if blocktime=infinite, since the
1631     values are not used by __kmp_wait_template() in that case. */
1632 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1633#if KMP_USE_MONITOR
1634 this_thr->th.th_team_bt_intervals =
1635 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1636 this_thr->th.th_team_bt_set =
1637 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1638#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001639 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jonathan Peyton30419822017-05-12 18:01:32 +00001640#endif
1641 }
1642
1643#if USE_ITT_BUILD
1644 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1645 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001646#endif /* USE_ITT_BUILD */
1647
Jonathan Peyton30419822017-05-12 18:01:32 +00001648 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1649 case bp_hyper_bar: {
1650 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1651 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1652 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1653 break;
1654 }
1655 case bp_hierarchical_bar: {
1656 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1657 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1658 break;
1659 }
1660 case bp_tree_bar: {
1661 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1662 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1663 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1664 break;
1665 }
1666 default: {
1667 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1668 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1669 }
1670 }
1671
1672 /* From this point on, the team data structure may be deallocated at any time
1673 by the master thread - it is unsafe to reference it in any of the worker
1674 threads. Any per-team data items that need to be referenced before the
1675 end of the barrier should be moved to the kmp_task_team_t structs. */
1676 if (KMP_MASTER_TID(tid)) {
1677 if (__kmp_tasking_mode != tskm_immediate_exec) {
1678 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1679 }
1680#if KMP_STATS_ENABLED
1681    // Have the master thread flag the workers to indicate they are now
1682    // waiting for the next parallel region. Also wake them up so they switch
1683    // their timers to idle.
1684 for (int i = 0; i < team->t.t_nproc; ++i) {
1685 kmp_info_t *team_thread = team->t.t_threads[i];
1686 if (team_thread == this_thr)
1687 continue;
1688 team_thread->th.th_stats->setIdleFlag();
1689 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
1690 team_thread->th.th_sleep_loc != NULL)
1691 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
1692 team_thread->th.th_sleep_loc);
1693 }
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001694#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001695#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001696 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1697 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1698#endif /* USE_ITT_BUILD */
1699
1700#if USE_ITT_BUILD && USE_ITT_NOTIFY
1701 // Join barrier - report frame end
1702 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1703 __kmp_forkjoin_frames_mode &&
1704#if OMP_40_ENABLED
1705 this_thr->th.th_teams_microtask == NULL &&
1706#endif
1707 team->t.t_active_level == 1) {
1708 kmp_uint64 cur_time = __itt_get_timestamp();
1709 ident_t *loc = team->t.t_ident;
1710 kmp_info_t **other_threads = team->t.t_threads;
1711 int nproc = this_thr->th.th_team_nproc;
1712 int i;
1713 switch (__kmp_forkjoin_frames_mode) {
1714 case 1:
1715 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1716 loc, nproc);
1717 break;
1718 case 2:
1719 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
1720 loc, nproc);
1721 break;
1722 case 3:
1723 if (__itt_metadata_add_ptr) {
1724 // Initialize with master's wait time
1725 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1726 // Set arrive time to zero to be able to check it in
1727 // __kmp_invoke_task(); the same is done inside the loop below
1728 this_thr->th.th_bar_arrive_time = 0;
1729 for (i = 1; i < nproc; ++i) {
1730 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1731 other_threads[i]->th.th_bar_arrive_time = 0;
1732 }
1733 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1734 cur_time, delta, 0);
1735 }
1736 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1737 loc, nproc);
1738 this_thr->th.th_frame_time = cur_time;
1739 break;
1740 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001741 }
1742#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001743 }
1744#if USE_ITT_BUILD
1745 else {
1746 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1747 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1748 }
1749#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001750
1751#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001752 if (KMP_MASTER_TID(tid)) {
1753 KA_TRACE(
1754 15,
1755 ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1756 gtid, team_id, tid, nproc));
1757 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001758#endif /* KMP_DEBUG */
1759
Jonathan Peyton30419822017-05-12 18:01:32 +00001760 // TODO now, mark worker threads as done so they may be disbanded
1761 KMP_MB(); // Flush all pending memory write invalidates.
1762 KA_TRACE(10,
1763 ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001764
1765#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001766 if (ompt_enabled) {
Jonathan Peytoncab67cc2015-09-18 16:24:46 +00001767#if OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00001768 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1769 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1770 team->t.ompt_team_info.parallel_id,
1771 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001772 }
1773#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001774
1775 // return to default state
1776 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
1777 }
1778#endif
1779 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001780}
1781
Jonathan Peyton30419822017-05-12 18:01:32 +00001782// TODO release worker threads' fork barriers as we are ready instead of all at
1783// once
1784void __kmp_fork_barrier(int gtid, int tid) {
1785 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
1786 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1787 kmp_info_t *this_thr = __kmp_threads[gtid];
1788 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001789#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001790 void *itt_sync_obj = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001791#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001792 if (team)
1793 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001794
Jonathan Peyton30419822017-05-12 18:01:32 +00001795 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
1796 (team != NULL) ? team->t.t_id : -1, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001797
Jonathan Peyton30419822017-05-12 18:01:32 +00001798 // th_team pointer only valid for master thread here
1799 if (KMP_MASTER_TID(tid)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001800#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001801 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1802 // Create itt barrier object
1803 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1804 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1805 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001806#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1807
1808#ifdef KMP_DEBUG
Ed Maste414544c2017-07-07 21:06:05 +00001809 kmp_info_t **other_threads = team->t.t_threads;
1810 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001811
Jonathan Peyton30419822017-05-12 18:01:32 +00001812 // Verify state
1813 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001814
Jonathan Peyton30419822017-05-12 18:01:32 +00001815 for (i = 1; i < team->t.t_nproc; ++i) {
1816 KA_TRACE(500,
1817 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
1818 "== %u.\n",
1819 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1820 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1821 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1822 KMP_DEBUG_ASSERT(
1823 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
1824 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
1825 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1826 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001827#endif
1828
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001829 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peyton642688b2017-06-01 16:46:36 +00001830 // 0 indicates setup current task team if nthreads > 1
1831 __kmp_task_team_setup(this_thr, team, 0);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001832 }
1833
Jonathan Peyton30419822017-05-12 18:01:32 +00001834 /* The master thread may have changed its blocktime between the join barrier
1835 and the fork barrier. Copy the blocktime info to the thread, where
1836 __kmp_wait_template() can access it when the team struct is not
1837 guaranteed to exist. */
1838 // See note about the corresponding code in __kmp_join_barrier() being
1839 // performance-critical
1840 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1841#if KMP_USE_MONITOR
1842 this_thr->th.th_team_bt_intervals =
1843 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1844 this_thr->th.th_team_bt_set =
1845 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1846#else
Jonathan Peyton52527cd2017-09-05 15:45:48 +00001847 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(team, tid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001848#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001849 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001850 } // master
1851
1852 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1853 case bp_hyper_bar: {
1854 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1855 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1856 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1857 break;
1858 }
1859 case bp_hierarchical_bar: {
1860 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1861 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1862 break;
1863 }
1864 case bp_tree_bar: {
1865 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1866 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1867 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1868 break;
1869 }
1870 default: {
1871 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1872 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1873 }
1874 }
1875
1876 // Early exit for reaping threads releasing forkjoin barrier
1877 if (TCR_4(__kmp_global.g.g_done)) {
1878 this_thr->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001879
1880#if USE_ITT_BUILD && USE_ITT_NOTIFY
1881 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001882 if (!KMP_MASTER_TID(tid)) {
1883 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1884 if (itt_sync_obj)
1885 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1886 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001887 }
1888#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00001889 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1890 return;
1891 }
1892
1893 /* We can now assume that a valid team structure has been allocated by the
1894 master and propagated to all worker threads. The current thread, however,
1895 may not be part of the team, so we can't blindly assume that the team
1896 pointer is non-null. */
1897 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1898 KMP_DEBUG_ASSERT(team != NULL);
1899 tid = __kmp_tid_from_gtid(gtid);
1900
1901#if KMP_BARRIER_ICV_PULL
1902 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1903 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1904 implicit task has this data before this function is called. We cannot
1905 modify __kmp_fork_call() to look at the fixed ICVs in the master's thread
1906 struct, because it is not always the case that the threads arrays have
1907 been allocated when __kmp_fork_call() is executed. */
1908 {
1909 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
1910 if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
1911 // Copy the initial ICVs from the master's thread struct to the implicit
1912 // task for this tid.
1913 KA_TRACE(10,
1914 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1915 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
1916 tid, FALSE);
1917 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1918 &team->t.t_threads[0]
1919 ->th.th_bar[bs_forkjoin_barrier]
1920 .bb.th_fixed_icvs);
1921 }
1922 }
1923#endif // KMP_BARRIER_ICV_PULL
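  // Design note: under KMP_BARRIER_ICV_PULL each worker copies the ICVs out
  // of the master's th_fixed_icvs only after being released above, whereas
  // under KMP_BARRIER_ICV_PUSH the master propagates them down the tree
  // together with the go flags during the release itself.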
1924
1925 if (__kmp_tasking_mode != tskm_immediate_exec) {
1926 __kmp_task_team_sync(this_thr, team);
1927 }
1928
1929#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1930 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1931 if (proc_bind == proc_bind_intel) {
1932#endif
1933#if KMP_AFFINITY_SUPPORTED
1934 // Call dynamic affinity settings
1935 if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1936 __kmp_balanced_affinity(tid, team->t.t_nproc);
1937 }
1938#endif // KMP_AFFINITY_SUPPORTED
1939#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1940 } else if (proc_bind != proc_bind_false) {
1941 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1942 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
1943 __kmp_gtid_from_thread(this_thr),
1944 this_thr->th.th_current_place));
1945 } else {
1946 __kmp_affinity_set_place(gtid);
1947 }
1948 }
1949#endif
1950
1951#if USE_ITT_BUILD && USE_ITT_NOTIFY
1952 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1953 if (!KMP_MASTER_TID(tid)) {
1954 // Get correct barrier object
1955 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1956 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1957 } // (prepare called inside barrier_release)
1958 }
1959#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1960 ANNOTATE_BARRIER_END(&team->t.t_bar);
1961 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
1962 team->t.t_id, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001963}
1964
Jonathan Peyton30419822017-05-12 18:01:32 +00001965void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
1966 kmp_internal_control_t *new_icvs, ident_t *loc) {
1967 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001968
Jonathan Peyton30419822017-05-12 18:01:32 +00001969 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
1970 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001971
Jonathan Peyton30419822017-05-12 18:01:32 +00001972/* Master thread's copy of the ICVs was set up on the implicit taskdata in
1973 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1974 implicit task has this data before this function is called. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001975#if KMP_BARRIER_ICV_PULL
Jonathan Peyton30419822017-05-12 18:01:32 +00001976 /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains
1977 untouched), where all of the worker threads can access them and make their
1978 own copies after the barrier. */
1979 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
1980 // allocated at this point
1981 copy_icvs(
1982 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
1983 new_icvs);
1984 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
1985 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001986#elif KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00001987 // The ICVs will be propagated in the fork barrier, so nothing needs to be
1988 // done here.
1989 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
1990 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001991#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001992 // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
1993 // time.
1994 ngo_load(new_icvs);
1995 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
1996 // allocated at this point
1997 for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
1998 // TODO: GEH - pass in better source location info since usually NULL here
1999 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2000 f, team->t.t_threads[f], team));
2001 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
2002 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
2003 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2004 f, team->t.t_threads[f], team));
2005 }
2006 ngo_sync();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002007#endif // KMP_BARRIER_ICV_PULL
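  // Trade-off: the PULL and PUSH variants keep this routine O(1) and amortize
  // the per-thread copies into the fork barrier itself, while the fallback
  // path above pays the O(nthreads) copy cost here at team setup time.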
2008}