/*
 * kmp_barrier.cpp
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_itt.h"
#include "kmp_os.h"
#include "kmp_stats.h"

#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src) ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync() ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */

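/* Usage sketch (illustrative only, mirroring the pattern used by the release
   functions below): the ngo_* macros implement a load-once / store-many
   broadcast of the cache line holding the ICVs, followed by a fence:

     ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); // load master copy
     for (int i = 1; i < nproc; ++i)
       ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                      &team->t.t_implicit_task_taskdata[0].td_icvs);
     ngo_sync(); // order the non-globally-ordered stores before any release

   On KMP_MIC the stores are 512-bit non-globally-ordered stores, so ngo_sync()
   is required before releasing any thread that will read the copied ICVs; on
   other targets the macros degrade to copy_icvs()/KMP_MEMCPY() and the sync is
   a no-op. */
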
void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------

// Linear Barrier
static void __kmp_linear_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;

  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // We now perform a linear reduction to signal that all of the threads have
  // arrived.
  if (!KMP_MASTER_TID(tid)) {
    KA_TRACE(20,
             ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team),
              team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    // Mark arrival to master thread
    /* After performing this write, a worker thread may not assume that the
       team is valid any more - it could be deallocated by the master thread at
       any time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
    flag.release();
  } else {
    register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
    register int nproc = this_thr->th.th_team_nproc;
    register int i;
    // Don't have to worry about sleep bit here or atomic since team setting
    register kmp_uint64 new_state =
        team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

    // Collect all the worker team member threads.
    for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (i + 1 < nproc)
        KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                    "arrived(%p) == %llu\n",
                    gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                    team->t.t_id, i,
                    &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

      // Wait for worker thread to arrive
      kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived,
                       new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and the other thread
      // time to the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(
            this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team),
                  team->t.t_id, i));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  other_threads[i]->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
    // Don't have to worry about sleep bit here or atomic since team setting
    team_bar->b_arrived = new_state;
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived,
                  new_state));
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}
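
/* Protocol sketch (illustrative, not additional runtime code): both gather and
   release operate on 64-bit epoch counters that advance by
   KMP_BARRIER_STATE_BUMP per barrier instance, so a value left over from a
   previous barrier can never satisfy the current wait:

     kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;
     // worker:  publishes new_state in its own b_arrived (flag.release() above)
     // master:  spins until other_threads[i]->...b_arrived == new_state
     //          (kmp_flag_64::wait() above)

   The bump is a power of two larger than 1 because the low-order bits of the
   same word carry flag state such as the sleep bit. */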

static void __kmp_linear_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_team_t *team;

  if (KMP_MASTER_TID(tid)) {
    register unsigned int i;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_info_t **other_threads;

    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
          for (i = 1; i < nproc; ++i) {
            __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i],
                                     team, i, FALSE);
            ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                           &team->t.t_implicit_task_taskdata[0].td_icvs);
          }
          ngo_sync();
        }
      }
#endif // KMP_BARRIER_ICV_PUSH

      // Now, release all of the worker threads
      for (i = 1; i < nproc; ++i) {
#if KMP_CACHE_MANAGE
        // Prefetch next thread's go flag
        if (i + 1 < nproc)
          KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
        KA_TRACE(
            20,
            ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid,
             team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go,
             other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
        ANNOTATE_BARRIER_BEGIN(other_threads[i]);
        kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go,
                         other_threads[i]);
        flag.release();
      }
    }
  } else { // Wait for the MASTER thread to release us
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                  gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is
      // disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
    tid = __kmp_tid_from_gtid(gtid);
    team = __kmp_threads[gtid]->th.th_team;
#endif
    KMP_DEBUG_ASSERT(team != NULL);
    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  KA_TRACE(
      20,
      ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Tree barrier
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                          int tid, void (*reduce)(void *, void *)
                                       USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint32 nproc = this_thr->th.th_team_nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;
  register kmp_uint64 new_state;

  KA_TRACE(
      20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  // Perform tree gather to wait until all threads have arrived; reduce any
  // required data as we go
  child_tid = (tid << branch_bits) + 1;
  if (child_tid < nproc) {
    // Parent threads wait for all their children to arrive
    new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    child = 1;
    do {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's arrived count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 flag(&child_bar->b_arrived, new_state);
      flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }

  if (!KMP_MASTER_TID(tid)) { // Worker threads
    register kmp_int32 parent_tid = (tid - 1) >> branch_bits;

    KA_TRACE(20,
             ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
              "arrived(%p): %llu => %llu\n",
              gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
              team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
              thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

    // Mark arrival to parent thread
    /* After performing this write, a worker thread may not assume that the
       team is valid any more - it could be deallocated by the master thread at
       any time. */
    ANNOTATE_BARRIER_BEGIN(this_thr);
    kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
    flag.release();
  } else {
    // Need to update the team arrived pointer if we are the master thread
    if (nproc > 1) // New value was already computed above
      team->t.t_bar[bt].b_arrived = new_state;
    else
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(20,
           ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
            gtid, team->t.t_id, tid, bt));
}
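
/* Worked example (illustrative): with branch_bits = 2 (branch_factor = 4) and
   nproc = 10, the gather tree used above is
       parent(tid)   = (tid - 1) >> branch_bits
       children(tid) = tid*4 + 1 .. tid*4 + 4   (clipped at nproc)
   so tid 0 waits for tids 1..4, tid 1 waits for 5..8, tid 2 waits for 9, and
   tids 3..9 are leaves that only signal their parents. */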

static void __kmp_tree_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
  register kmp_team_t *team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_uint32 nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;

  // Perform a tree release for all of the threads that have been gathered
  if (!KMP_MASTER_TID(
          tid)) { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably (or
      // ITTNOTIFY is disabled)
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid,
              team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  } else {
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
  }
  nproc = this_thr->th.th_team_nproc;
  child_tid = (tid << branch_bits) + 1;

  if (child_tid < nproc) {
    register kmp_info_t **other_threads = team->t.t_threads;
    child = 1;
    // Parent threads release all their children
    do {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      // Prefetch next thread's go count
      if (child + 1 <= branch_factor && child_tid + 1 < nproc)
        KMP_CACHE_PREFETCH(
            &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
      {
        KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
        if (propagate_icvs) {
          __kmp_init_implicit_task(team->t.t_ident,
                                   team->t.t_threads[child_tid], team,
                                   child_tid, FALSE);
          copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                    &team->t.t_implicit_task_taskdata[0].td_icvs);
        }
      }
#endif // KMP_BARRIER_ICV_PUSH
      KA_TRACE(20,
               ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                "go(%p): %u => %u\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                child_bar->b_go + KMP_BARRIER_STATE_BUMP));
      // Release child from barrier
      ANNOTATE_BARRIER_BEGIN(child_thr);
      kmp_flag_64 flag(&child_bar->b_go, child_thr);
      flag.release();
      child++;
      child_tid++;
    } while (child <= branch_factor && child_tid < nproc);
  }
  KA_TRACE(
      20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}

// Hyper Barrier
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
                           int tid, void (*reduce)(void *, void *)
                                        USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
  register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
  register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 offset;
  register kmp_uint32 level;

  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time =
        __itt_get_timestamp();
  }
#endif
  /* Perform a hypercube-embedded tree gather to wait until all of the threads
     have arrived, and reduce any required data as we go. */
  kmp_flag_64 p_flag(&thr_bar->b_arrived);
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits) {
    register kmp_uint32 child;
    register kmp_uint32 child_tid;

    if (((tid >> level) & (branch_factor - 1)) != 0) {
      register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                "arrived(%p): %llu => %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team),
                team->t.t_id, parent_tid, &thr_bar->b_arrived,
                thr_bar->b_arrived,
                thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
      // Mark arrival to parent thread
      /* After performing this write (in the last iteration of the enclosing
         for loop), a worker thread may not assume that the team is valid any
         more - it could be deallocated by the master thread at any time. */
      ANNOTATE_BARRIER_BEGIN(this_thr);
      p_flag.set_waiter(other_threads[parent_tid]);
      p_flag.release();
      break;
    }

    // Parent threads wait for children to arrive
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level)) {
      register kmp_info_t *child_thr = other_threads[child_tid];
      register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
      register kmp_uint32 next_child_tid = child_tid + (1 << level);
      // Prefetch next thread's arrived count
      if (child + 1 < branch_factor && next_child_tid < num_threads)
        KMP_CACHE_PREFETCH(
            &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
      KA_TRACE(20,
               ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                "arrived(%p) == %llu\n",
                gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
      // Wait for child to arrive
      kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
      c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
      ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
      // Barrier imbalance - write min of the thread time and a child time to
      // the thread.
      if (__kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                               child_thr->th.th_bar_min_time);
      }
#endif
      if (reduce) {
        KA_TRACE(100,
                 ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                  team->t.t_id, child_tid));
        ANNOTATE_REDUCE_AFTER(reduce);
        (*reduce)(this_thr->th.th_local.reduce_data,
                  child_thr->th.th_local.reduce_data);
        ANNOTATE_REDUCE_BEFORE(reduce);
        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
      }
    }
  }

  if (KMP_MASTER_TID(tid)) {
    // Need to update the team arrived pointer if we are the master thread
    if (new_state == KMP_BARRIER_UNUSED_STATE)
      team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
    else
      team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  KA_TRACE(
      20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
           gtid, team->t.t_id, tid, bt));
}
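
/* Worked example (illustrative): with branch_bits = 1 (branch_factor = 2) and
   num_threads = 8, the gather above proceeds level by level:
     level 0 (offset 1): 1->0, 3->2, 5->4, 7->6   (odd tids signal, then stop)
     level 1 (offset 2): 2->0, 6->4
     level 2 (offset 4): 4->0
   A thread leaves the loop at the first level where
   (tid >> level) & (branch_factor - 1) != 0, signalling
   parent_tid = tid & ~((1 << (level + branch_bits)) - 1). */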

// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void __kmp_hyper_barrier_release(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
  register kmp_team_t *team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_info_t **other_threads;
  register kmp_uint32 num_threads;
  register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
  register kmp_uint32 branch_factor = 1 << branch_bits;
  register kmp_uint32 child;
  register kmp_uint32 child_tid;
  register kmp_uint32 offset;
  register kmp_uint32 level;

  /* Perform a hypercube-embedded tree release for all of the threads that have
     been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads
     are released in the reverse order of the corresponding gather, otherwise
     threads are released in the same order. */
  if (KMP_MASTER_TID(tid)) { // master
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for "
                  "barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) { // master already has ICVs in final destination; copy
      copy_icvs(&thr_bar->th_fixed_icvs,
                &team->t.t_implicit_task_taskdata[tid].td_icvs);
    }
#endif
  } else { // Handle fork barrier workers who aren't part of a team yet
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid,
                  &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
    // Wait for parent thread to release us
    kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
    flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
    if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
      // In fork barrier where we could not get the object reliably
      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
      // Cancel wait on previous parallel region...
      __kmp_itt_task_starting(itt_sync_obj);

      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

      itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
      if (itt_sync_obj != NULL)
        // Call prepare as early as possible for "new" barrier
        __kmp_itt_task_finished(itt_sync_obj);
    } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
      // Early exit for reaping threads releasing forkjoin barrier
      if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
        return;

    // The worker thread may now assume that the team is valid.
    team = __kmp_threads[gtid]->th.th_team;
    KMP_DEBUG_ASSERT(team != NULL);
    tid = __kmp_tid_from_gtid(gtid);

    TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
    KA_TRACE(20,
             ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
              gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
    KMP_MB(); // Flush all pending memory write invalidates.
  }
  num_threads = this_thr->th.th_team_nproc;
  other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
  // Count up to correct level for parent
  for (level = 0, offset = 1;
       offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0);
       level += branch_bits, offset <<= branch_bits)
    ;

  // Now go down from there
  for (level -= branch_bits, offset >>= branch_bits; offset != 0;
       level -= branch_bits, offset >>= branch_bits)
#else
  // Go down the tree, level by level
  for (level = 0, offset = 1; offset < num_threads;
       level += branch_bits, offset <<= branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
  {
#ifdef KMP_REVERSE_HYPER_BAR
    /* Now go in reverse order through the children, highest to lowest.
       Initial setting of child is conservative here. */
    child = num_threads >> ((level == 0) ? level : level - 1);
    for (child = (child < branch_factor - 1) ? child : branch_factor - 1,
         child_tid = tid + (child << level);
         child >= 1; child--, child_tid -= (1 << level))
#else
    if (((tid >> level) & (branch_factor - 1)) != 0)
      // No need to go lower than this, since this is the level parent would be
      // notified
      break;
    // Iterate through children on this level of the tree
    for (child = 1, child_tid = tid + (1 << level);
         child < branch_factor && child_tid < num_threads;
         child++, child_tid += (1 << level))
#endif // KMP_REVERSE_HYPER_BAR
    {
      if (child_tid >= num_threads)
        continue; // Child doesn't exist so keep going
      else {
        register kmp_info_t *child_thr = other_threads[child_tid];
        register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
        register kmp_uint32 next_child_tid = child_tid - (1 << level);
// Prefetch next thread's go count
#ifdef KMP_REVERSE_HYPER_BAR
        if (child - 1 >= 1 && next_child_tid < num_threads)
#else
        if (child + 1 < branch_factor && next_child_tid < num_threads)
#endif // KMP_REVERSE_HYPER_BAR
          KMP_CACHE_PREFETCH(
              &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) // push my fixed ICVs to my child
          copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

        KA_TRACE(
            20,
            ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
             "go(%p): %u => %u\n",
             gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
             team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
             child_bar->b_go + KMP_BARRIER_STATE_BUMP));
        // Release child from barrier
        ANNOTATE_BARRIER_BEGIN(child_thr);
        kmp_flag_64 flag(&child_bar->b_go, child_thr);
        flag.release();
      }
    }
  }
#if KMP_BARRIER_ICV_PUSH
  if (propagate_icvs &&
      !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
                             FALSE);
    copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
              &thr_bar->th_fixed_icvs);
  }
#endif
  KA_TRACE(
      20,
      ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
       gtid, team->t.t_id, tid, bt));
}

// Hierarchical Barrier

// Initialize thread barrier data
/* Initializes/re-initializes the hierarchical barrier data stored on a thread.
   Performs the minimum amount of initialization required based on how the team
   has changed. Returns true if leaf children will require both on-core and
   traditional wake-up mechanisms. For example, if the team size increases,
   threads already in the team will respond to on-core wakeup on their parent
   thread, but threads newly added to the team will only be listening on their
   local b_go. */
static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt,
                                                   kmp_bstate_t *thr_bar,
                                                   kmp_uint32 nproc, int gtid,
                                                   int tid, kmp_team_t *team) {
  // Checks to determine if (re-)initialization is needed
  bool uninitialized = thr_bar->team == NULL;
  bool team_changed = team != thr_bar->team;
  bool team_sz_changed = nproc != thr_bar->nproc;
  bool tid_changed = tid != thr_bar->old_tid;
  bool retval = false;

  if (uninitialized || team_sz_changed) {
    __kmp_get_hierarchy(nproc, thr_bar);
  }

  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->my_level = thr_bar->depth - 1; // default for master
    thr_bar->parent_tid = -1; // default for master
    if (!KMP_MASTER_TID(
            tid)) { // if not master, find parent thread in hierarchy
      kmp_uint32 d = 0;
      while (d < thr_bar->depth) { // find parent based on level of thread in
        // hierarchy, and note level
        kmp_uint32 rem;
        if (d == thr_bar->depth - 2) { // reached level right below the master
          thr_bar->parent_tid = 0;
          thr_bar->my_level = d;
          break;
        } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) !=
                   0) { // TODO: can we make this op faster?
          // thread is not a subtree root at next level, so this is max
          thr_bar->parent_tid = tid - rem;
          thr_bar->my_level = d;
          break;
        }
        ++d;
      }
    }
    thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1);
    thr_bar->old_tid = tid;
    thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
  }
  if (uninitialized || team_changed || tid_changed) {
    thr_bar->team = team;
    thr_bar->parent_bar =
        &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    retval = true;
  }
  if (uninitialized || team_sz_changed || tid_changed) {
    thr_bar->nproc = nproc;
    thr_bar->leaf_kids = thr_bar->base_leaf_kids;
    if (thr_bar->my_level == 0)
      thr_bar->leaf_kids = 0;
    if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc)
      thr_bar->leaf_kids = nproc - tid - 1;
    thr_bar->leaf_state = 0;
    for (int i = 0; i < thr_bar->leaf_kids; ++i)
      ((char *)&(thr_bar->leaf_state))[7 - i] = 1;
  }
  return retval;
}
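
/* Illustrative example of the leaf layout computed above (assuming a parent at
   parent_tid = 4 whose on-core leaf children are tids 5..7): the byte offsets
   within the parent's 64-bit flag words are
     offset(5) = 7 - (5 - 4 - 1) = 7,  offset(6) = 6,  offset(7) = 5
   and the parent's leaf_state has bytes 7, 6 and 5 set, i.e. each leaf child
   owns one byte of the parent's b_arrived/b_go words for on-core signalling. */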

static void __kmp_hierarchical_barrier_gather(
    enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
    void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
  register kmp_team_t *team = this_thr->th.th_team;
  register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
  register kmp_uint32 nproc = this_thr->th.th_team_nproc;
  register kmp_info_t **other_threads = team->t.t_threads;
  register kmp_uint64 new_state;

  int level = team->t.t_level;
#if OMP_40_ENABLED
  if (other_threads[0]
          ->th.th_teams_microtask) // are we inside the teams construct?
    if (this_thr->th.th_teams_size.nteams > 1)
      ++level; // level was not increased in teams construct for team_of_masters
#endif
  if (level == 1)
    thr_bar->use_oncore_barrier = 1;
  else
    thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
  KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  // Barrier imbalance - save arrive time to the thread
  if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
    this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
  }
#endif

  (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid,
                                               team);

  if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
    register kmp_int32 child_tid;
    new_state =
        (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        thr_bar->use_oncore_barrier) {
      if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on
        // my b_arrived flag
        kmp_uint64 leaf_state =
            KMP_MASTER_TID(tid)
                ? thr_bar->b_arrived | thr_bar->leaf_state
                : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting "
                      "for leaf kids\n",
                      gtid, team->t.t_id, tid));
        kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
        flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
        if (reduce) {
          ANNOTATE_REDUCE_AFTER(reduce);
          for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids;
               ++child_tid) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_BARRIER_END(other_threads[child_tid]);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      other_threads[child_tid]->th.th_local.reduce_data);
          }
          ANNOTATE_REDUCE_BEFORE(reduce);
          ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
        }
        (void)KMP_TEST_THEN_AND64(
            (volatile kmp_int64 *)&thr_bar->b_arrived,
            ~(thr_bar->leaf_state)); // clear leaf_state bits
      }
      // Next, wait for higher level children on each child's b_arrived flag
      for (kmp_uint32 d = 1; d < thr_bar->my_level;
           ++d) { // gather lowest level threads first, but skip 0
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          register kmp_info_t *child_thr = other_threads[child_tid];
          register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    } else { // Blocktime is not infinite
      for (kmp_uint32 d = 0; d < thr_bar->my_level;
           ++d) { // Gather lowest level threads first
        kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1],
                   skip = thr_bar->skip_per_level[d];
        if (last > nproc)
          last = nproc;
        for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
          register kmp_info_t *child_thr = other_threads[child_tid];
          register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
          KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait "
                        "T#%d(%d:%d) "
                        "arrived(%p) == %llu\n",
                        gtid, team->t.t_id, tid,
                        __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                        child_tid, &child_bar->b_arrived, new_state));
          kmp_flag_64 flag(&child_bar->b_arrived, new_state);
          flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
          ANNOTATE_BARRIER_END(child_thr);
          if (reduce) {
            KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += "
                           "T#%d(%d:%d)\n",
                           gtid, team->t.t_id, tid,
                           __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                           child_tid));
            ANNOTATE_REDUCE_AFTER(reduce);
            (*reduce)(this_thr->th.th_local.reduce_data,
                      child_thr->th.th_local.reduce_data);
            ANNOTATE_REDUCE_BEFORE(reduce);
            ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
          }
        }
      }
    }
  }
  // All subordinates are gathered; now release parent if not master thread

  if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
    KA_TRACE(
        20,
        ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
         "arrived(%p): %llu => %llu\n",
         gtid, team->t.t_id, tid,
         __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id,
         thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived,
         thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
    /* Mark arrival to parent: After performing this write, a worker thread may
       not assume that the team is valid any more - it could be deallocated by
       the master thread at any time. */
    if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
        !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived
      // flag; release it
      ANNOTATE_BARRIER_BEGIN(this_thr);
      kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
      flag.release();
    } else { // Leaf does special release on the "offset" bits of parent's
      // b_arrived flag
      thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
      kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
      flag.set_waiter(other_threads[thr_bar->parent_tid]);
      flag.release();
    }
  } else { // Master thread needs to update the team's b_arrived value
    team->t.t_bar[bt].b_arrived = new_state;
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d "
                  "arrived(%p) = %llu\n",
                  gtid, team->t.t_id, tid, team->t.t_id,
                  &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
  }
  // Is the team access below unsafe or just technically invalid?
  KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for "
                "barrier type %d\n",
                gtid, team->t.t_id, tid, bt));
}
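
/* Sketch of the on-core check-in used above (illustrative only): with infinite
   blocktime, a leaf at byte offset k performs the equivalent of

     ((volatile char *)&thr_bar->parent_bar->b_arrived)[k] = 1; // oncore release

   while its parent waits until its own b_arrived equals the previous value
   OR'ed with leaf_state, then clears the leaf bytes again with
   KMP_TEST_THEN_AND64(&b_arrived, ~leaf_state). */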
997
Jonathan Peyton30419822017-05-12 18:01:32 +0000998static void __kmp_hierarchical_barrier_release(
999 enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
1000 int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
1001 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
1002 register kmp_team_t *team;
1003 register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
1004 register kmp_uint32 nproc;
1005 bool team_change = false; // indicates on-core barrier shouldn't be used
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001006
Jonathan Peyton30419822017-05-12 18:01:32 +00001007 if (KMP_MASTER_TID(tid)) {
1008 team = __kmp_threads[gtid]->th.th_team;
1009 KMP_DEBUG_ASSERT(team != NULL);
1010 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master "
1011 "entered barrier type %d\n",
1012 gtid, team->t.t_id, tid, bt));
1013 } else { // Worker threads
1014 // Wait for parent thread to release me
1015 if (!thr_bar->use_oncore_barrier ||
1016 __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 ||
1017 thr_bar->team == NULL) {
1018 // Use traditional method of waiting on my own b_go flag
1019 thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
1020 kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
1021 flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1022 ANNOTATE_BARRIER_END(this_thr);
1023 TCW_8(thr_bar->b_go,
1024 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1025 } else { // Thread barrier data is initialized, this is a leaf, blocktime is
1026 // infinite, not nested
1027 // Wait on my "offset" bits on parent's b_go flag
1028 thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
1029 kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP,
1030 thr_bar->offset, bt,
1031 this_thr USE_ITT_BUILD_ARG(itt_sync_obj));
1032 flag.wait(this_thr, TRUE);
1033 if (thr_bar->wait_flag ==
1034 KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
1035 TCW_8(thr_bar->b_go,
1036 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1037 } else { // Reset my bits on parent's b_go flag
1038 ((char *)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
1039 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001040 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001041 thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
1042 // Early exit for reaping threads releasing forkjoin barrier
1043 if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
1044 return;
1045 // The worker thread may now assume that the team is valid.
1046 team = __kmp_threads[gtid]->th.th_team;
1047 KMP_DEBUG_ASSERT(team != NULL);
1048 tid = __kmp_tid_from_gtid(gtid);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001049
Jonathan Peyton30419822017-05-12 18:01:32 +00001050 KA_TRACE(
1051 20,
1052 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1053 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
1054 KMP_MB(); // Flush all pending memory write invalidates.
1055 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001056
Jonathan Peyton30419822017-05-12 18:01:32 +00001057 nproc = this_thr->th.th_team_nproc;
1058 int level = team->t.t_level;
Jonathan Peyton441f3372015-09-21 17:24:46 +00001059#if OMP_40_ENABLED
Jonathan Peyton30419822017-05-12 18:01:32 +00001060 if (team->t.t_threads[0]
1061 ->th.th_teams_microtask) { // are we inside the teams construct?
1062 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
1063 this_thr->th.th_teams_level == level)
1064 ++level; // level was not increased in teams construct for team_of_workers
1065 if (this_thr->th.th_teams_size.nteams > 1)
1066 ++level; // level was not increased in teams construct for team_of_masters
1067 }
Jonathan Peyton441f3372015-09-21 17:24:46 +00001068#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001069 if (level == 1)
1070 thr_bar->use_oncore_barrier = 1;
1071 else
1072 thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001073
Jonathan Peyton30419822017-05-12 18:01:32 +00001074 // If the team size has increased, we still communicate with old leaves via
1075 // oncore barrier.
1076 unsigned short int old_leaf_kids = thr_bar->leaf_kids;
1077 kmp_uint64 old_leaf_state = thr_bar->leaf_state;
1078 team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid,
1079 tid, team);
1080 // But if the entire team changes, we won't use oncore barrier at all
1081 if (team_change)
1082 old_leaf_kids = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001083
1084#if KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00001085 if (propagate_icvs) {
1086 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid,
1087 FALSE);
1088 if (KMP_MASTER_TID(
1089 tid)) { // master already has copy in final destination; copy
1090 copy_icvs(&thr_bar->th_fixed_icvs,
1091 &team->t.t_implicit_task_taskdata[tid].td_icvs);
1092 } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1093 thr_bar->use_oncore_barrier) { // optimization for inf blocktime
1094 if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
1095 // leaves (on-core children) pull parent's fixed ICVs directly to local
1096 // ICV store
1097 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1098 &thr_bar->parent_bar->th_fixed_icvs);
1099 // non-leaves will get ICVs piggybacked with b_go via NGO store
1100 } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
1101 if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can
1102 // access
1103 copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
1104 else // leaves copy parent's fixed ICVs directly to local ICV store
1105 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1106 &thr_bar->parent_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001107 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001108 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001109#endif // KMP_BARRIER_ICV_PUSH
1110
Jonathan Peyton30419822017-05-12 18:01:32 +00001111 // Now, release my children
1112 if (thr_bar->my_level) { // not a leaf
1113 register kmp_int32 child_tid;
1114 kmp_uint32 last;
1115 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1116 thr_bar->use_oncore_barrier) {
1117 if (KMP_MASTER_TID(tid)) { // do a flat release
1118 // Set local b_go to bump children via NGO store of the cache line
1119 // containing IVCs and b_go.
1120 thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
1121 // Use ngo stores if available; b_go piggybacks in the last 8 bytes of
1122 // the cache line
1123 ngo_load(&thr_bar->th_fixed_icvs);
1124 // This loops over all the threads skipping only the leaf nodes in the
1125 // hierarchy
1126 for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc;
1127 child_tid += thr_bar->skip_per_level[1]) {
1128 register kmp_bstate_t *child_bar =
1129 &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
1130 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1131 "releasing T#%d(%d:%d)"
1132 " go(%p): %u => %u\n",
1133 gtid, team->t.t_id, tid,
1134 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1135 child_tid, &child_bar->b_go, child_bar->b_go,
1136 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1137 // Use ngo store (if available) to both store ICVs and release child
1138 // via child's b_go
1139 ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001140 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001141 ngo_sync();
1142 }
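      // [Illustrative sketch, not compiled] The flat release above relies on
      // th_fixed_icvs and b_go sharing one cache line, with b_go in the last 8
      // bytes, so a single cache-line store (ngo_store_go: an NGO store on MIC,
      // a plain CACHE_LINE memcpy elsewhere) both delivers the ICVs and flips
      // the child's go flag. A hypothetical stand-in layout, not the real
      // kmp_bstate_t:
#if 0
      struct alignas(CACHE_LINE) icv_go_line {
        char icvs[CACHE_LINE - sizeof(kmp_uint64)]; // stand-in for th_fixed_icvs
        volatile kmp_uint64 b_go; // release flag occupies the last 8 bytes
      };
      // One store per child: copy the parent's whole line (ICVs plus b_go
      // already set to KMP_BARRIER_STATE_BUMP) over the child's line,
      // releasing it, e.g.
      //   KMP_MEMCPY(&child_line, &parent_line, CACHE_LINE);
#endif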
1143 TCW_8(thr_bar->b_go,
1144 KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
1145 // Now, release leaf children
1146 if (thr_bar->leaf_kids) { // if there are any
1147 // We test team_change on the off-chance that the level 1 team changed.
1148 if (team_change ||
1149 old_leaf_kids < thr_bar->leaf_kids) { // some old, some new
1150 if (old_leaf_kids) { // release old leaf kids
1151 thr_bar->b_go |= old_leaf_state;
1152 }
1153 // Release new leaf kids
1154 last = tid + thr_bar->skip_per_level[1];
1155 if (last > nproc)
1156 last = nproc;
1157 for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last;
1158 ++child_tid) { // skip_per_level[0]=1
1159 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
1160 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1161 KA_TRACE(
1162 20,
1163 ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
1164 " T#%d(%d:%d) go(%p): %u => %u\n",
1165 gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
1166 team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
1167 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1168 // Release child using child's b_go flag
1169 ANNOTATE_BARRIER_BEGIN(child_thr);
1170 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1171 flag.release();
1172 }
1173 } else { // Release all children at once with leaf_state bits on my own
1174 // b_go flag
1175 thr_bar->b_go |= thr_bar->leaf_state;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001176 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001177 }
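      // [Illustrative sketch, not compiled] The leaf_state path above releases
      // every on-core (leaf) child with one OR into the parent's own 64-bit
      // b_go: each leaf waits on its own slice of that word, and leaf_state is
      // the precomputed mask of all their "go" bits. A hypothetical standalone
      // analogue (names, layout, and bit assignment are illustrative only):
#if 0
      #include <atomic>
      #include <cstdint>

      std::atomic<uint64_t> parent_go{0}; // plays the role of thr_bar->b_go
      const uint64_t leaf_state = 0x0101; // one "go" byte per leaf child

      void leaf_wait(int my_byte) { // each leaf spins on its own byte
        while (((parent_go.load(std::memory_order_acquire) >> (8 * my_byte)) &
                0xff) == 0) {
        }
      }
      void parent_release_all_leaves() { // a single RMW wakes every leaf
        parent_go.fetch_or(leaf_state, std::memory_order_release);
      }
#endif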
1178 } else { // Blocktime is not infinite; do a simple hierarchical release
1179 for (int d = thr_bar->my_level - 1; d >= 0;
1180 --d) { // Release highest level threads first
1181 last = tid + thr_bar->skip_per_level[d + 1];
1182 kmp_uint32 skip = thr_bar->skip_per_level[d];
1183 if (last > nproc)
1184 last = nproc;
1185 for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) {
1186 register kmp_info_t *child_thr = team->t.t_threads[child_tid];
1187 register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
1188 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) "
1189 "releasing T#%d(%d:%d) go(%p): %u => %u\n",
1190 gtid, team->t.t_id, tid,
1191 __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
1192 child_tid, &child_bar->b_go, child_bar->b_go,
1193 child_bar->b_go + KMP_BARRIER_STATE_BUMP));
1194 // Release child using child's b_go flag
1195 ANNOTATE_BARRIER_BEGIN(child_thr);
1196 kmp_flag_64 flag(&child_bar->b_go, child_thr);
1197 flag.release();
1198 }
1199 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001200 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001201#if KMP_BARRIER_ICV_PUSH
1202 if (propagate_icvs && !KMP_MASTER_TID(tid))
1203 // non-leaves copy ICVs from fixed ICVs to local dest
1204 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1205 &thr_bar->th_fixed_icvs);
1206#endif // KMP_BARRIER_ICV_PUSH
1207 }
1208 KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for "
1209 "barrier type %d\n",
1210 gtid, team->t.t_id, tid, bt));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001211}
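// [Illustrative sketch, not compiled] The non-oncore release loop above can be
// read as follows: a parent at tid owns the block of skip_per_level[d+1]
// consecutive tids starting at itself and, for each of its levels d (highest
// level first), releases one child every skip_per_level[d] tids within that
// block. A hypothetical standalone walk of that order, assuming a branching
// factor of 4 and 64 threads (so skip_per_level[d] == 4^d):
#if 0
#include <cstdio>

int main() {
  const int nproc = 64;
  const unsigned skip_per_level[4] = {1, 4, 16, 64};
  const int tid = 0, my_level = 3; // tid 0 is a level-3 parent in this example
  for (int d = my_level - 1; d >= 0; --d) {
    unsigned last = tid + skip_per_level[d + 1];
    if (last > (unsigned)nproc)
      last = nproc;
    for (int child_tid = tid + skip_per_level[d]; child_tid < (int)last;
         child_tid += skip_per_level[d])
      std::printf("level %d: parent %d releases child %d\n", d, tid, child_tid);
  }
  return 0; // prints children 16,32,48 then 4,8,12 then 1,2,3
}
#endif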
1212
Jonathan Peyton30419822017-05-12 18:01:32 +00001213
1214// End of Barrier Algorithms
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001215
1216// Internal function to do a barrier.
1217/* If is_split is true, do a split barrier; otherwise, do a plain barrier.
Jonathan Peyton30419822017-05-12 18:01:32 +00001218 If reduce is non-NULL, do a split reduction barrier; otherwise, do a split
1219 barrier.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001220 Returns 0 if master thread, 1 if worker thread. */
Jonathan Peyton30419822017-05-12 18:01:32 +00001221int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
1222 size_t reduce_size, void *reduce_data,
1223 void (*reduce)(void *, void *)) {
1224 KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1225 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1226 register int tid = __kmp_tid_from_gtid(gtid);
1227 register kmp_info_t *this_thr = __kmp_threads[gtid];
1228 register kmp_team_t *team = this_thr->th.th_team;
1229 register int status = 0;
1230 ident_t *loc = __kmp_threads[gtid]->th.th_ident;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001231#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001232 ompt_task_id_t my_task_id;
1233 ompt_parallel_id_t my_parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001234#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001235
Jonathan Peyton30419822017-05-12 18:01:32 +00001236 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid,
1237 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001238
Jonathan Peyton30419822017-05-12 18:01:32 +00001239 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001240#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001241 if (ompt_enabled) {
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001242#if OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00001243 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
1244 my_parallel_id = team->t.ompt_team_info.parallel_id;
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001245
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001246#if OMPT_TRACE
Jonathan Peyton30419822017-05-12 18:01:32 +00001247 if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
1248 if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
1249 ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
1250 my_parallel_id, my_task_id);
Olga Malyshevadbdcfa12017-04-04 13:56:50 +00001251 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001252 }
1253#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001254 if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1255 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id,
1256 my_task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001257 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001258#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001259 // It is OK to report the barrier state after the barrier begin callback.
1260 // According to the OMPT specification, a compliant implementation may
1261 // even delay reporting this state until the thread begins waiting at the barrier.
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001262 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
Jonathan Peyton30419822017-05-12 18:01:32 +00001263 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001264#endif
1265
Jonathan Peyton30419822017-05-12 18:01:32 +00001266 if (!team->t.t_serialized) {
1267#if USE_ITT_BUILD
1268 // This value will be used in itt notify events below.
1269 void *itt_sync_obj = NULL;
1270#if USE_ITT_NOTIFY
1271 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1272 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1273#endif
1274#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001275 if (__kmp_tasking_mode == tskm_extra_barrier) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001276 __kmp_tasking_barrier(team, this_thr, gtid);
1277 KA_TRACE(15,
1278 ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1279 __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001280 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001281
Jonathan Peyton30419822017-05-12 18:01:32 +00001282 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1283 access it when the team struct is not guaranteed to exist. */
1284 // See note about the corresponding code in __kmp_join_barrier() being
1285 // performance-critical.
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001286 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001287#if KMP_USE_MONITOR
Jonathan Peyton30419822017-05-12 18:01:32 +00001288 this_thr->th.th_team_bt_intervals =
1289 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1290 this_thr->th.th_team_bt_set =
1291 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001292#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001293 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001294#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001295 }
1296
1297#if USE_ITT_BUILD
1298 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001299 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001300#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001301#if USE_DEBUGGER
1302 // Let the debugger know: the thread arrived at the barrier and is waiting.
1303 if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
1304 team->t.t_bar[bt].b_master_arrived += 1;
1305 } else {
1306 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1307 } // if
1308#endif /* USE_DEBUGGER */
1309 if (reduce != NULL) {
1310 // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1311 this_thr->th.th_local.reduce_data = reduce_data;
1312 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001313
Jonathan Peyton30419822017-05-12 18:01:32 +00001314 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1315 __kmp_task_team_setup(
1316 this_thr, team,
1317 0); // use 0 to only setup the current team if nthreads > 1
1318
1319 switch (__kmp_barrier_gather_pattern[bt]) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001320 case bp_hyper_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001321 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1322 // to 0; use linear
1323 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid,
1324 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1325 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001326 }
1327 case bp_hierarchical_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001328 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid,
1329 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1330 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001331 }
1332 case bp_tree_bar: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001333 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits
1334 // to 0; use linear
1335 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid,
1336 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
1337 break;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001338 }
1339 default: {
Jonathan Peyton30419822017-05-12 18:01:32 +00001340 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid,
1341 reduce USE_ITT_BUILD_ARG(itt_sync_obj));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001342 }
1343 }
1344
Jonathan Peyton30419822017-05-12 18:01:32 +00001345 KMP_MB();
1346
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001347 if (KMP_MASTER_TID(tid)) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001348 status = 0;
1349 if (__kmp_tasking_mode != tskm_immediate_exec) {
1350 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1351 }
1352#if USE_DEBUGGER
1353 // Let the debugger know: all threads have arrived and are starting to
1354 // leave the barrier.
1355 team->t.t_bar[bt].b_team_arrived += 1;
1356#endif
1357
1358#if OMP_40_ENABLED
1359 // Reset cancellation flag for worksharing constructs
1360 if (team->t.t_cancel_request == cancel_loop ||
1361 team->t.t_cancel_request == cancel_sections) {
1362 team->t.t_cancel_request = cancel_noreq;
1363 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001364#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001365#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001366 /* TODO: In case of split reduction barrier, master thread may send
1367 acquired event early, before the final summation into the shared
1368 variable is done (final summation can be a long operation for array
1369 reductions). */
1370 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1371 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1372#endif /* USE_ITT_BUILD */
1373#if USE_ITT_BUILD && USE_ITT_NOTIFY
1374 // Barrier - report frame end (only if active_level == 1)
1375 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1376 __kmp_forkjoin_frames_mode &&
1377#if OMP_40_ENABLED
1378 this_thr->th.th_teams_microtask == NULL &&
1379#endif
1380 team->t.t_active_level == 1) {
1381 kmp_uint64 cur_time = __itt_get_timestamp();
1382 kmp_info_t **other_threads = team->t.t_threads;
1383 int nproc = this_thr->th.th_team_nproc;
1384 int i;
1385 switch (__kmp_forkjoin_frames_mode) {
1386 case 1:
1387 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1388 loc, nproc);
1389 this_thr->th.th_frame_time = cur_time;
1390 break;
1391 case 2: // AC 2015-01-19: currently does not work for hierarchical (to
1392 // be fixed)
1393 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time,
1394 1, loc, nproc);
1395 break;
1396 case 3:
1397 if (__itt_metadata_add_ptr) {
1398 // Initialize with master's wait time
1399 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1400 // Set arrive time to zero to be able to check it in
1401 // __kmp_invoke_task(); the same is done inside the loop below
1402 this_thr->th.th_bar_arrive_time = 0;
1403 for (i = 1; i < nproc; ++i) {
1404 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1405 other_threads[i]->th.th_bar_arrive_time = 0;
1406 }
1407 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1408 cur_time, delta,
1409 (kmp_uint64)(reduce != NULL));
1410 }
1411 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1412 loc, nproc);
1413 this_thr->th.th_frame_time = cur_time;
1414 break;
1415 }
1416 }
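      // For example (case 3), with cur_time == 100 and per-thread arrive times
      // {90, 92, 98, 99} in a 4-thread team, delta accumulates
      // 10 + 8 + 2 + 1 == 21 time units of wait, which is the imbalance value
      // passed to __kmp_itt_metadata_imbalance() above.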
1417#endif /* USE_ITT_BUILD */
1418 } else {
1419 status = 1;
1420#if USE_ITT_BUILD
1421 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1422 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1423#endif /* USE_ITT_BUILD */
1424 }
1425 if (status == 1 || !is_split) {
1426 switch (__kmp_barrier_release_pattern[bt]) {
1427 case bp_hyper_bar: {
1428 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1429 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1430 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1431 break;
1432 }
1433 case bp_hierarchical_bar: {
1434 __kmp_hierarchical_barrier_release(
1435 bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1436 break;
1437 }
1438 case bp_tree_bar: {
1439 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1440 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1441 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1442 break;
1443 }
1444 default: {
1445 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1446 FALSE USE_ITT_BUILD_ARG(itt_sync_obj));
1447 }
1448 }
1449 if (__kmp_tasking_mode != tskm_immediate_exec) {
1450 __kmp_task_team_sync(this_thr, team);
1451 }
1452 }
1453
1454#if USE_ITT_BUILD
1455 /* GEH: TODO: Move this under if-condition above and also include in
1456 __kmp_end_split_barrier(). This will more accurately represent the actual
1457 release time of the threads for split barriers. */
1458 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1459 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1460#endif /* USE_ITT_BUILD */
1461 } else { // Team is serialized.
1462 status = 0;
1463 if (__kmp_tasking_mode != tskm_immediate_exec) {
1464#if OMP_45_ENABLED
1465 if (this_thr->th.th_task_team != NULL) {
1466 void *itt_sync_obj = NULL;
1467#if USE_ITT_NOTIFY
1468 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1469 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1470 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1471 }
1472#endif
1473
1474 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks ==
1475 TRUE);
1476 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1477 __kmp_task_team_setup(this_thr, team, 0);
1478
1479#if USE_ITT_BUILD
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001480 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
Jonathan Peyton30419822017-05-12 18:01:32 +00001481 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1482#endif /* USE_ITT_BUILD */
1483 }
1484#else
1485 // The task team should be NULL for serialized code (tasks will be
1486 // executed immediately)
1487 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
1488 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
1489#endif
1490 }
1491 }
1492 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1493 gtid, __kmp_team_from_gtid(gtid)->t.t_id,
1494 __kmp_tid_from_gtid(gtid), status));
1495
1496#if OMPT_SUPPORT
1497 if (ompt_enabled) {
1498#if OMPT_BLAME
1499 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1500 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id,
1501 my_task_id);
1502 }
1503#endif
1504 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1505 }
1506#endif
1507 ANNOTATE_BARRIER_END(&team->t.t_bar);
1508
1509 return status;
1510}
1511
1512void __kmp_end_split_barrier(enum barrier_type bt, int gtid) {
1513 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1514 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
1515 int tid = __kmp_tid_from_gtid(gtid);
1516 kmp_info_t *this_thr = __kmp_threads[gtid];
1517 kmp_team_t *team = this_thr->th.th_team;
1518
1519 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1520 if (!team->t.t_serialized) {
1521 if (KMP_MASTER_GTID(gtid)) {
1522 switch (__kmp_barrier_release_pattern[bt]) {
1523 case bp_hyper_bar: {
1524 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1525 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid,
1526 FALSE USE_ITT_BUILD_ARG(NULL));
1527 break;
1528 }
1529 case bp_hierarchical_bar: {
1530 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid,
1531 FALSE USE_ITT_BUILD_ARG(NULL));
1532 break;
1533 }
1534 case bp_tree_bar: {
1535 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1536 __kmp_tree_barrier_release(bt, this_thr, gtid, tid,
1537 FALSE USE_ITT_BUILD_ARG(NULL));
1538 break;
1539 }
1540 default: {
1541 __kmp_linear_barrier_release(bt, this_thr, gtid, tid,
1542 FALSE USE_ITT_BUILD_ARG(NULL));
1543 }
1544 }
1545 if (__kmp_tasking_mode != tskm_immediate_exec) {
1546 __kmp_task_team_sync(this_thr, team);
1547 } // if
1548 }
1549 }
1550 ANNOTATE_BARRIER_END(&team->t.t_bar);
1551}
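// [Illustrative sketch, not compiled] How the two entry points above combine
// for a split reduction barrier: every thread contributes reduce_data in the
// gather phase, workers stay parked in the release phase, and the thread that
// gets status == 0 finishes the reduction before letting them go. This is a
// hypothetical usage outline, not the actual __kmpc_reduce code; the barrier
// type and combine function are placeholders.
#if 0
static void my_combine(void *lhs, void *rhs) { /* fold *rhs into *lhs */ }

void reduce_then_release(int gtid, void *my_partial, size_t size) {
  // Gather: partial results are combined via my_combine() on the way up.
  int status = __kmp_barrier(bs_plain_barrier, gtid, TRUE /* is_split */, size,
                             my_partial, my_combine);
  if (status == 0) {
    // Master path: workers are still waiting in the release phase of
    // __kmp_barrier(); finish any remaining work on the combined result here,
    // then release them.
    __kmp_end_split_barrier(bs_plain_barrier, gtid);
  }
  // status == 1: worker; it returns only after the master's release above.
}
#endif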
1552
1553void __kmp_join_barrier(int gtid) {
1554 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
1555 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1556 register kmp_info_t *this_thr = __kmp_threads[gtid];
1557 register kmp_team_t *team;
1558 register kmp_uint nproc;
1559 kmp_info_t *master_thread;
1560 int tid;
1561#ifdef KMP_DEBUG
1562 int team_id;
1563#endif /* KMP_DEBUG */
1564#if USE_ITT_BUILD
1565 void *itt_sync_obj = NULL;
1566#if USE_ITT_NOTIFY
1567 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
1568 // Get object created at fork_barrier
1569 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1570#endif
1571#endif /* USE_ITT_BUILD */
1572 KMP_MB();
1573
1574 // Get current info
1575 team = this_thr->th.th_team;
1576 nproc = this_thr->th.th_team_nproc;
1577 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1578 tid = __kmp_tid_from_gtid(gtid);
1579#ifdef KMP_DEBUG
1580 team_id = team->t.t_id;
1581#endif /* KMP_DEBUG */
1582 master_thread = this_thr->th.th_team_master;
1583#ifdef KMP_DEBUG
1584 if (master_thread != team->t.t_threads[0]) {
1585 __kmp_print_structure();
1586 }
1587#endif /* KMP_DEBUG */
1588 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1589 KMP_MB();
1590
1591 // Verify state
1592 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1593 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1594 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1595 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1596 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
1597 gtid, team_id, tid));
1598
1599 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
1600#if OMPT_SUPPORT
1601#if OMPT_TRACE
1602 if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1603 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1604 team->t.ompt_team_info.parallel_id,
1605 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1606 }
1607#endif
1608 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1609#endif
1610
1611 if (__kmp_tasking_mode == tskm_extra_barrier) {
1612 __kmp_tasking_barrier(team, this_thr, gtid);
1613 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid,
1614 team_id, tid));
1615 }
1616#ifdef KMP_DEBUG
1617 if (__kmp_tasking_mode != tskm_immediate_exec) {
1618 KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = "
1619 "%p, th_task_team = %p\n",
1620 __kmp_gtid_from_thread(this_thr), team_id,
1621 team->t.t_task_team[this_thr->th.th_task_state],
1622 this_thr->th.th_task_team));
1623 KMP_DEBUG_ASSERT(this_thr->th.th_task_team ==
1624 team->t.t_task_team[this_thr->th.th_task_state]);
1625 }
1626#endif /* KMP_DEBUG */
1627
1628 /* Copy the blocktime info to the thread, where __kmp_wait_template() can
1629 access it when the team struct is not guaranteed to exist. Doing these
1630 loads causes a cache miss that slows down EPCC parallel by 2x. As a workaround,
1631 we do not perform the copy if blocktime=infinite, since the values are not
1632 used by __kmp_wait_template() in that case. */
1633 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1634#if KMP_USE_MONITOR
1635 this_thr->th.th_team_bt_intervals =
1636 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1637 this_thr->th.th_team_bt_set =
1638 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1639#else
1640 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1641#endif
1642 }
1643
1644#if USE_ITT_BUILD
1645 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1646 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001647#endif /* USE_ITT_BUILD */
1648
Jonathan Peyton30419822017-05-12 18:01:32 +00001649 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1650 case bp_hyper_bar: {
1651 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1652 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1653 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1654 break;
1655 }
1656 case bp_hierarchical_bar: {
1657 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1658 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1659 break;
1660 }
1661 case bp_tree_bar: {
1662 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1663 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1664 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1665 break;
1666 }
1667 default: {
1668 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid,
1669 NULL USE_ITT_BUILD_ARG(itt_sync_obj));
1670 }
1671 }
1672
1673 /* From this point on, the team data structure may be deallocated at any time
1674 by the master thread - it is unsafe to reference it in any of the worker
1675 threads. Any per-team data items that need to be referenced before the
1676 end of the barrier should be moved to the kmp_task_team_t structs. */
1677 if (KMP_MASTER_TID(tid)) {
1678 if (__kmp_tasking_mode != tskm_immediate_exec) {
1679 __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
1680 }
1681#if KMP_STATS_ENABLED
1682 // Have master thread flag the workers to indicate they are now waiting for
1683 // the next parallel region. Also wake them up so they switch their timers to
1684 // idle.
1685 for (int i = 0; i < team->t.t_nproc; ++i) {
1686 kmp_info_t *team_thread = team->t.t_threads[i];
1687 if (team_thread == this_thr)
1688 continue;
1689 team_thread->th.th_stats->setIdleFlag();
1690 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME &&
1691 team_thread->th.th_sleep_loc != NULL)
1692 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread),
1693 team_thread->th.th_sleep_loc);
1694 }
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001695#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001696#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001697 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1698 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1699#endif /* USE_ITT_BUILD */
1700
1701#if USE_ITT_BUILD && USE_ITT_NOTIFY
1702 // Join barrier - report frame end
1703 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
1704 __kmp_forkjoin_frames_mode &&
1705#if OMP_40_ENABLED
1706 this_thr->th.th_teams_microtask == NULL &&
1707#endif
1708 team->t.t_active_level == 1) {
1709 kmp_uint64 cur_time = __itt_get_timestamp();
1710 ident_t *loc = team->t.t_ident;
1711 kmp_info_t **other_threads = team->t.t_threads;
1712 int nproc = this_thr->th.th_team_nproc;
1713 int i;
1714 switch (__kmp_forkjoin_frames_mode) {
1715 case 1:
1716 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1717 loc, nproc);
1718 break;
1719 case 2:
1720 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1,
1721 loc, nproc);
1722 break;
1723 case 3:
1724 if (__itt_metadata_add_ptr) {
1725 // Initialize with master's wait time
1726 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
1727 // Set arrive time to zero to be able to check it in
1728 // __kmp_invoke_task(); the same is done inside the loop below
1729 this_thr->th.th_bar_arrive_time = 0;
1730 for (i = 1; i < nproc; ++i) {
1731 delta += (cur_time - other_threads[i]->th.th_bar_arrive_time);
1732 other_threads[i]->th.th_bar_arrive_time = 0;
1733 }
1734 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time,
1735 cur_time, delta, 0);
1736 }
1737 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0,
1738 loc, nproc);
1739 this_thr->th.th_frame_time = cur_time;
1740 break;
1741 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001742 }
1743#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001744 }
1745#if USE_ITT_BUILD
1746 else {
1747 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1748 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1749 }
1750#endif /* USE_ITT_BUILD */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001751
1752#if KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001753 if (KMP_MASTER_TID(tid)) {
1754 KA_TRACE(
1755 15,
1756 ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1757 gtid, team_id, tid, nproc));
1758 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001759#endif /* KMP_DEBUG */
1760
Jonathan Peyton30419822017-05-12 18:01:32 +00001761 // TODO now, mark worker threads as done so they may be disbanded
1762 KMP_MB(); // Flush all pending memory write invalidates.
1763 KA_TRACE(10,
1764 ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001765
1766#if OMPT_SUPPORT
Jonathan Peyton30419822017-05-12 18:01:32 +00001767 if (ompt_enabled) {
Jonathan Peytoncab67cc2015-09-18 16:24:46 +00001768#if OMPT_BLAME
Jonathan Peyton30419822017-05-12 18:01:32 +00001769 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
1770 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1771 team->t.ompt_team_info.parallel_id,
1772 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001773 }
1774#endif
Jonathan Peyton30419822017-05-12 18:01:32 +00001775
1776 // return to default state
1777 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
1778 }
1779#endif
1780 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001781}
1782
Jonathan Peyton30419822017-05-12 18:01:32 +00001783// TODO release worker threads' fork barriers as we are ready instead of all at
1784// once
1785void __kmp_fork_barrier(int gtid, int tid) {
1786 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
1787 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1788 kmp_info_t *this_thr = __kmp_threads[gtid];
1789 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001790#if USE_ITT_BUILD
Jonathan Peyton30419822017-05-12 18:01:32 +00001791 void *itt_sync_obj = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001792#endif /* USE_ITT_BUILD */
Jonathan Peyton30419822017-05-12 18:01:32 +00001793 if (team)
1794 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001795
Jonathan Peyton30419822017-05-12 18:01:32 +00001796 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid,
1797 (team != NULL) ? team->t.t_id : -1, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001798
Jonathan Peyton30419822017-05-12 18:01:32 +00001799 // th_team pointer only valid for master thread here
1800 if (KMP_MASTER_TID(tid)) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001801#if USE_ITT_BUILD && USE_ITT_NOTIFY
Jonathan Peyton30419822017-05-12 18:01:32 +00001802 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1803 // Create itt barrier object
1804 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1805 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1806 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001807#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1808
1809#ifdef KMP_DEBUG
Jonathan Peyton30419822017-05-12 18:01:32 +00001810 register kmp_info_t **other_threads = team->t.t_threads;
1811 register int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812
Jonathan Peyton30419822017-05-12 18:01:32 +00001813 // Verify state
1814 KMP_MB();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001815
Jonathan Peyton30419822017-05-12 18:01:32 +00001816 for (i = 1; i < team->t.t_nproc; ++i) {
1817 KA_TRACE(500,
1818 ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go "
1819 "== %u.\n",
1820 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1821 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1822 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1823 KMP_DEBUG_ASSERT(
1824 (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) &
1825 ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE);
1826 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1827 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001828#endif
1829
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001830 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001831 __kmp_task_team_setup(
1832 this_thr, team,
1833 0); // 0 indicates setup current task team if nthreads > 1
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001834 }
1835
Jonathan Peyton30419822017-05-12 18:01:32 +00001836 /* The master thread may have changed its blocktime between the join barrier
1837 and the fork barrier. Copy the blocktime info to the thread, where
1838 __kmp_wait_template() can access it when the team struct is not
1839 guaranteed to exist. */
1840 // See note about the corresponding code in __kmp_join_barrier() being
1841 // performance-critical
1842 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
1843#if KMP_USE_MONITOR
1844 this_thr->th.th_team_bt_intervals =
1845 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1846 this_thr->th.th_team_bt_set =
1847 team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
1848#else
1849 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001850#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001851 }
Jonathan Peyton30419822017-05-12 18:01:32 +00001852 } // master
1853
1854 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1855 case bp_hyper_bar: {
1856 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1857 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1858 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1859 break;
1860 }
1861 case bp_hierarchical_bar: {
1862 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1863 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1864 break;
1865 }
1866 case bp_tree_bar: {
1867 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1868 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1869 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1870 break;
1871 }
1872 default: {
1873 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid,
1874 TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
1875 }
1876 }
1877
1878 // Early exit for reaping threads releasing forkjoin barrier
1879 if (TCR_4(__kmp_global.g.g_done)) {
1880 this_thr->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001881
1882#if USE_ITT_BUILD && USE_ITT_NOTIFY
1883 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
Jonathan Peyton30419822017-05-12 18:01:32 +00001884 if (!KMP_MASTER_TID(tid)) {
1885 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1886 if (itt_sync_obj)
1887 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1888 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001889 }
1890#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
Jonathan Peyton30419822017-05-12 18:01:32 +00001891 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1892 return;
1893 }
1894
1895 /* We can now assume that a valid team structure has been allocated by the
1896 master and propagated to all worker threads. The current thread, however,
1897 may not be part of the team, so we can't blindly assume that the team
1898 pointer is non-null. */
1899 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1900 KMP_DEBUG_ASSERT(team != NULL);
1901 tid = __kmp_tid_from_gtid(gtid);
1902
1903#if KMP_BARRIER_ICV_PULL
1904 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1905 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1906 implicit task has this data before this function is called. We cannot
1907 modify __kmp_fork_call() to look at the fixed ICVs in the master's thread
1908 struct, because it is not always the case that the threads arrays have
1909 been allocated when __kmp_fork_call() is executed. */
1910 {
1911 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
1912 if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
1913 // Copy the initial ICVs from the master's thread struct to the implicit
1914 // task for this tid.
1915 KA_TRACE(10,
1916 ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1917 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team,
1918 tid, FALSE);
1919 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1920 &team->t.t_threads[0]
1921 ->th.th_bar[bs_forkjoin_barrier]
1922 .bb.th_fixed_icvs);
1923 }
1924 }
1925#endif // KMP_BARRIER_ICV_PULL
1926
1927 if (__kmp_tasking_mode != tskm_immediate_exec) {
1928 __kmp_task_team_sync(this_thr, team);
1929 }
1930
1931#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1932 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1933 if (proc_bind == proc_bind_intel) {
1934#endif
1935#if KMP_AFFINITY_SUPPORTED
1936 // Call dynamic affinity settings
1937 if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1938 __kmp_balanced_affinity(tid, team->t.t_nproc);
1939 }
1940#endif // KMP_AFFINITY_SUPPORTED
1941#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1942 } else if (proc_bind != proc_bind_false) {
1943 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1944 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
1945 __kmp_gtid_from_thread(this_thr),
1946 this_thr->th.th_current_place));
1947 } else {
1948 __kmp_affinity_set_place(gtid);
1949 }
1950 }
1951#endif
1952
1953#if USE_ITT_BUILD && USE_ITT_NOTIFY
1954 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1955 if (!KMP_MASTER_TID(tid)) {
1956 // Get correct barrier object
1957 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1958 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1959 } // (prepare called inside barrier_release)
1960 }
1961#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1962 ANNOTATE_BARRIER_END(&team->t.t_bar);
1963 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid,
1964 team->t.t_id, tid));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001965}
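// [Illustrative sketch, not compiled] From a worker's perspective, the join
// and fork barriers above bracket every parallel region. A simplified outline
// of that loop (not the real __kmp_launch_thread logic):
#if 0
void worker_lifecycle_sketch(int gtid, int tid) {
  for (;;) {
    // Park in the fork barrier until the master releases the team; the ICVs
    // for the new region arrive here (pushed down the tree or pulled from the
    // master's thread struct, depending on the ICV propagation scheme).
    __kmp_fork_barrier(gtid, tid);
    if (TCR_4(__kmp_global.g.g_done))
      break; // runtime is shutting down; thread is being reaped
    // ... run the microtask for this parallel region ...
    // Gather in the join barrier; once this returns, the team structure may
    // already have been freed by the master, so it must not be referenced.
    __kmp_join_barrier(gtid);
  }
}
#endif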
1966
Jonathan Peyton30419822017-05-12 18:01:32 +00001967void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
1968 kmp_internal_control_t *new_icvs, ident_t *loc) {
1969 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001970
Jonathan Peyton30419822017-05-12 18:01:32 +00001971 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
1972 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001973
Jonathan Peyton30419822017-05-12 18:01:32 +00001974/* Master thread's copy of the ICVs was set up on the implicit taskdata in
1975 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
1976 implicit task has this data before this function is called. */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001977#if KMP_BARRIER_ICV_PULL
Jonathan Peyton30419822017-05-12 18:01:32 +00001978 /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains
1979 untouched), where all of the worker threads can access them and make their
1980 own copies after the barrier. */
1981 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
1982 // allocated at this point
1983 copy_icvs(
1984 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
1985 new_icvs);
1986 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
1987 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001988#elif KMP_BARRIER_ICV_PUSH
Jonathan Peyton30419822017-05-12 18:01:32 +00001989 // The ICVs will be propagated in the fork barrier, so nothing needs to be
1990 // done here.
1991 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
1992 team->t.t_threads[0], team));
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001993#else
Jonathan Peyton30419822017-05-12 18:01:32 +00001994 // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
1995 // time.
1996 ngo_load(new_icvs);
1997 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
1998 // allocated at this point
1999 for (int f = 1; f < new_nproc; ++f) { // Skip the master thread
2000 // TODO: GEH - pass in better source location info since usually NULL here
2001 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2002 f, team->t.t_threads[f], team));
2003 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
2004 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
2005 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
2006 f, team->t.t_threads[f], team));
2007 }
2008 ngo_sync();
Jim Cownie4cc4bb42014-10-07 16:25:50 +00002009#endif // KMP_BARRIER_ICV_PULL
2010}
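// [Illustrative note] The three propagation strategies selected above trade
// where the O(nproc) copy work lands: with KMP_BARRIER_ICV_PULL the master
// publishes once into its own th_fixed_icvs and every worker copies for itself
// in __kmp_fork_barrier(); with KMP_BARRIER_ICV_PUSH the copies ride along
// with the fork-barrier release, parent to child down the tree; otherwise the
// master performs all nproc-1 copies here, up front, via ngo_store_icvs().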