/*
 * kmp_barrier.cpp
 */


//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//


#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_stats.h"
#include "kmp_itt.h"
#include "kmp_os.h"


#if KMP_MIC
#include <immintrin.h>
#define USE_NGO_STORES 1
#endif // KMP_MIC

#include "tsan_annotations.h"

#if KMP_MIC && USE_NGO_STORES
// ICV copying
#define ngo_load(src)            __m512d Vt = _mm512_load_pd((void *)(src))
#define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_store_go(dst, src)   _mm512_storenrngo_pd((void *)(dst), Vt)
#define ngo_sync()               __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory")
#else
#define ngo_load(src)            ((void)0)
#define ngo_store_icvs(dst, src) copy_icvs((dst), (src))
#define ngo_store_go(dst, src)   KMP_MEMCPY((dst), (src), CACHE_LINE)
#define ngo_sync()               ((void)0)
#endif /* KMP_MIC && USE_NGO_STORES */
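
/* Note on the ngo_* macros above (a reading aid, not new behavior): on KMP_MIC
   builds they use 512-bit non-globally-ordered streaming stores to copy a whole
   cache line at once, so a worker's ICVs and its b_go release flag can travel in
   a single store, and ngo_sync() serves as a memory fence that makes those
   stores visible. On every other target they fall back to copy_icvs()/KMP_MEMCPY,
   which is the behavior to assume when reading the release paths below. */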

void __kmp_print_structure(void); // Forward declaration

// ---------------------------- Barrier Algorithms ----------------------------

// Linear Barrier
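/* Sketch of the scheme implemented below: in the gather phase every worker bumps
   its own b_arrived flag and the master waits on each worker's flag in turn
   (folding reduce_data together when a reduce callback is supplied); in the
   release phase the master bumps every worker's b_go flag while each worker spins
   only on its own b_go. The master does O(nproc) serialized work, which is why
   the tree, hyper, and hierarchical variants further down exist. */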
static void
__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            void (*reduce)(void *, void *)
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;

    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // We now perform a linear reduction to signal that all of the threads have arrived.
    if (!KMP_MASTER_TID(tid)) {
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived,
                      thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
        // Mark arrival to master thread
        /* After performing this write, a worker thread may not assume that the team is valid
           any more - it could be deallocated by the master thread at any time. */
        ANNOTATE_BARRIER_BEGIN(this_thr);
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]);
        flag.release();
    } else {
        register kmp_balign_team_t *team_bar = &team->t.t_bar[bt];
        register int nproc = this_thr->th.th_team_nproc;
        register int i;
        // Don't have to worry about sleep bit here or atomic since team setting
        register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP;

        // Collect all the worker team member threads.
        for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (i+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                          &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state));

            // Wait for worker thread to arrive
            kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(other_threads[i]);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and the other thread time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       other_threads[i]->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid,
                               team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data,
                          other_threads[i]->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
        }
        // Don't have to worry about sleep bit here or atomic since team setting
        team_bar->b_arrived = new_state;
        KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state));
    }
    KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                             int propagate_icvs
                             USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_team_t *team;

    if (KMP_MASTER_TID(tid)) {
        register unsigned int i;
        register kmp_uint32 nproc = this_thr->th.th_team_nproc;
        register kmp_info_t **other_threads;

        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        other_threads = team->t.t_threads;

        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));

        if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
            {
                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                if (propagate_icvs) {
                    ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
                    for (i=1; i<nproc; ++i) {
                        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], team, i, FALSE);
                        ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs,
                                       &team->t.t_implicit_task_taskdata[0].td_icvs);
                    }
                    ngo_sync();
                }
            }
#endif // KMP_BARRIER_ICV_PUSH

            // Now, release all of the worker threads
            for (i=1; i<nproc; ++i) {
#if KMP_CACHE_MANAGE
                // Prefetch next thread's go flag
                if (i+1 < nproc)
                    KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */
                KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
                              "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                              other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
                              &other_threads[i]->th.th_bar[bt].bb.b_go,
                              other_threads[i]->th.th_bar[bt].bb.b_go,
                              other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP));
                ANNOTATE_BARRIER_BEGIN(other_threads[i]);
                kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]);
                flag.release();
            }
        }
    } else { // Wait for the MASTER thread to release us
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;
        // The worker thread may now assume that the team is valid.
#ifdef KMP_DEBUG
        tid = __kmp_tid_from_gtid(gtid);
        team = __kmp_threads[gtid]->th.th_team;
#endif
        KMP_DEBUG_ASSERT(team != NULL);
        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }
    KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// Tree barrier
static void
__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                          void (*reduce)(void *, void *)
                          USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint64 new_state;

    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    // Perform tree gather to wait until all threads have arrived; reduce any required data as we go
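    /* Tree layout (sketch): with branch_factor = 1 << branch_bits, the children of
       thread `tid` are tids tid*branch_factor+1 .. tid*branch_factor+branch_factor
       (clipped to nproc), so e.g. with branch_factor 4 thread 0 waits on tids 1-4,
       thread 1 waits on tids 5-8, and so on. Each parent waits for its children
       here and then reports to its own parent, (tid-1) >> branch_bits. */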
    child_tid = (tid << branch_bits) + 1;
    if (child_tid < nproc) {
        // Parent threads wait for all their children to arrive
        new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        child = 1;
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's arrived count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 flag(&child_bar->b_arrived, new_state);
            flag.wait(this_thr, FALSE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }

    if (!KMP_MASTER_TID(tid)) { // Worker threads
        register kmp_int32 parent_tid = (tid - 1) >> branch_bits;

        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived,
                      thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));

        // Mark arrival to parent thread
        /* After performing this write, a worker thread may not assume that the team is valid
           any more - it could be deallocated by the master thread at any time. */
        ANNOTATE_BARRIER_BEGIN(this_thr);
        kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]);
        flag.release();
    } else {
        // Need to update the team arrived pointer if we are the master thread
        if (nproc > 1) // New value was already computed above
            team->t.t_bar[bt].b_arrived = new_state;
        else
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           int propagate_icvs
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;

    // Perform a tree release for all of the threads that have been gathered
    if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    } else {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    nproc = this_thr->th.th_team_nproc;
    child_tid = (tid << branch_bits) + 1;

    if (child_tid < nproc) {
        register kmp_info_t **other_threads = team->t.t_threads;
        child = 1;
        // Parent threads release all their children
        do {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            // Prefetch next thread's go count
            if (child+1 <= branch_factor && child_tid+1 < nproc)
                KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
            {
                KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
                if (propagate_icvs) {
                    __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
                                             team, child_tid, FALSE);
                    copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs,
                              &team->t.t_implicit_task_taskdata[0].td_icvs);
                }
            }
#endif // KMP_BARRIER_ICV_PUSH
            KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                          "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                          child_tid, &child_bar->b_go, child_bar->b_go,
                          child_bar->b_go + KMP_BARRIER_STATE_BUMP));
            // Release child from barrier
            ANNOTATE_BARRIER_BEGIN(child_thr);
            kmp_flag_64 flag(&child_bar->b_go, child_thr);
            flag.release();
            child++;
            child_tid++;
        }
        while (child <= branch_factor && child_tid < nproc);
    }
    KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}


// Hyper Barrier
static void
__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                           void (*reduce)(void *, void *)
                           USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE;
    register kmp_uint32 num_threads = this_thr->th.th_team_nproc;
    register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));

    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp();
    }
#endif
    /* Perform a hypercube-embedded tree gather to wait until all of the threads have
       arrived, and reduce any required data as we go. */
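    /* Pairing sketch for the loop below, assuming branch_bits == 1 (pair-wise
       exchange): in round `level`, a thread whose bits (tid >> level) are nonzero
       modulo branch_factor signals the partner obtained by clearing those bits,
       tid & ~((1 << (level + branch_bits)) - 1), and drops out; the surviving
       threads wait for their partners and move to the next round, so thread 0 is
       the last one standing after roughly log_{branch_factor}(num_threads) rounds. */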
    kmp_flag_64 p_flag(&thr_bar->b_arrived);
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
    {
        register kmp_uint32 child;
        register kmp_uint32 child_tid;

        if (((tid >> level) & (branch_factor - 1)) != 0) {
            register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1);

            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                          "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid,
                          &thr_bar->b_arrived, thr_bar->b_arrived,
                          thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP));
            // Mark arrival to parent thread
            /* After performing this write (in the last iteration of the enclosing for loop),
               a worker thread may not assume that the team is valid any more - it could be
               deallocated by the master thread at any time. */
            ANNOTATE_BARRIER_BEGIN(this_thr);
            p_flag.set_waiter(other_threads[parent_tid]);
            p_flag.release();
            break;
        }

        // Parent threads wait for children to arrive
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        for (child=1, child_tid=tid+(1 << level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1 << level))
        {
            register kmp_info_t *child_thr = other_threads[child_tid];
            register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
            register kmp_uint32 next_child_tid = child_tid + (1 << level);
            // Prefetch next thread's arrived count
            if (child+1 < branch_factor && next_child_tid < num_threads)
                KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
            KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) "
                          "arrived(%p) == %llu\n", gtid, team->t.t_id, tid,
                          __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid,
                          &child_bar->b_arrived, new_state));
            // Wait for child to arrive
            kmp_flag_64 c_flag(&child_bar->b_arrived, new_state);
            c_flag.wait(this_thr, FALSE
                        USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(child_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
            // Barrier imbalance - write min of the thread time and a child time to the thread.
            if (__kmp_forkjoin_frames_mode == 2) {
                this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time,
                                                       child_thr->th.th_bar_min_time);
            }
#endif
            if (reduce) {
                KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n",
                               gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                               team->t.t_id, child_tid));
                ANNOTATE_REDUCE_AFTER(reduce);
                (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                ANNOTATE_REDUCE_BEFORE(reduce);
                ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
            }
        }
    }

    if (KMP_MASTER_TID(tid)) {
        // Need to update the team arrived pointer if we are the master thread
        if (new_state == KMP_BARRIER_UNUSED_STATE)
            team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP;
        else
            team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id,
                      &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// The reverse versions seem to beat the forward versions overall
#define KMP_REVERSE_HYPER_BAR
static void
__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                            int propagate_icvs
                            USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_info_t **other_threads;
    register kmp_uint32 num_threads;
    register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt];
    register kmp_uint32 branch_factor = 1 << branch_bits;
    register kmp_uint32 child;
    register kmp_uint32 child_tid;
    register kmp_uint32 offset;
    register kmp_uint32 level;

    /* Perform a hypercube-embedded tree release for all of the threads that have been gathered.
       If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse
       order of the corresponding gather, otherwise threads are released in the same order. */
    if (KMP_MASTER_TID(tid)) { // master
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs) { // master already has ICVs in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
#endif
    }
    else { // Handle fork barrier workers who aren't part of a team yet
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
                      gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP));
        // Wait for parent thread to release us
        kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
        flag.wait(this_thr, TRUE
                  USE_ITT_BUILD_ARG(itt_sync_obj) );
        ANNOTATE_BARRIER_END(this_thr);
#if USE_ITT_BUILD && USE_ITT_NOTIFY
        if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) {
            // In fork barrier where we could not get the object reliably
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1);
            // Cancel wait on previous parallel region...
            __kmp_itt_task_starting(itt_sync_obj);

            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

            itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
            if (itt_sync_obj != NULL)
                // Call prepare as early as possible for "new" barrier
                __kmp_itt_task_finished(itt_sync_obj);
        } else
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
            // Early exit for reaping threads releasing forkjoin barrier
            if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
                return;

        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
        KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }
    num_threads = this_thr->th.th_team_nproc;
    other_threads = team->t.t_threads;

#ifdef KMP_REVERSE_HYPER_BAR
    // Count up to correct level for parent
    for (level=0, offset=1; offset<num_threads && (((tid>>level) & (branch_factor-1)) == 0);
         level+=branch_bits, offset<<=branch_bits);

    // Now go down from there
    for (level-=branch_bits, offset>>=branch_bits; offset != 0;
         level-=branch_bits, offset>>=branch_bits)
#else
    // Go down the tree, level by level
    for (level=0, offset=1; offset<num_threads; level+=branch_bits, offset<<=branch_bits)
#endif // KMP_REVERSE_HYPER_BAR
    {
#ifdef KMP_REVERSE_HYPER_BAR
        /* Now go in reverse order through the children, highest to lowest.
           Initial setting of child is conservative here. */
        child = num_threads >> ((level==0)?level:level-1);
        for (child=(child<branch_factor-1) ? child : branch_factor-1, child_tid=tid+(child<<level);
             child>=1; child--, child_tid-=(1<<level))
#else
        if (((tid >> level) & (branch_factor - 1)) != 0)
            // No need to go lower than this, since this is the level parent would be notified
            break;
        // Iterate through children on this level of the tree
        for (child=1, child_tid=tid+(1<<level); child<branch_factor && child_tid<num_threads;
             child++, child_tid+=(1<<level))
#endif // KMP_REVERSE_HYPER_BAR
        {
            if (child_tid >= num_threads) continue; // Child doesn't exist so keep going
            else {
                register kmp_info_t *child_thr = other_threads[child_tid];
                register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
#if KMP_CACHE_MANAGE
                register kmp_uint32 next_child_tid = child_tid - (1 << level);
                // Prefetch next thread's go count
# ifdef KMP_REVERSE_HYPER_BAR
                if (child-1 >= 1 && next_child_tid < num_threads)
# else
                if (child+1 < branch_factor && next_child_tid < num_threads)
# endif // KMP_REVERSE_HYPER_BAR
                    KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go);
#endif /* KMP_CACHE_MANAGE */

#if KMP_BARRIER_ICV_PUSH
                if (propagate_icvs) // push my fixed ICVs to my child
                    copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH

                KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u) "
                              "go(%p): %u => %u\n", gtid, team->t.t_id, tid,
                              __kmp_gtid_from_tid(child_tid, team), team->t.t_id,
                              child_tid, &child_bar->b_go, child_bar->b_go,
                              child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                // Release child from barrier
                ANNOTATE_BARRIER_BEGIN(child_thr);
                kmp_flag_64 flag(&child_bar->b_go, child_thr);
                flag.release();
            }
        }
    }
#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
    }
#endif
    KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// Hierarchical Barrier

// Initialize thread barrier data
/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the
   minimum amount of initialization required based on how the team has changed. Returns true if
   leaf children will require both on-core and traditional wake-up mechanisms. For example, if the
   team size increases, threads already in the team will respond to on-core wakeup on their parent
   thread, but threads newly added to the team will only be listening on their local b_go. */
static bool
__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc,
                                       int gtid, int tid, kmp_team_t *team)
{
    // Checks to determine if (re-)initialization is needed
    bool uninitialized = thr_bar->team == NULL;
    bool team_changed = team != thr_bar->team;
    bool team_sz_changed = nproc != thr_bar->nproc;
    bool tid_changed = tid != thr_bar->old_tid;
    bool retval = false;

    if (uninitialized || team_sz_changed) {
        __kmp_get_hierarchy(nproc, thr_bar);
    }

    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->my_level = thr_bar->depth-1; // default for master
        thr_bar->parent_tid = -1; // default for master
        if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy
            kmp_uint32 d=0;
            while (d<thr_bar->depth) { // find parent based on level of thread in hierarchy, and note level
                kmp_uint32 rem;
                if (d == thr_bar->depth-2) { // reached level right below the master
                    thr_bar->parent_tid = 0;
                    thr_bar->my_level = d;
                    break;
                }
                else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster?
                    // thread is not a subtree root at next level, so this is max
                    thr_bar->parent_tid = tid - rem;
                    thr_bar->my_level = d;
                    break;
                }
                ++d;
            }
        }
        thr_bar->offset = 7-(tid-thr_bar->parent_tid-1);
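        /* The offset computed above selects one byte of the parent's 64-bit
           barrier flag for this thread: the parent's first child (tid ==
           parent_tid+1) gets byte 7, the next child byte 6, and so on, matching
           the leaf_state initialization below that sets
           ((char *)&leaf_state)[7-i] = 1 for each leaf child. The oncore flag
           paths later in this file then wait on or clear just that byte of the
           shared word instead of touching the whole 64-bit flag. */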
        thr_bar->old_tid = tid;
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
    }
    if (uninitialized || team_changed || tid_changed) {
        thr_bar->team = team;
        thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb;
        retval = true;
    }
    if (uninitialized || team_sz_changed || tid_changed) {
        thr_bar->nproc = nproc;
        thr_bar->leaf_kids = thr_bar->base_leaf_kids;
        if (thr_bar->my_level == 0) thr_bar->leaf_kids = 0;
        if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc)
            thr_bar->leaf_kids = nproc - tid - 1;
        thr_bar->leaf_state = 0;
        for (int i=0; i<thr_bar->leaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1;
    }
    return retval;
}

static void
__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
                                  int gtid, int tid, void (*reduce)(void *, void *)
                                  USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
    register kmp_team_t *team = this_thr->th.th_team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc = this_thr->th.th_team_nproc;
    register kmp_info_t **other_threads = team->t.t_threads;
    register kmp_uint64 new_state;

    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct?
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
    KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]);

#if USE_ITT_BUILD && USE_ITT_NOTIFY
    // Barrier imbalance - save arrive time to the thread
    if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) {
        this_thr->th.th_bar_arrive_time = __itt_get_timestamp();
    }
#endif

    (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);

    if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf)
        register kmp_int32 child_tid;
        new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag
                kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state;
                KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n",
                              gtid, team->t.t_id, tid));
                kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state);
                flag.wait(this_thr, FALSE
                          USE_ITT_BUILD_ARG(itt_sync_obj) );
                if (reduce) {
                    ANNOTATE_REDUCE_AFTER(reduce);
                    for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_BARRIER_END(other_threads[child_tid]);
                        (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data);
                    }
                    ANNOTATE_REDUCE_BEFORE(reduce);
                    ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                }
                (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits
            }
            // Next, wait for higher level children on each child's b_arrived flag
            for (kmp_uint32 d=1; d<thr_bar->my_level; ++d) { // gather lowest level threads first, but skip 0
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj) );
                    ANNOTATE_BARRIER_END(child_thr);
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_REDUCE_AFTER(reduce);
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                        ANNOTATE_REDUCE_BEFORE(reduce);
                        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                    }
                }
            }
        }
        else { // Blocktime is not infinite
            for (kmp_uint32 d=0; d<thr_bar->my_level; ++d) { // Gather lowest level threads first
                kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = other_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
                                  "arrived(%p) == %llu\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_arrived, new_state));
                    kmp_flag_64 flag(&child_bar->b_arrived, new_state);
                    flag.wait(this_thr, FALSE
                              USE_ITT_BUILD_ARG(itt_sync_obj) );
                    ANNOTATE_BARRIER_END(child_thr);
                    if (reduce) {
                        KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
                                       gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                       team->t.t_id, child_tid));
                        ANNOTATE_REDUCE_AFTER(reduce);
                        (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data);
                        ANNOTATE_REDUCE_BEFORE(reduce);
                        ANNOTATE_REDUCE_BEFORE(&team->t.t_bar);
                    }
                }
            }
        }
    }
    // All subordinates are gathered; now release parent if not master thread

    if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
                      "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid,
                      __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid,
                      &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP));
        /* Mark arrival to parent: After performing this write, a worker thread may not assume that
           the team is valid any more - it could be deallocated by the master thread at any time. */
        if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it
            ANNOTATE_BARRIER_BEGIN(this_thr);
            kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]);
            flag.release();
        }
        else { // Leaf does special release on the "offset" bits of parent's b_arrived flag
            thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset);
            flag.set_waiter(other_threads[thr_bar->parent_tid]);
            flag.release();
        }
    } else { // Master thread needs to update the team's b_arrived value
        team->t.t_bar[bt].b_arrived = new_state;
        KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n",
                      gtid, team->t.t_id, tid, team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived));
    }
    // Is the team access below unsafe or just technically invalid?
    KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

static void
__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid,
                                   int propagate_icvs
                                   USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
    KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
    register kmp_team_t *team;
    register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
    register kmp_uint32 nproc;
    bool team_change = false; // indicates on-core barrier shouldn't be used

    if (KMP_MASTER_TID(tid)) {
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n",
                      gtid, team->t.t_id, tid, bt));
    }
    else { // Worker threads
        // Wait for parent thread to release me
        if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME
            || thr_bar->my_level != 0 || thr_bar->team == NULL) {
            // Use traditional method of waiting on my own b_go flag
            thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG;
            kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP);
            flag.wait(this_thr, TRUE
                      USE_ITT_BUILD_ARG(itt_sync_obj) );
            ANNOTATE_BARRIER_END(this_thr);
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
        }
        else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested
            // Wait on my "offset" bits on parent's b_go flag
            thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG;
            kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset,
                                 bt, this_thr
                                 USE_ITT_BUILD_ARG(itt_sync_obj) );
            flag.wait(this_thr, TRUE);
            if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go
                TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            }
            else { // Reset my bits on parent's b_go flag
                ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0;
            }
        }
        thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING;
        // Early exit for reaping threads releasing forkjoin barrier
        if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done))
            return;
        // The worker thread may now assume that the team is valid.
        team = __kmp_threads[gtid]->th.th_team;
        KMP_DEBUG_ASSERT(team != NULL);
        tid = __kmp_tid_from_gtid(gtid);

        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
                      gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE));
        KMP_MB(); // Flush all pending memory write invalidates.
    }

    nproc = this_thr->th.th_team_nproc;
    int level = team->t.t_level;
#if OMP_40_ENABLED
    if (team->t.t_threads[0]->th.th_teams_microtask) { // are we inside the teams construct?
        if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level)
            ++level; // level was not increased in teams construct for team_of_workers
        if (this_thr->th.th_teams_size.nteams > 1)
            ++level; // level was not increased in teams construct for team_of_masters
    }
#endif
    if (level == 1) thr_bar->use_oncore_barrier = 1;
    else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested

    // If the team size has increased, we still communicate with old leaves via oncore barrier.
    unsigned short int old_leaf_kids = thr_bar->leaf_kids;
    kmp_uint64 old_leaf_state = thr_bar->leaf_state;
    team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team);
    // But if the entire team changes, we won't use oncore barrier at all
    if (team_change) old_leaf_kids = 0;

#if KMP_BARRIER_ICV_PUSH
    if (propagate_icvs) {
        __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
        if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy
            copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs);
        }
        else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime
            if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0)
                // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
            // non-leaves will get ICVs piggybacked with b_go via NGO store
        }
        else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs
            if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access
                copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs);
            else // leaves copy parent's fixed ICVs directly to local ICV store
                copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
                          &thr_bar->parent_bar->th_fixed_icvs);
        }
    }
#endif // KMP_BARRIER_ICV_PUSH

    // Now, release my children
    if (thr_bar->my_level) { // not a leaf
        register kmp_int32 child_tid;
        kmp_uint32 last;
        if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) {
            if (KMP_MASTER_TID(tid)) { // do a flat release
                // Set local b_go to bump children via NGO store of the cache line containing ICVs and b_go.
                thr_bar->b_go = KMP_BARRIER_STATE_BUMP;
                // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line
                ngo_load(&thr_bar->th_fixed_icvs);
                // This loops over all the threads skipping only the leaf nodes in the hierarchy
                for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) {
                    register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Use ngo store (if available) to both store ICVs and release child via child's b_go
                    ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs);
                }
                ngo_sync();
            }
            TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time
            // Now, release leaf children
            if (thr_bar->leaf_kids) { // if there are any
                // We test team_change on the off-chance that the level 1 team changed.
                if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new
                    if (old_leaf_kids) { // release old leaf kids
                        thr_bar->b_go |= old_leaf_state;
                    }
                    // Release new leaf kids
                    last = tid+thr_bar->skip_per_level[1];
                    if (last > nproc) last = nproc;
                    for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1
                        register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                        register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                        KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing"
                                      " T#%d(%d:%d) go(%p): %u => %u\n",
                                      gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                      team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                      child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                        // Release child using child's b_go flag
                        ANNOTATE_BARRIER_BEGIN(child_thr);
                        kmp_flag_64 flag(&child_bar->b_go, child_thr);
                        flag.release();
                    }
                }
                else { // Release all children at once with leaf_state bits on my own b_go flag
                    thr_bar->b_go |= thr_bar->leaf_state;
                }
            }
        }
        else { // Blocktime is not infinite; do a simple hierarchical release
            for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first
                last = tid+thr_bar->skip_per_level[d+1];
                kmp_uint32 skip = thr_bar->skip_per_level[d];
                if (last > nproc) last = nproc;
                for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) {
                    register kmp_info_t *child_thr = team->t.t_threads[child_tid];
                    register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb;
                    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
                                  " go(%p): %u => %u\n",
                                  gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team),
                                  team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go,
                                  child_bar->b_go + KMP_BARRIER_STATE_BUMP));
                    // Release child using child's b_go flag
                    ANNOTATE_BARRIER_BEGIN(child_thr);
                    kmp_flag_64 flag(&child_bar->b_go, child_thr);
                    flag.release();
                }
            }
        }
#if KMP_BARRIER_ICV_PUSH
        if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest
            copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs);
#endif // KMP_BARRIER_ICV_PUSH
    }
    KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
                  gtid, team->t.t_id, tid, bt));
}

// ---------------------------- End of Barrier Algorithms ----------------------------

// Internal function to do a barrier.
/* If is_split is true, do a split barrier, otherwise, do a plain barrier
   If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier
   Returns 0 if master thread, 1 if worker thread. */
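/* Illustrative call (a sketch of how the compiler-support entry points are
   typically expected to reach this routine, not a definition from this file):
   a plain #pragma omp barrier usually arrives as
       __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
   while a reduction barrier passes is_split=TRUE together with reduce_size,
   reduce_data, and a reduce callback that combines two threads' data. */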
int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
              void *reduce_data, void (*reduce)(void *, void *))
{
    KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
    KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
    register int tid = __kmp_tid_from_gtid(gtid);
    register kmp_info_t *this_thr = __kmp_threads[gtid];
    register kmp_team_t *team = this_thr->th.th_team;
    register int status = 0;
    ident_t *loc = __kmp_threads[gtid]->th.th_ident;
#if OMPT_SUPPORT
    ompt_task_id_t my_task_id;
    ompt_parallel_id_t my_parallel_id;
#endif

    KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n",
                  gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));

    ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
#if OMPT_SUPPORT
    if (ompt_enabled) {
#if OMPT_BLAME
        my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
        my_parallel_id = team->t.ompt_team_info.parallel_id;

#if OMPT_TRACE
        if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
            if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
                ompt_callbacks.ompt_callback(ompt_event_single_others_end)(
                    my_parallel_id, my_task_id);
            }
        }
#endif
        if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
            ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
                my_parallel_id, my_task_id);
        }
#endif
        // It is OK to report the barrier state after the barrier begin callback.
        // According to the OMPT specification, a compliant implementation may
        // even delay reporting this state until the barrier begins to wait.
        this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
    }
#endif

    if (! team->t.t_serialized) {
#if USE_ITT_BUILD
        // This value will be used in itt notify events below.
        void *itt_sync_obj = NULL;
# if USE_ITT_NOTIFY
        if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
            itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
# endif
#endif /* USE_ITT_BUILD */
        if (__kmp_tasking_mode == tskm_extra_barrier) {
            __kmp_tasking_barrier(team, this_thr, gtid);
            KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
                          gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid)));
        }

        /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when
           the team struct is not guaranteed to exist. */
        // See note about the corresponding code in __kmp_join_barrier() being performance-critical.
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
            this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
            this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
#else
            this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
#endif
        }
1157
1158#if USE_ITT_BUILD
1159 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1160 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1161#endif /* USE_ITT_BUILD */
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001162#if USE_DEBUGGER
1163        // Let the debugger know: the thread has arrived at the barrier and is waiting.
1164 if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure.
1165 team->t.t_bar[bt].b_master_arrived += 1;
1166 } else {
1167 this_thr->th.th_bar[bt].bb.b_worker_arrived += 1;
1168 } // if
1169#endif /* USE_DEBUGGER */
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001170 if (reduce != NULL) {
1171 //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956
1172 this_thr->th.th_local.reduce_data = reduce_data;
1173 }
Jonathan Peytonb0b83c82015-11-09 16:28:32 +00001174
1175 if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec)
1176            __kmp_task_team_setup(this_thr, team, 0); // use 0 to set up only the current team if nthreads > 1
1177
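        // Gather phase: signal arrival using the pattern selected at runtime (linear,
        // tree, hyper, or hierarchical).  If a reduce callback was supplied, the gather
        // also combines the per-thread reduce_data, so the master holds the reduced
        // result once the gather completes.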
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001178 switch (__kmp_barrier_gather_pattern[bt]) {
1179 case bp_hyper_bar: {
1180 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
1181 __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce
1182 USE_ITT_BUILD_ARG(itt_sync_obj) );
1183 break;
1184 }
1185 case bp_hierarchical_bar: {
1186 __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce
1187 USE_ITT_BUILD_ARG(itt_sync_obj));
1188 break;
1189 }
1190 case bp_tree_bar: {
1191 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear
1192 __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce
1193 USE_ITT_BUILD_ARG(itt_sync_obj) );
1194 break;
1195 }
1196 default: {
1197 __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce
1198 USE_ITT_BUILD_ARG(itt_sync_obj) );
1199 }
1200 }
1201
1202 KMP_MB();
1203
1204 if (KMP_MASTER_TID(tid)) {
1205 status = 0;
1206 if (__kmp_tasking_mode != tskm_immediate_exec) {
1207 __kmp_task_team_wait(this_thr, team
1208 USE_ITT_BUILD_ARG(itt_sync_obj) );
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001209 }
Jonathan Peyton8fbb49a2015-07-09 18:16:58 +00001210#if USE_DEBUGGER
1211            // Let the debugger know: all threads have arrived and are starting to leave the barrier.
1212 team->t.t_bar[bt].b_team_arrived += 1;
1213#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001214
1215#if USE_ITT_BUILD
1216 /* TODO: In case of split reduction barrier, master thread may send acquired event early,
1217 before the final summation into the shared variable is done (final summation can be a
1218 long operation for array reductions). */
1219 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1220 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1221#endif /* USE_ITT_BUILD */
1222#if USE_ITT_BUILD && USE_ITT_NOTIFY
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001223 // Barrier - report frame end (only if active_level == 1)
1224 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1225#if OMP_40_ENABLED
1226 this_thr->th.th_teams_microtask == NULL &&
1227#endif
1228 team->t.t_active_level == 1)
1229 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001230 kmp_uint64 cur_time = __itt_get_timestamp();
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001231 kmp_info_t **other_threads = team->t.t_threads;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001232 int nproc = this_thr->th.th_team_nproc;
1233 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001234 switch(__kmp_forkjoin_frames_mode) {
1235 case 1:
1236 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1237 this_thr->th.th_frame_time = cur_time;
1238 break;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001239 case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed)
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001240 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1241 break;
1242 case 3:
1243 if( __itt_metadata_add_ptr ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001244 // Initialize with master's wait time
1245 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001246 // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below
1247 this_thr->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001248 for (i=1; i<nproc; ++i) {
1249 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001250 other_threads[i]->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001251 }
1252 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL));
1253 }
1254 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1255 this_thr->th.th_frame_time = cur_time;
1256 break;
1257 }
1258 }
1259#endif /* USE_ITT_BUILD */
1260 } else {
1261 status = 1;
1262#if USE_ITT_BUILD
1263 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1264 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1265#endif /* USE_ITT_BUILD */
1266 }
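        // Release phase: workers (status == 1) always wait here to be released.  For a
        // plain (non-split) barrier the master performs the release now; for a split
        // barrier the master returns immediately and the release is deferred to
        // __kmp_end_split_barrier().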
1267 if (status == 1 || ! is_split) {
1268 switch (__kmp_barrier_release_pattern[bt]) {
1269 case bp_hyper_bar: {
1270 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1271 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1272 USE_ITT_BUILD_ARG(itt_sync_obj) );
1273 break;
1274 }
1275 case bp_hierarchical_bar: {
1276 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1277 USE_ITT_BUILD_ARG(itt_sync_obj) );
1278 break;
1279 }
1280 case bp_tree_bar: {
1281 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1282 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1283 USE_ITT_BUILD_ARG(itt_sync_obj) );
1284 break;
1285 }
1286 default: {
1287 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1288 USE_ITT_BUILD_ARG(itt_sync_obj) );
1289 }
1290 }
1291 if (__kmp_tasking_mode != tskm_immediate_exec) {
1292 __kmp_task_team_sync(this_thr, team);
1293 }
1294 }
1295
1296#if USE_ITT_BUILD
1297 /* GEH: TODO: Move this under if-condition above and also include in
1298 __kmp_end_split_barrier(). This will more accurately represent the actual release time
1299 of the threads for split barriers. */
1300 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1301 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1302#endif /* USE_ITT_BUILD */
1303 } else { // Team is serialized.
1304 status = 0;
1305 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peytondf6818b2016-06-14 17:57:47 +00001306#if OMP_45_ENABLED
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001307 if ( this_thr->th.th_task_team != NULL ) {
1308 void *itt_sync_obj = NULL;
1309#if USE_ITT_NOTIFY
1310 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1311 itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1);
1312 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1313 }
1314#endif
1315
Jonathan Peytonfe9a1d72015-08-26 19:58:48 +00001316 KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001317 __kmp_task_team_wait(this_thr, team
1318 USE_ITT_BUILD_ARG(itt_sync_obj));
Jonathan Peyton54127982015-11-04 21:37:48 +00001319 __kmp_task_team_setup(this_thr, team, 0);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001320
1321#if USE_ITT_BUILD
1322 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1323 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1324#endif /* USE_ITT_BUILD */
1325 }
1326#else
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001327 // The task team should be NULL for serialized code (tasks will be executed immediately)
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001328 KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001329 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL);
Andrey Churbanov535b6fa2015-05-07 17:41:51 +00001330#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001331 }
1332 }
1333 KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
1334 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001335
1336#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001337 if (ompt_enabled) {
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001338#if OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001339 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001340 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1341 my_parallel_id, my_task_id);
1342 }
1343#endif
1344 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1345 }
1346#endif
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001347 ANNOTATE_BARRIER_END(&team->t.t_bar);
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001348
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001349 return status;
1350}
1351
1352
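// Hedged sketch of how __kmp_barrier() pairs with __kmp_end_split_barrier() for a
// split reduction barrier; the caller shown here is hypothetical -- the real call
// sites are the __kmpc_reduce*/__kmpc_end_reduce* entry points:
//
//     if (__kmp_barrier(bs_reduction_barrier, gtid, TRUE /*is_split*/,
//                       reduce_size, reduce_data, reduce_func) == 0) {
//         // Master: finish any deferred work (e.g. copying out the reduced value),
//         // then release the waiting workers.
//         __kmp_end_split_barrier(bs_reduction_barrier, gtid);
//     }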
1353void
1354__kmp_end_split_barrier(enum barrier_type bt, int gtid)
1355{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001356 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1357 KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001358 int tid = __kmp_tid_from_gtid(gtid);
1359 kmp_info_t *this_thr = __kmp_threads[gtid];
1360 kmp_team_t *team = this_thr->th.th_team;
1361
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001362 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001363 if (!team->t.t_serialized) {
1364 if (KMP_MASTER_GTID(gtid)) {
1365 switch (__kmp_barrier_release_pattern[bt]) {
1366 case bp_hyper_bar: {
1367 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1368 __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE
1369 USE_ITT_BUILD_ARG(NULL) );
1370 break;
1371 }
1372 case bp_hierarchical_bar: {
1373 __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE
1374 USE_ITT_BUILD_ARG(NULL));
1375 break;
1376 }
1377 case bp_tree_bar: {
1378 KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]);
1379 __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE
1380 USE_ITT_BUILD_ARG(NULL) );
1381 break;
1382 }
1383 default: {
1384 __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE
1385 USE_ITT_BUILD_ARG(NULL) );
1386 }
1387 }
1388 if (__kmp_tasking_mode != tskm_immediate_exec) {
1389 __kmp_task_team_sync(this_thr, team);
1390 } // if
1391 }
1392 }
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001393 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001394}
1395
1396
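// __kmp_join_barrier(): gather-only half of the fork/join barrier, executed as a
// parallel region ends.  There is no matching release here; workers are released
// from __kmp_fork_barrier() when the master forks the next region (or shuts down).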
1397void
1398__kmp_join_barrier(int gtid)
1399{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001400 KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001401 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001402 register kmp_info_t *this_thr = __kmp_threads[gtid];
1403 register kmp_team_t *team;
1404 register kmp_uint nproc;
1405 kmp_info_t *master_thread;
1406 int tid;
1407#ifdef KMP_DEBUG
1408 int team_id;
1409#endif /* KMP_DEBUG */
1410#if USE_ITT_BUILD
1411 void *itt_sync_obj = NULL;
1412# if USE_ITT_NOTIFY
1413 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need
1414 // Get object created at fork_barrier
1415 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1416# endif
1417#endif /* USE_ITT_BUILD */
1418 KMP_MB();
1419
1420 // Get current info
1421 team = this_thr->th.th_team;
1422 nproc = this_thr->th.th_team_nproc;
1423 KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc);
1424 tid = __kmp_tid_from_gtid(gtid);
1425#ifdef KMP_DEBUG
1426 team_id = team->t.t_id;
1427#endif /* KMP_DEBUG */
1428 master_thread = this_thr->th.th_team_master;
1429#ifdef KMP_DEBUG
1430 if (master_thread != team->t.t_threads[0]) {
1431 __kmp_print_structure();
1432 }
1433#endif /* KMP_DEBUG */
1434 KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]);
1435 KMP_MB();
1436
1437 // Verify state
1438 KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
1439 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team));
1440 KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root));
1441 KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]);
1442 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid));
1443
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001444 ANNOTATE_BARRIER_BEGIN(&team->t.t_bar);
Jonathan Peyton61118492016-05-20 19:03:38 +00001445#if OMPT_SUPPORT
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001446#if OMPT_TRACE
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001447 if (ompt_enabled &&
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001448 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
1449 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(
1450 team->t.ompt_team_info.parallel_id,
1451 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
1452 }
Jonathan Peyton117a94f2015-06-29 17:28:57 +00001453#endif
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001454 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
1455#endif
1456
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001457 if (__kmp_tasking_mode == tskm_extra_barrier) {
1458 __kmp_tasking_barrier(team, this_thr, gtid);
1459        KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, team_id, tid));
1460 }
1461# ifdef KMP_DEBUG
1462 if (__kmp_tasking_mode != tskm_immediate_exec) {
1463 KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001464 __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state],
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001465 this_thr->th.th_task_team));
Andrey Churbanov6d224db2015-02-10 18:37:43 +00001466 KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001467 }
1468# endif /* KMP_DEBUG */
1469
1470 /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the
1471       team struct is not guaranteed to exist.  Doing these loads causes a cache miss that slows
1472 down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite,
1473 since the values are not used by __kmp_wait_template() in that case. */
1474 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001475#if KMP_USE_MONITOR
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001476 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1477 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001478#else
1479 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1480#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001481 }
1482
1483#if USE_ITT_BUILD
1484 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1485 __kmp_itt_barrier_starting(gtid, itt_sync_obj);
1486#endif /* USE_ITT_BUILD */
1487
1488 switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) {
1489 case bp_hyper_bar: {
1490 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1491 __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1492 USE_ITT_BUILD_ARG(itt_sync_obj) );
1493 break;
1494 }
1495 case bp_hierarchical_bar: {
1496 __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1497 USE_ITT_BUILD_ARG(itt_sync_obj) );
1498 break;
1499 }
1500 case bp_tree_bar: {
1501 KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]);
1502 __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1503 USE_ITT_BUILD_ARG(itt_sync_obj) );
1504 break;
1505 }
1506 default: {
1507 __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL
1508 USE_ITT_BUILD_ARG(itt_sync_obj) );
1509 }
1510 }
1511
1512 /* From this point on, the team data structure may be deallocated at any time by the
1513 master thread - it is unsafe to reference it in any of the worker threads. Any per-team
1514 data items that need to be referenced before the end of the barrier should be moved to
1515 the kmp_task_team_t structs. */
1516 if (KMP_MASTER_TID(tid)) {
1517 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001518 __kmp_task_team_wait(this_thr, team
1519 USE_ITT_BUILD_ARG(itt_sync_obj) );
1520 }
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001521#if KMP_STATS_ENABLED
1522 // Have master thread flag the workers to indicate they are now waiting for
1523        // the next parallel region.  Also wake them up so they switch their timers to idle.
1524 for (int i=0; i<team->t.t_nproc; ++i) {
1525 kmp_info_t* team_thread = team->t.t_threads[i];
1526 if (team_thread == this_thr)
1527 continue;
1528 team_thread->th.th_stats->setIdleFlag();
1529 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
1530 __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
1531 }
1532#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001533#if USE_ITT_BUILD
1534 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1535 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1536#endif /* USE_ITT_BUILD */
1537
1538# if USE_ITT_BUILD && USE_ITT_NOTIFY
1539 // Join barrier - report frame end
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001540 if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode &&
1541#if OMP_40_ENABLED
1542 this_thr->th.th_teams_microtask == NULL &&
1543#endif
1544 team->t.t_active_level == 1)
1545 {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001546 kmp_uint64 cur_time = __itt_get_timestamp();
1547 ident_t * loc = team->t.t_ident;
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001548 kmp_info_t **other_threads = team->t.t_threads;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001549 int nproc = this_thr->th.th_team_nproc;
1550 int i;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001551 switch(__kmp_forkjoin_frames_mode) {
1552 case 1:
1553 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1554 break;
1555 case 2:
1556 __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc);
1557 break;
1558 case 3:
1559 if( __itt_metadata_add_ptr ) {
Andrey Churbanov51aecb82015-05-06 19:22:36 +00001560 // Initialize with master's wait time
1561 kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time;
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001562 // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below
1563 this_thr->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001564 for (i=1; i<nproc; ++i) {
1565 delta += ( cur_time - other_threads[i]->th.th_bar_arrive_time );
Jonathan Peyton99ef4d02016-04-14 16:06:49 +00001566 other_threads[i]->th.th_bar_arrive_time = 0;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001567 }
1568 __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0);
1569 }
1570 __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc);
1571 this_thr->th.th_frame_time = cur_time;
1572 break;
1573 }
1574 }
1575# endif /* USE_ITT_BUILD */
1576 }
1577#if USE_ITT_BUILD
1578 else {
1579 if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
1580 __kmp_itt_barrier_middle(gtid, itt_sync_obj);
1581 }
1582#endif /* USE_ITT_BUILD */
1583
1584#if KMP_DEBUG
1585 if (KMP_MASTER_TID(tid)) {
1586 KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
1587 gtid, team_id, tid, nproc));
1588 }
1589#endif /* KMP_DEBUG */
1590
1591 // TODO now, mark worker threads as done so they may be disbanded
1592 KMP_MB(); // Flush all pending memory write invalidates.
1593 KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid));
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001594
1595#if OMPT_SUPPORT
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001596 if (ompt_enabled) {
Jonathan Peytoncab67cc2015-09-18 16:24:46 +00001597#if OMPT_BLAME
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001598 if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001599 ompt_callbacks.ompt_callback(ompt_event_barrier_end)(
1600 team->t.ompt_team_info.parallel_id,
1601 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
Jonathan Peytonb68a85d2015-09-21 18:11:22 +00001602 }
Andrey Churbanovd7d088f2015-04-29 16:42:24 +00001603#endif
1604
1605 // return to default state
1606 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
1607 }
1608#endif
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001609 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001610}
1611
1612
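// __kmp_fork_barrier(): release-only half of the fork/join barrier.  Workers block in
// the release pattern below until the master has set up the new team; on the way out
// they pick up ICVs (push or pull) and synchronize their task-team pointers.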
1613// TODO release worker threads' fork barriers as we are ready instead of all at once
1614void
1615__kmp_fork_barrier(int gtid, int tid)
1616{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001617 KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
Jonathan Peyton11dc82f2016-05-05 16:15:57 +00001618 KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001619 kmp_info_t *this_thr = __kmp_threads[gtid];
1620 kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
1621#if USE_ITT_BUILD
1622 void * itt_sync_obj = NULL;
1623#endif /* USE_ITT_BUILD */
Jonas Hahnfeld50fed042016-11-07 15:58:36 +00001624 if (team)
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001625 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001626
1627 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
1628 gtid, (team != NULL) ? team->t.t_id : -1, tid));
1629
1630 // th_team pointer only valid for master thread here
1631 if (KMP_MASTER_TID(tid)) {
1632#if USE_ITT_BUILD && USE_ITT_NOTIFY
1633 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1634 // Create itt barrier object
1635 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1);
1636 __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing
1637 }
1638#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1639
1640#ifdef KMP_DEBUG
1641 register kmp_info_t **other_threads = team->t.t_threads;
1642 register int i;
1643
1644 // Verify state
1645 KMP_MB();
1646
1647 for(i=1; i<team->t.t_nproc; ++i) {
1648 KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n",
1649 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
1650 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
1651 other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go));
1652 KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)
1653 & ~(KMP_BARRIER_SLEEP_STATE))
1654 == KMP_INIT_BARRIER_STATE);
1655 KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team);
1656 }
1657#endif
1658
1659 if (__kmp_tasking_mode != tskm_immediate_exec) {
Jonathan Peyton54127982015-11-04 21:37:48 +00001660 __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001661 }
1662
1663 /* The master thread may have changed its blocktime between the join barrier and the
1664 fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can
1665 access it when the team struct is not guaranteed to exist. */
1666 // See note about the corresponding code in __kmp_join_barrier() being performance-critical
1667 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
Jonathan Peytone1c7c132016-10-07 18:12:19 +00001668#if KMP_USE_MONITOR
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001669 this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
1670 this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
Jonathan Peyton2208a8512017-01-27 17:54:31 +00001671#else
1672 this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL();
1673#endif
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001674 }
1675 } // master
1676
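    // The master drives the release (propagating ICVs when KMP_BARRIER_ICV_PUSH is
    // enabled); workers wait in the selected release pattern until their b_go flags
    // are bumped for the new parallel region.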
1677 switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) {
1678 case bp_hyper_bar: {
1679 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1680 __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1681 USE_ITT_BUILD_ARG(itt_sync_obj) );
1682 break;
1683 }
1684 case bp_hierarchical_bar: {
1685 __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1686 USE_ITT_BUILD_ARG(itt_sync_obj) );
1687 break;
1688 }
1689 case bp_tree_bar: {
1690 KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]);
1691 __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1692 USE_ITT_BUILD_ARG(itt_sync_obj) );
1693 break;
1694 }
1695 default: {
1696 __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
1697 USE_ITT_BUILD_ARG(itt_sync_obj) );
1698 }
1699 }
1700
1701 // Early exit for reaping threads releasing forkjoin barrier
1702 if (TCR_4(__kmp_global.g.g_done)) {
Jonathan Peyton54127982015-11-04 21:37:48 +00001703 this_thr->th.th_task_team = NULL;
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001704
1705#if USE_ITT_BUILD && USE_ITT_NOTIFY
1706 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1707 if (!KMP_MASTER_TID(tid)) {
1708 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1709 if (itt_sync_obj)
1710 __kmp_itt_barrier_finished(gtid, itt_sync_obj);
1711 }
1712 }
1713#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1714 KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid));
1715 return;
1716 }
1717
1718 /* We can now assume that a valid team structure has been allocated by the master and
1719 propagated to all worker threads. The current thread, however, may not be part of the
1720 team, so we can't blindly assume that the team pointer is non-null. */
1721 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
1722 KMP_DEBUG_ASSERT(team != NULL);
1723 tid = __kmp_tid_from_gtid(gtid);
1724
1725
1726#if KMP_BARRIER_ICV_PULL
1727 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1728 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
1729 this data before this function is called. We cannot modify __kmp_fork_call() to look at
1730 the fixed ICVs in the master's thread struct, because it is not always the case that the
1731 threads arrays have been allocated when __kmp_fork_call() is executed. */
Jonathan Peyton45be4502015-08-11 21:36:41 +00001732 {
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001733 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
Jonathan Peyton45be4502015-08-11 21:36:41 +00001734 if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
1735 // Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
1736 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
1737 __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE);
1738 copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs,
1739 &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs);
1740 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001741 }
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001742#endif // KMP_BARRIER_ICV_PULL
1743
1744 if (__kmp_tasking_mode != tskm_immediate_exec) {
1745 __kmp_task_team_sync(this_thr, team);
1746 }
1747
1748#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1749 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
1750 if (proc_bind == proc_bind_intel) {
1751#endif
Andrey Churbanovf28f6132015-01-13 14:54:00 +00001752#if KMP_AFFINITY_SUPPORTED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001753 // Call dynamic affinity settings
1754 if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
1755 __kmp_balanced_affinity(tid, team->t.t_nproc);
1756 }
Andrey Churbanovf28f6132015-01-13 14:54:00 +00001757#endif // KMP_AFFINITY_SUPPORTED
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001758#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
1759 }
Andrey Churbanov94e569e2015-03-10 09:19:47 +00001760 else if (proc_bind != proc_bind_false) {
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001761 if (this_thr->th.th_new_place == this_thr->th.th_current_place) {
1762 KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n",
1763 __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place));
1764 }
1765 else {
1766 __kmp_affinity_set_place(gtid);
1767 }
1768 }
1769#endif
1770
1771#if USE_ITT_BUILD && USE_ITT_NOTIFY
1772 if (__itt_sync_create_ptr || KMP_ITT_DEBUG) {
1773 if (!KMP_MASTER_TID(tid)) {
1774 // Get correct barrier object
1775 itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier);
1776 __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired
1777 } // (prepare called inside barrier_release)
1778 }
1779#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
Jonas Hahnfeld35801a22017-02-15 08:14:22 +00001780 ANNOTATE_BARRIER_END(&team->t.t_bar);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001781 KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid));
1782}
1783
1784
1785void
1786__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
1787{
Jonathan Peyton5375fe82016-11-14 21:13:44 +00001788 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001789
1790 KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
1791 KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
1792
1793 /* Master thread's copy of the ICVs was set up on the implicit taskdata in
1794 __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has
1795 this data before this function is called. */
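/* Three mutually exclusive ICV propagation strategies are compiled in:
     PULL   - stash the ICVs in the master's th_fixed_icvs here; workers copy them out
              during the fork barrier.
     PUSH   - nothing to do here; the ICVs are pushed down the tree in the fork-barrier
              release.
     linear - the master copies the ICVs into every worker's implicit task right now
              (O(nthreads)). */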
1796#if KMP_BARRIER_ICV_PULL
1797    /* Copy the ICVs into th_fixed_icvs in the master thread's structure (which remains untouched), where
1798 all of the worker threads can access them and make their own copies after the barrier. */
1799 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
1800 copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
1801 KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
1802 0, team->t.t_threads[0], team));
1803#elif KMP_BARRIER_ICV_PUSH
1804 // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
1805 KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
1806 0, team->t.t_threads[0], team));
1807#else
1808 // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
1809 ngo_load(new_icvs);
1810 KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
Jonathan Peyton91b78702015-06-08 19:39:07 +00001811 for (int f=1; f<new_nproc; ++f) { // Skip the master thread
Jim Cownie4cc4bb42014-10-07 16:25:50 +00001812 // TODO: GEH - pass in better source location info since usually NULL here
1813 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1814 f, team->t.t_threads[f], team));
1815 __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
1816 ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
1817 KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
1818 f, team->t.t_threads[f], team));
1819 }
1820 ngo_sync();
1821#endif // KMP_BARRIER_ICV_PULL
1822}